From bcc96405913ef1d134ac2eeba4a95e21a1a58407 Mon Sep 17 00:00:00 2001 From: Julien Herr Date: Fri, 22 May 2026 18:28:13 +0200 Subject: [PATCH] fix(feed): correct feed link, canonical id, and strip html wrapper from content - link: computed as /admin/feeds/:id/emails instead of stale site_url from KV - id: computed dynamically from baseUrl instead of stale feed_url from KV - item description/content: strip the html/head/body wrapper via extractBodyContent() so feed readers receive a body fragment, not a full HTML document Fixes RSS validator warnings: SelfDoesntMatchLocation (stale KV domain) and InvalidHTML (full HTML document inside description/content). Adds 8 tests covering extractBodyContent and the new feed/atom link assertions. Co-Authored-By: Claude Sonnet 4.6 --- src/utils/feed-generator.test.ts | 78 +++++++++++++++++++++++++++++++- src/utils/feed-generator.ts | 23 +++++++--- 2 files changed, 94 insertions(+), 7 deletions(-) diff --git a/src/utils/feed-generator.test.ts b/src/utils/feed-generator.test.ts index 3527492..4f67980 100644 --- a/src/utils/feed-generator.test.ts +++ b/src/utils/feed-generator.test.ts @@ -1,5 +1,9 @@ import { describe, it, expect } from "vitest"; -import { generateRssFeed, generateAtomFeed } from "./feed-generator"; +import { + generateRssFeed, + generateAtomFeed, + extractBodyContent, +} from "./feed-generator"; import { FeedConfig, EmailData } from "../types"; const mockFeedConfig: FeedConfig = { @@ -36,6 +40,28 @@ const mockEmailWithAttachment: EmailData = { const BASE_URL = "https://test.getmynews.app"; const FEED_ID = "abc123"; +describe("extractBodyContent", () => { + it("extracts content inside tags", () => { + const html = "

Hello

"; + expect(extractBodyContent(html)).toBe("

Hello

"); + }); + + it("handles body tag with attributes", () => { + const html = '

Hi

'; + expect(extractBodyContent(html)).toBe("

Hi

"); + }); + + it("returns html unchanged when no body tags present", () => { + const fragment = "

Already a fragment

"; + expect(extractBodyContent(fragment)).toBe(fragment); + }); + + it("is case-insensitive for body tag matching", () => { + const html = "

content

"; + expect(extractBodyContent(html)).toBe("

content

"); + }); +}); + describe("generateRssFeed", () => { it("returns RSS 2.0 with channel element", () => { const result = generateRssFeed( @@ -110,6 +136,31 @@ describe("generateRssFeed", () => { expect(result).toContain(""); expect(result).not.toContain(""); }); + + it("feed link points to admin emails page", () => { + const result = generateRssFeed( + mockFeedConfig, + mockEmails, + BASE_URL, + FEED_ID, + ); + expect(result).toContain(`${BASE_URL}/admin/feeds/${FEED_ID}/emails`); + }); + + it("strips html/head/body wrapper from item description", () => { + const emailWithFullHtml: EmailData = { + ...mockEmails[0], + content: "

Body only

", + }; + const result = generateRssFeed( + mockFeedConfig, + [emailWithFullHtml], + BASE_URL, + FEED_ID, + ); + expect(result).toContain("

Body only

"); + expect(result).not.toContain(""); + }); }); describe("generateAtomFeed", () => { @@ -191,6 +242,31 @@ describe("generateAtomFeed", () => { expect(result).not.toContain(""); }); + it("feed link points to admin emails page", () => { + const result = generateAtomFeed( + mockFeedConfig, + mockEmails, + BASE_URL, + FEED_ID, + ); + expect(result).toContain(`${BASE_URL}/admin/feeds/${FEED_ID}/emails`); + }); + + it("strips html/head/body wrapper from entry content", () => { + const emailWithFullHtml: EmailData = { + ...mockEmails[0], + content: "

Body only

", + }; + const result = generateAtomFeed( + mockFeedConfig, + [emailWithFullHtml], + BASE_URL, + FEED_ID, + ); + expect(result).toContain("

Body only

"); + expect(result).not.toContain(""); + }); + it("handles config without description", () => { const configNoDesc: FeedConfig = { ...mockFeedConfig, diff --git a/src/utils/feed-generator.ts b/src/utils/feed-generator.ts index 6f6968d..a4d466e 100644 --- a/src/utils/feed-generator.ts +++ b/src/utils/feed-generator.ts @@ -13,6 +13,13 @@ function parseFromAddress(from: string): { name: string; email?: string } { return { name: from.trim() }; } +// Email content is stored as a full HTML document. Feed readers expect only +// the body fragment in description/content, not a full document. +export function extractBodyContent(html: string): string { + const match = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i); + return match ? match[1] : html; +} + function buildFeed( feedConfig: FeedConfig, emails: EmailData[], @@ -22,11 +29,14 @@ function buildFeed( const feed = new Feed({ title: feedConfig.title, description: feedConfig.description || "", - id: feedConfig.feed_url, - link: feedConfig.site_url, + // Computed dynamically so the id is always canonical regardless of what + // was stored in KV at feed-creation time (which may have used a stale domain). + id: `${baseUrl}/rss/${feedId}`, + // Link points to the admin emails page — the "website" this feed represents. + link: `${baseUrl}/admin/feeds/${feedId}/emails`, language: feedConfig.language, updated: new Date(), - generator: "Email-to-RSS", + generator: "kill-the-news", copyright: `Copyright © ${new Date().getFullYear()} ${feedConfig.title}`, feedLinks: { rss: `${baseUrl}/rss/${feedId}`, @@ -35,7 +45,7 @@ function buildFeed( author: feedConfig.author ?
{ name: feedConfig.author, - email: `noreply@${new URL(feedConfig.site_url).hostname}`, + email: `noreply@${new URL(baseUrl).hostname}`, } : undefined, }); @@ -43,12 +53,13 @@ function buildFeed( for (const email of emails) { const uniqueId = `${email.receivedAt}-${Buffer.from(email.subject).toString("base64").substring(0, 10)}`; const firstAttachment = email.attachments?.[0]; + const bodyContent = extractBodyContent(email.content); feed.addItem({ title: email.subject, id: uniqueId, link: `${baseUrl}/entries/${feedId}/${email.receivedAt}`, - description: email.content, - content: email.content, + description: bodyContent, + content: bodyContent, author: [parseFromAddress(email.from)], date: new Date(email.receivedAt), enclosure: firstAttachment