fix(feed): correct feed link, canonical id, and strip html wrapper from content

- link: computed as /admin/feeds/:id/emails instead of stale site_url from KV
- id: computed dynamically from baseUrl instead of stale feed_url from KV
- item description/content: strip <html><head><body> wrapper via extractBodyContent()
  so feed readers receive a body fragment, not a full HTML document

Fixes RSS validator warnings: SelfDoesntMatchLocation (stale KV domain) and
InvalidHTML (full HTML document inside <description>/<content:encoded>).
Adds 8 tests covering extractBodyContent and the new feed/atom link assertions.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Julien Herr
2026-05-22 18:28:13 +02:00
parent 780bf6c190
commit bcc9640591
2 changed files with 94 additions and 7 deletions
+77 -1
View File
@@ -1,5 +1,9 @@
import { describe, it, expect } from "vitest"; import { describe, it, expect } from "vitest";
import { generateRssFeed, generateAtomFeed } from "./feed-generator"; import {
generateRssFeed,
generateAtomFeed,
extractBodyContent,
} from "./feed-generator";
import { FeedConfig, EmailData } from "../types"; import { FeedConfig, EmailData } from "../types";
const mockFeedConfig: FeedConfig = { const mockFeedConfig: FeedConfig = {
@@ -36,6 +40,28 @@ const mockEmailWithAttachment: EmailData = {
const BASE_URL = "https://test.getmynews.app"; const BASE_URL = "https://test.getmynews.app";
const FEED_ID = "abc123"; const FEED_ID = "abc123";
// Table-driven coverage for extractBodyContent: each row is
// [test title, input HTML, expected extracted fragment].
describe("extractBodyContent", () => {
  const fragmentOnly = "<p>Already a fragment</p>";
  const scenarios: ReadonlyArray<readonly [string, string, string]> = [
    [
      "extracts content inside <body> tags",
      "<html><head></head><body><p>Hello</p></body></html>",
      "<p>Hello</p>",
    ],
    [
      "handles body tag with attributes",
      '<html><body style="margin:0"><p>Hi</p></body></html>',
      "<p>Hi</p>",
    ],
    [
      // Input without any <body> element must come back untouched.
      "returns html unchanged when no body tags present",
      fragmentOnly,
      fragmentOnly,
    ],
    [
      "is case-insensitive for body tag matching",
      "<HTML><BODY><p>content</p></BODY></HTML>",
      "<p>content</p>",
    ],
  ];

  for (const [title, input, expected] of scenarios) {
    it(title, () => {
      expect(extractBodyContent(input)).toBe(expected);
    });
  }
});
describe("generateRssFeed", () => { describe("generateRssFeed", () => {
it("returns RSS 2.0 with channel element", () => { it("returns RSS 2.0 with channel element", () => {
const result = generateRssFeed( const result = generateRssFeed(
@@ -110,6 +136,31 @@ describe("generateRssFeed", () => {
expect(result).toContain("<channel>"); expect(result).toContain("<channel>");
expect(result).not.toContain("<item>"); expect(result).not.toContain("<item>");
}); });
// The RSS output must reference the admin emails page built from the
// base URL and feed id passed to the generator.
it("feed link points to admin emails page", () => {
  const expectedLink = `${BASE_URL}/admin/feeds/${FEED_ID}/emails`;
  const xml = generateRssFeed(mockFeedConfig, mockEmails, BASE_URL, FEED_ID);

  expect(xml).toContain(expectedLink);
});
// An email stored as a full HTML document must reach the RSS output as a
// body fragment only, with no <html> wrapper.
it("strips html/head/body wrapper from item description", () => {
  const fullDocumentEmail: EmailData = {
    ...mockEmails[0],
    content: "<html><head></head><body><p>Body only</p></body></html>",
  };
  const emails = [fullDocumentEmail];

  const xml = generateRssFeed(mockFeedConfig, emails, BASE_URL, FEED_ID);

  expect(xml).toContain("<p>Body only</p>");
  expect(xml).not.toContain("<html>");
});
}); });
describe("generateAtomFeed", () => { describe("generateAtomFeed", () => {
@@ -191,6 +242,31 @@ describe("generateAtomFeed", () => {
expect(result).not.toContain("<entry>"); expect(result).not.toContain("<entry>");
}); });
// The Atom output must reference the admin emails page built from the
// base URL and feed id passed to the generator.
it("feed link points to admin emails page", () => {
  const expectedLink = `${BASE_URL}/admin/feeds/${FEED_ID}/emails`;
  const xml = generateAtomFeed(mockFeedConfig, mockEmails, BASE_URL, FEED_ID);

  expect(xml).toContain(expectedLink);
});
// An email stored as a full HTML document must reach the Atom output as a
// body fragment only, with no <html> wrapper.
it("strips html/head/body wrapper from entry content", () => {
  const fullDocumentEmail: EmailData = {
    ...mockEmails[0],
    content: "<html><head></head><body><p>Body only</p></body></html>",
  };
  const emails = [fullDocumentEmail];

  const xml = generateAtomFeed(mockFeedConfig, emails, BASE_URL, FEED_ID);

  expect(xml).toContain("<p>Body only</p>");
  expect(xml).not.toContain("<html>");
});
it("handles config without description", () => { it("handles config without description", () => {
const configNoDesc: FeedConfig = { const configNoDesc: FeedConfig = {
...mockFeedConfig, ...mockFeedConfig,
+17 -6
View File
@@ -13,6 +13,13 @@ function parseFromAddress(from: string): { name: string; email?: string } {
return { name: from.trim() }; return { name: from.trim() };
} }
/**
 * Returns the inner HTML of the first `<body>` element found in `html`.
 *
 * Email content is stored as a full HTML document, while feed readers expect
 * only a body fragment in `<description>` / `<content:encoded>`.
 *
 * Matching is case-insensitive and tolerates attributes on the opening tag
 * and whitespace inside the closing tag (`</body >`).
 *
 * @param html - A full HTML document or an HTML fragment.
 * @returns The markup between `<body ...>` and the first following `</body>`.
 *   When the closing tag is missing (truncated or tag-omitted markup),
 *   everything after the opening tag is returned with a trailing `</html>`
 *   removed. When there is no `<body>` opening tag, `html` is returned
 *   unchanged.
 */
export function extractBodyContent(html: string): string {
  // Require either `<body>` or `<body` followed by whitespace, so tag names
  // that merely start with "body" (e.g. `<body-text>`) are not mistaken for it.
  const openTag = /<body(?:\s[^>]*)?>/i.exec(html);
  if (!openTag) {
    return html;
  }

  const afterOpen = html.slice(openTag.index + openTag[0].length);

  const closeTag = /<\/body\s*>/i.exec(afterOpen);
  if (closeTag) {
    return afterOpen.slice(0, closeTag.index);
  }

  // No closing tag: still drop the document wrapper instead of leaking
  // `<html><head>` into the feed item.
  return afterOpen.replace(/<\/html\s*>\s*$/i, "");
}
function buildFeed( function buildFeed(
feedConfig: FeedConfig, feedConfig: FeedConfig,
emails: EmailData[], emails: EmailData[],
@@ -22,11 +29,14 @@ function buildFeed(
const feed = new Feed({ const feed = new Feed({
title: feedConfig.title, title: feedConfig.title,
description: feedConfig.description || "", description: feedConfig.description || "",
id: feedConfig.feed_url, // Computed dynamically so the id is always canonical regardless of what
link: feedConfig.site_url, // was stored in KV at feed-creation time (which may have used a stale domain).
id: `${baseUrl}/rss/${feedId}`,
// Link points to the admin emails page — the "website" this feed represents.
link: `${baseUrl}/admin/feeds/${feedId}/emails`,
language: feedConfig.language, language: feedConfig.language,
updated: new Date(), updated: new Date(),
generator: "Email-to-RSS", generator: "kill-the-news",
copyright: `Copyright © ${new Date().getFullYear()} ${feedConfig.title}`, copyright: `Copyright © ${new Date().getFullYear()} ${feedConfig.title}`,
feedLinks: { feedLinks: {
rss: `${baseUrl}/rss/${feedId}`, rss: `${baseUrl}/rss/${feedId}`,
@@ -35,7 +45,7 @@ function buildFeed(
author: feedConfig.author author: feedConfig.author
? { ? {
name: feedConfig.author, name: feedConfig.author,
email: `noreply@${new URL(feedConfig.site_url).hostname}`, email: `noreply@${new URL(baseUrl).hostname}`,
} }
: undefined, : undefined,
}); });
@@ -43,12 +53,13 @@ function buildFeed(
for (const email of emails) { for (const email of emails) {
const uniqueId = `${email.receivedAt}-${Buffer.from(email.subject).toString("base64").substring(0, 10)}`; const uniqueId = `${email.receivedAt}-${Buffer.from(email.subject).toString("base64").substring(0, 10)}`;
const firstAttachment = email.attachments?.[0]; const firstAttachment = email.attachments?.[0];
const bodyContent = extractBodyContent(email.content);
feed.addItem({ feed.addItem({
title: email.subject, title: email.subject,
id: uniqueId, id: uniqueId,
link: `${baseUrl}/entries/${feedId}/${email.receivedAt}`, link: `${baseUrl}/entries/${feedId}/${email.receivedAt}`,
description: email.content, description: bodyContent,
content: email.content, content: bodyContent,
author: [parseFromAddress(email.from)], author: [parseFromAddress(email.from)],
date: new Date(email.receivedAt), date: new Date(email.receivedAt),
enclosure: firstAttachment enclosure: firstAttachment