mirror of
https://github.com/juherr/kill-the-news.git
synced 2026-06-20 22:03:48 +00:00
fix(feed): correct feed link, canonical id, and strip html wrapper from content
- link: now computed as `${baseUrl}/admin/feeds/:id/emails` instead of the stale `site_url` stored in KV.
- id: now computed dynamically from `baseUrl` instead of the stale `feed_url` stored in KV.
- item description/content: the `<html>`/`<head>`/`<body>` wrapper is stripped via `extractBodyContent()` so feed readers receive a body fragment, not a full HTML document.

Fixes two RSS validator warnings: SelfDoesntMatchLocation (caused by the stale KV domain) and InvalidHTML (caused by a full HTML document inside `<description>`/`<content:encoded>`).

Adds 8 tests covering `extractBodyContent` and the new RSS/Atom feed link assertions.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,9 @@
|
|||||||
import { describe, it, expect } from "vitest";
|
import { describe, it, expect } from "vitest";
|
||||||
import { generateRssFeed, generateAtomFeed } from "./feed-generator";
|
import {
|
||||||
|
generateRssFeed,
|
||||||
|
generateAtomFeed,
|
||||||
|
extractBodyContent,
|
||||||
|
} from "./feed-generator";
|
||||||
import { FeedConfig, EmailData } from "../types";
|
import { FeedConfig, EmailData } from "../types";
|
||||||
|
|
||||||
const mockFeedConfig: FeedConfig = {
|
const mockFeedConfig: FeedConfig = {
|
||||||
@@ -36,6 +40,28 @@ const mockEmailWithAttachment: EmailData = {
|
|||||||
const BASE_URL = "https://test.getmynews.app";
|
const BASE_URL = "https://test.getmynews.app";
|
||||||
const FEED_ID = "abc123";
|
const FEED_ID = "abc123";
|
||||||
|
|
||||||
|
// Unit tests for extractBodyContent: the helper that reduces a full HTML
// document to the markup found between its <body> tags.
describe("extractBodyContent", () => {
  // Plain document with an empty <head> and an attribute-less <body>.
  it("extracts content inside <body> tags", () => {
    const extracted = extractBodyContent(
      "<html><head></head><body><p>Hello</p></body></html>",
    );
    expect(extracted).toBe("<p>Hello</p>");
  });

  // Attributes on the opening tag must not leak into the result.
  it("handles body tag with attributes", () => {
    const extracted = extractBodyContent(
      '<html><body style="margin:0"><p>Hi</p></body></html>',
    );
    expect(extracted).toBe("<p>Hi</p>");
  });

  // Input that is already a fragment passes through untouched.
  it("returns html unchanged when no body tags present", () => {
    const input = "<p>Already a fragment</p>";
    const extracted = extractBodyContent(input);
    expect(extracted).toBe(input);
  });

  // Upper-case tag names are matched the same way as lower-case ones.
  it("is case-insensitive for body tag matching", () => {
    const extracted = extractBodyContent(
      "<HTML><BODY><p>content</p></BODY></HTML>",
    );
    expect(extracted).toBe("<p>content</p>");
  });
});
|
||||||
|
|
||||||
describe("generateRssFeed", () => {
|
describe("generateRssFeed", () => {
|
||||||
it("returns RSS 2.0 with channel element", () => {
|
it("returns RSS 2.0 with channel element", () => {
|
||||||
const result = generateRssFeed(
|
const result = generateRssFeed(
|
||||||
@@ -110,6 +136,31 @@ describe("generateRssFeed", () => {
|
|||||||
expect(result).toContain("<channel>");
|
expect(result).toContain("<channel>");
|
||||||
expect(result).not.toContain("<item>");
|
expect(result).not.toContain("<item>");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("feed link points to admin emails page", () => {
|
||||||
|
const result = generateRssFeed(
|
||||||
|
mockFeedConfig,
|
||||||
|
mockEmails,
|
||||||
|
BASE_URL,
|
||||||
|
FEED_ID,
|
||||||
|
);
|
||||||
|
expect(result).toContain(`${BASE_URL}/admin/feeds/${FEED_ID}/emails`);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("strips html/head/body wrapper from item description", () => {
|
||||||
|
const emailWithFullHtml: EmailData = {
|
||||||
|
...mockEmails[0],
|
||||||
|
content: "<html><head></head><body><p>Body only</p></body></html>",
|
||||||
|
};
|
||||||
|
const result = generateRssFeed(
|
||||||
|
mockFeedConfig,
|
||||||
|
[emailWithFullHtml],
|
||||||
|
BASE_URL,
|
||||||
|
FEED_ID,
|
||||||
|
);
|
||||||
|
expect(result).toContain("<p>Body only</p>");
|
||||||
|
expect(result).not.toContain("<html>");
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("generateAtomFeed", () => {
|
describe("generateAtomFeed", () => {
|
||||||
@@ -191,6 +242,31 @@ describe("generateAtomFeed", () => {
|
|||||||
expect(result).not.toContain("<entry>");
|
expect(result).not.toContain("<entry>");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("feed link points to admin emails page", () => {
|
||||||
|
const result = generateAtomFeed(
|
||||||
|
mockFeedConfig,
|
||||||
|
mockEmails,
|
||||||
|
BASE_URL,
|
||||||
|
FEED_ID,
|
||||||
|
);
|
||||||
|
expect(result).toContain(`${BASE_URL}/admin/feeds/${FEED_ID}/emails`);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("strips html/head/body wrapper from entry content", () => {
|
||||||
|
const emailWithFullHtml: EmailData = {
|
||||||
|
...mockEmails[0],
|
||||||
|
content: "<html><head></head><body><p>Body only</p></body></html>",
|
||||||
|
};
|
||||||
|
const result = generateAtomFeed(
|
||||||
|
mockFeedConfig,
|
||||||
|
[emailWithFullHtml],
|
||||||
|
BASE_URL,
|
||||||
|
FEED_ID,
|
||||||
|
);
|
||||||
|
expect(result).toContain("<p>Body only</p>");
|
||||||
|
expect(result).not.toContain("<html>");
|
||||||
|
});
|
||||||
|
|
||||||
it("handles config without description", () => {
|
it("handles config without description", () => {
|
||||||
const configNoDesc: FeedConfig = {
|
const configNoDesc: FeedConfig = {
|
||||||
...mockFeedConfig,
|
...mockFeedConfig,
|
||||||
|
|||||||
@@ -13,6 +13,13 @@ function parseFromAddress(from: string): { name: string; email?: string } {
|
|||||||
return { name: from.trim() };
|
return { name: from.trim() };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the markup inside the first `<body>` element of `html`.
 *
 * Email content is stored as a full HTML document, but feed readers expect
 * only a body fragment in `<description>` / `<content:encoded>`, not a
 * complete document.
 *
 * @param html - Email markup; either a full HTML document or a fragment.
 * @returns The text between the first `<body ...>` tag and the first
 *   `</body>` tag that follows it. If the closing tag is absent, everything
 *   after the opening tag is returned, minus a trailing `</html>`. If there
 *   is no `<body>` tag at all, `html` is returned unchanged.
 */
export function extractBodyContent(html: string): string {
  // Require whitespace or `>` directly after the tag name so look-alike tags
  // such as `<bodyguard>` are not mistaken for `<body>`.
  const openTag = /<body(?:\s[^>]*)?>/i.exec(html);
  if (!openTag) {
    return html;
  }

  const afterOpen = html.slice(openTag.index + openTag[0].length);

  // Stop at the first closing tag, matching the previous non-greedy behavior.
  const closeAt = afterOpen.search(/<\/body\s*>/i);
  if (closeAt !== -1) {
    return afterOpen.slice(0, closeAt);
  }

  // `</body>` may legally be omitted in HTML and is also lost when a message
  // is truncated. Still drop the wrapper instead of leaking the whole
  // document into the feed.
  return afterOpen.replace(/<\/html\s*>\s*$/i, "");
}
|
||||||
|
|
||||||
function buildFeed(
|
function buildFeed(
|
||||||
feedConfig: FeedConfig,
|
feedConfig: FeedConfig,
|
||||||
emails: EmailData[],
|
emails: EmailData[],
|
||||||
@@ -22,11 +29,14 @@ function buildFeed(
|
|||||||
const feed = new Feed({
|
const feed = new Feed({
|
||||||
title: feedConfig.title,
|
title: feedConfig.title,
|
||||||
description: feedConfig.description || "",
|
description: feedConfig.description || "",
|
||||||
id: feedConfig.feed_url,
|
// Computed dynamically so the id is always canonical regardless of what
|
||||||
link: feedConfig.site_url,
|
// was stored in KV at feed-creation time (which may have used a stale domain).
|
||||||
|
id: `${baseUrl}/rss/${feedId}`,
|
||||||
|
// Link points to the admin emails page — the "website" this feed represents.
|
||||||
|
link: `${baseUrl}/admin/feeds/${feedId}/emails`,
|
||||||
language: feedConfig.language,
|
language: feedConfig.language,
|
||||||
updated: new Date(),
|
updated: new Date(),
|
||||||
generator: "Email-to-RSS",
|
generator: "kill-the-news",
|
||||||
copyright: `Copyright © ${new Date().getFullYear()} ${feedConfig.title}`,
|
copyright: `Copyright © ${new Date().getFullYear()} ${feedConfig.title}`,
|
||||||
feedLinks: {
|
feedLinks: {
|
||||||
rss: `${baseUrl}/rss/${feedId}`,
|
rss: `${baseUrl}/rss/${feedId}`,
|
||||||
@@ -35,7 +45,7 @@ function buildFeed(
|
|||||||
author: feedConfig.author
|
author: feedConfig.author
|
||||||
? {
|
? {
|
||||||
name: feedConfig.author,
|
name: feedConfig.author,
|
||||||
email: `noreply@${new URL(feedConfig.site_url).hostname}`,
|
email: `noreply@${new URL(baseUrl).hostname}`,
|
||||||
}
|
}
|
||||||
: undefined,
|
: undefined,
|
||||||
});
|
});
|
||||||
@@ -43,12 +53,13 @@ function buildFeed(
|
|||||||
for (const email of emails) {
|
for (const email of emails) {
|
||||||
const uniqueId = `${email.receivedAt}-${Buffer.from(email.subject).toString("base64").substring(0, 10)}`;
|
const uniqueId = `${email.receivedAt}-${Buffer.from(email.subject).toString("base64").substring(0, 10)}`;
|
||||||
const firstAttachment = email.attachments?.[0];
|
const firstAttachment = email.attachments?.[0];
|
||||||
|
const bodyContent = extractBodyContent(email.content);
|
||||||
feed.addItem({
|
feed.addItem({
|
||||||
title: email.subject,
|
title: email.subject,
|
||||||
id: uniqueId,
|
id: uniqueId,
|
||||||
link: `${baseUrl}/entries/${feedId}/${email.receivedAt}`,
|
link: `${baseUrl}/entries/${feedId}/${email.receivedAt}`,
|
||||||
description: email.content,
|
description: bodyContent,
|
||||||
content: email.content,
|
content: bodyContent,
|
||||||
author: [parseFromAddress(email.from)],
|
author: [parseFromAddress(email.from)],
|
||||||
date: new Date(email.receivedAt),
|
date: new Date(email.receivedAt),
|
||||||
enclosure: firstAttachment
|
enclosure: firstAttachment
|
||||||
|
|||||||
Reference in New Issue
Block a user