mirror of
https://github.com/juherr/kill-the-news.git
synced 2026-06-20 22:03:48 +00:00
feat: WebSub Atom support, HTML processing via linkedom, W3C badges
WebSub / PubSubHubbub:
- Hub now accepts both /rss/:id and /atom/:id topic URLs
- WebSubSubscription stores format ("rss" | "atom")
- notifySubscribers sends RSS or Atom XML with correct Content-Type
- verifyAndStoreSubscription sends correct topic URL per format
- CI paths-ignore docs/** to skip deploy on docs-only changes
HTML processing (linkedom + escape-html):
- New html-processor.ts: body extraction, script/iframe/object removal,
event handler + javascript: URL stripping, mso-* style cleanup,
plain text → <pre> with HTML escaping via escape-html
- feed-generator.ts and entries.ts use processEmailContent
Admin UI:
- W3C validation badges (Atom + RSS) on feed detail page
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,8 @@
|
||||
import { Feed } from "feed";
|
||||
import { FeedConfig, EmailData } from "../types";
|
||||
import { processEmailContent } from "./html-processor";
|
||||
|
||||
export { processEmailContent as extractBodyContent };
|
||||
|
||||
function parseFromAddress(from: string): { name: string; email?: string } {
|
||||
const match = from.match(/^(.*?)\s*<([^>]+)>\s*$/);
|
||||
@@ -13,29 +16,6 @@ function parseFromAddress(from: string): { name: string; email?: string } {
|
||||
return { name: from.trim() };
|
||||
}
|
||||
|
||||
// Matches a `style` attribute whose value is wrapped in double OR single quotes.
// Group 1 holds a double-quoted value, group 2 a single-quoted one.
const STYLE_ATTRIBUTE = /\bstyle=(?:"([^"]*)"|'([^']*)')/gi;

// Matches one CSS declaration, i.e. everything up to the next *top-level* `;`.
// A `;` does not end the declaration when it terminates a character reference
// (`&quot;`, `&#39;`, `&#x27;`), or when it sits inside `(...)` or a quoted
// string (e.g. `url(data:image/png;base64,...)`).
const STYLE_DECLARATION =
  /(?:&(?:#\d+|#x[0-9a-f]+|[a-z][a-z0-9]*);|\([^)]*\)|"[^"]*"|'[^']*'|[^;])+/gi;

/** Removes `mso-*` declarations from one inline style value; returns "" if nothing is left. */
function stripMsoDeclarations(style: string): string {
  const declarations = style.match(STYLE_DECLARATION) ?? [];
  return declarations
    .map((declaration) => declaration.trim())
    .filter((declaration) => declaration && !/^mso-/i.test(declaration))
    .join("; ");
}

/**
 * Reduces a stored email HTML document to the fragment a feed reader expects.
 *
 * Email content is stored as a full HTML document, while feed readers expect
 * only the body fragment in <description>/<content:encoded>.
 *
 * - With a `<body>…</body>` pair, the text between the tags is used.
 * - With an opening `<body>` but no closing tag, everything after the opening
 *   tag is used and a trailing `</html>` is dropped.
 * - Without any `<body>` tag, the input is used as-is.
 *
 * `mso-*` properties (Office HTML, which triggers feed validator warnings) are
 * then stripped from inline `style` attributes. A `style` attribute left with
 * no declarations is removed entirely.
 *
 * @param html - Full HTML document or fragment.
 * @returns The body fragment with Office-specific inline styles removed.
 */
export function extractBodyContent(html: string): string {
  let body = html;

  const closedBody = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
  if (closedBody) {
    body = closedBody[1] ?? "";
  } else {
    const openBody = html.match(/<body[^>]*>([\s\S]*)/i);
    if (openBody) {
      body = (openBody[1] ?? "").replace(/<\/html>\s*$/i, "");
    }
  }

  return body.replace(
    STYLE_ATTRIBUTE,
    (_match, doubleQuoted?: string, singleQuoted?: string) => {
      // Re-emit the attribute with the quote character it originally used, so a
      // single-quoted value containing `"` stays well-formed.
      const quote = doubleQuoted !== undefined ? '"' : "'";
      const cleaned = stripMsoDeclarations(doubleQuoted ?? singleQuoted ?? "");
      return cleaned ? `style=${quote}${cleaned}${quote}` : "";
    },
  );
}
|
||||
|
||||
function buildFeed(
|
||||
feedConfig: FeedConfig,
|
||||
emails: EmailData[],
|
||||
@@ -70,7 +50,7 @@ function buildFeed(
|
||||
for (const email of emails) {
|
||||
const entryUrl = `${baseUrl}/entries/${feedId}/${email.receivedAt}`;
|
||||
const firstAttachment = email.attachments?.[0];
|
||||
const bodyContent = extractBodyContent(email.content);
|
||||
const bodyContent = processEmailContent(email.content);
|
||||
feed.addItem({
|
||||
title: email.subject,
|
||||
id: entryUrl,
|
||||
|
||||
Reference in New Issue
Block a user