feat: WebSub Atom support, HTML processing via linkedom, W3C badges

WebSub / PubSubHubbub:
- Hub now accepts both /rss/:id and /atom/:id topic URLs
- WebSubSubscription stores format ("rss" | "atom")
- notifySubscribers sends RSS or Atom XML with correct Content-Type
- verifyAndStoreSubscription sends correct topic URL per format
- CI paths-ignore docs/** to skip deploy on docs-only changes

HTML processing (linkedom + escape-html):
- New html-processor.ts: body extraction, script/iframe/object removal,
  event handler + javascript: URL stripping, mso-* style cleanup,
  plain text → <pre> with HTML escaping via escape-html
- feed-generator.ts and entries.ts use processEmailContent

Admin UI:
- W3C validation badges (Atom + RSS) on feed detail page

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Julien Herr
2026-05-22 21:12:10 +02:00
parent 1789870f27
commit a29e9ab372
13 changed files with 719 additions and 69 deletions
+4 -24
View File
@@ -1,5 +1,8 @@
import { Feed } from "feed";
import { FeedConfig, EmailData } from "../types";
import { processEmailContent } from "./html-processor";
export { processEmailContent as extractBodyContent };
function parseFromAddress(from: string): { name: string; email?: string } {
const match = from.match(/^(.*?)\s*<([^>]+)>\s*$/);
@@ -13,29 +16,6 @@ function parseFromAddress(from: string): { name: string; email?: string } {
return { name: from.trim() };
}
/**
 * Reduces a stored email HTML document to the fragment feed readers expect.
 *
 * Email content is stored as a full HTML document, while feed readers expect
 * only the body fragment in <description>/<content:encoded>.
 *
 * Steps:
 *  1. Take the inner content of `<body>…</body>` when both tags are present.
 *  2. Otherwise take everything after an unclosed `<body …>` tag, dropping a
 *     trailing `</html>`.
 *  3. Otherwise use the input unchanged (it is treated as a fragment already).
 *  4. Remove `mso-*` declarations (Office HTML — triggers feed validator
 *     warnings) from inline `style` attributes, whether the attribute value is
 *     double- or single-quoted.
 *
 * @param html - Full HTML document or fragment.
 * @returns The body fragment with `mso-*` inline style declarations removed.
 */
export function extractBodyContent(html: string): string {
  let body: string;
  const closedBody = /<body[^>]*>([\s\S]*?)<\/body>/i.exec(html);
  if (closedBody) {
    body = closedBody[1];
  } else {
    // No closing </body>: keep everything after the opening tag.
    const openBody = /<body[^>]*>([\s\S]*)/i.exec(html);
    body = openBody ? openBody[1].replace(/<\/html>\s*$/i, "") : html;
  }

  // Two alternatives so that a value quoted with one kind of quote may still
  // contain the other kind (e.g. style="font-family:'Arial'").
  return body.replace(
    /\bstyle=(?:"([^"]*)"|'([^']*)')/gi,
    (
      _match: string,
      doubleQuoted: string | undefined,
      singleQuoted: string | undefined,
    ) => {
      const quote = doubleQuoted !== undefined ? '"' : "'";
      const style = doubleQuoted ?? singleQuoted ?? "";
      const cleaned = style
        .split(";")
        .map((declaration) => declaration.trim())
        .filter((declaration) => declaration && !/^mso-/i.test(declaration))
        .join("; ");
      // Drop the attribute entirely when nothing but mso-* declarations remained.
      return cleaned ? `style=${quote}${cleaned}${quote}` : "";
    },
  );
}
function buildFeed(
feedConfig: FeedConfig,
emails: EmailData[],
@@ -70,7 +50,7 @@ function buildFeed(
for (const email of emails) {
const entryUrl = `${baseUrl}/entries/${feedId}/${email.receivedAt}`;
const firstAttachment = email.attachments?.[0];
const bodyContent = extractBodyContent(email.content);
const bodyContent = processEmailContent(email.content);
feed.addItem({
title: email.subject,
id: entryUrl,