feat: WebSub Atom support, HTML processing via linkedom, W3C badges

WebSub / PubSubHubbub:
- Hub now accepts both /rss/:id and /atom/:id topic URLs
- WebSubSubscription stores format ("rss" | "atom")
- notifySubscribers sends RSS or Atom XML with correct Content-Type
- verifyAndStoreSubscription sends correct topic URL per format
- CI paths-ignore docs/** to skip deploy on docs-only changes

HTML processing (linkedom + escape-html):
- New html-processor.ts: body extraction, script/iframe/object removal,
  event handler + javascript: URL stripping, mso-* style cleanup,
  plain text → <pre> with HTML escaping via escape-html
- feed-generator.ts and entries.ts use processEmailContent

Admin UI:
- W3C validation badges (Atom + RSS) on feed detail page

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Julien Herr
2026-05-22 21:12:10 +02:00
parent 1789870f27
commit a29e9ab372
13 changed files with 719 additions and 69 deletions
+4
View File
@@ -3,8 +3,12 @@ name: CI
on: on:
push: push:
branches: ["main"] branches: ["main"]
paths-ignore:
- "docs/**"
pull_request: pull_request:
branches: ["main"] branches: ["main"]
paths-ignore:
- "docs/**"
jobs: jobs:
ci: ci:
+190 -1
View File
@@ -10,13 +10,16 @@
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@hono/zod-validator": "^0.8.0", "@hono/zod-validator": "^0.8.0",
"escape-html": "^1.0.3",
"feed": "5.2.1", "feed": "5.2.1",
"hono": "4.12.22", "hono": "4.12.22",
"linkedom": "^0.18.12",
"postal-mime": "^2.7.4", "postal-mime": "^2.7.4",
"zod": "4.4.3" "zod": "4.4.3"
}, },
"devDependencies": { "devDependencies": {
"@cloudflare/workers-types": "4.20260522.1", "@cloudflare/workers-types": "4.20260522.1",
"@types/escape-html": "^1.0.4",
"@types/mailparser": "^3.4.6", "@types/mailparser": "^3.4.6",
"@types/rss": "^0.0.32", "@types/rss": "^0.0.32",
"@vitest/coverage-v8": "4.1.7", "@vitest/coverage-v8": "4.1.7",
@@ -2007,6 +2010,13 @@
"dev": true, "dev": true,
"license": "MIT" "license": "MIT"
}, },
"node_modules/@types/escape-html": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/@types/escape-html/-/escape-html-1.0.4.tgz",
"integrity": "sha512-qZ72SFTgUAZ5a7Tj6kf2SHLetiH5S6f8G5frB2SPQ3EyF02kxdyBFf4Tz4banE3xCgGnKgWLt//a6VuYHKYJTg==",
"dev": true,
"license": "MIT"
},
"node_modules/@types/esrecurse": { "node_modules/@types/esrecurse": {
"version": "4.3.1", "version": "4.3.1",
"resolved": "https://registry.npmjs.org/@types/esrecurse/-/esrecurse-4.3.1.tgz", "resolved": "https://registry.npmjs.org/@types/esrecurse/-/esrecurse-4.3.1.tgz",
@@ -2566,6 +2576,12 @@
"dev": true, "dev": true,
"license": "MIT" "license": "MIT"
}, },
"node_modules/boolbase": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
"integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==",
"license": "ISC"
},
"node_modules/brace-expansion": { "node_modules/brace-expansion": {
"version": "5.0.6", "version": "5.0.6",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.6.tgz", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.6.tgz",
@@ -2749,6 +2765,40 @@
"node": ">= 8" "node": ">= 8"
} }
}, },
"node_modules/css-select": {
"version": "5.2.2",
"resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz",
"integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==",
"license": "BSD-2-Clause",
"dependencies": {
"boolbase": "^1.0.0",
"css-what": "^6.1.0",
"domhandler": "^5.0.2",
"domutils": "^3.0.1",
"nth-check": "^2.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/css-what": {
"version": "6.2.2",
"resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz",
"integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==",
"license": "BSD-2-Clause",
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/cssom": {
"version": "0.5.0",
"resolved": "https://registry.npmjs.org/cssom/-/cssom-0.5.0.tgz",
"integrity": "sha512-iKuQcq+NdHqlAcwUY0o/HL69XQrUaQdMjmStJ8JFmUaiiQErlhrmuigkg/CU4E2J0IyUKUrMAgl36TvN67MqTw==",
"license": "MIT"
},
"node_modules/debug": { "node_modules/debug": {
"version": "4.4.3", "version": "4.4.3",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
@@ -2784,6 +2834,73 @@
"node": ">=8" "node": ">=8"
} }
}, },
"node_modules/dom-serializer": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
"integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==",
"license": "MIT",
"dependencies": {
"domelementtype": "^2.3.0",
"domhandler": "^5.0.2",
"entities": "^4.2.0"
},
"funding": {
"url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
}
},
"node_modules/dom-serializer/node_modules/entities": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
"license": "BSD-2-Clause",
"engines": {
"node": ">=0.12"
},
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/domelementtype": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz",
"integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
],
"license": "BSD-2-Clause"
},
"node_modules/domhandler": {
"version": "5.0.3",
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz",
"integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==",
"license": "BSD-2-Clause",
"dependencies": {
"domelementtype": "^2.3.0"
},
"engines": {
"node": ">= 4"
},
"funding": {
"url": "https://github.com/fb55/domhandler?sponsor=1"
}
},
"node_modules/domutils": {
"version": "3.2.2",
"resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz",
"integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==",
"license": "BSD-2-Clause",
"dependencies": {
"dom-serializer": "^2.0.0",
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3"
},
"funding": {
"url": "https://github.com/fb55/domutils?sponsor=1"
}
},
"node_modules/emoji-regex": { "node_modules/emoji-regex": {
"version": "8.0.0", "version": "8.0.0",
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
@@ -2795,7 +2912,6 @@
"version": "7.0.1", "version": "7.0.1",
"resolved": "https://registry.npmjs.org/entities/-/entities-7.0.1.tgz", "resolved": "https://registry.npmjs.org/entities/-/entities-7.0.1.tgz",
"integrity": "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==", "integrity": "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==",
"dev": true,
"license": "BSD-2-Clause", "license": "BSD-2-Clause",
"engines": { "engines": {
"node": ">=0.12" "node": ">=0.12"
@@ -2886,6 +3002,12 @@
"node": ">=6" "node": ">=6"
} }
}, },
"node_modules/escape-html": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz",
"integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==",
"license": "MIT"
},
"node_modules/escape-string-regexp": { "node_modules/escape-string-regexp": {
"version": "4.0.0", "version": "4.0.0",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz",
@@ -3340,6 +3462,25 @@
"dev": true, "dev": true,
"license": "MIT" "license": "MIT"
}, },
"node_modules/htmlparser2": {
"version": "10.1.0",
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.1.0.tgz",
"integrity": "sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==",
"funding": [
"https://github.com/fb55/htmlparser2?sponsor=1",
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
],
"license": "MIT",
"dependencies": {
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3",
"domutils": "^3.2.2",
"entities": "^7.0.1"
}
},
"node_modules/husky": { "node_modules/husky": {
"version": "9.1.7", "version": "9.1.7",
"resolved": "https://registry.npmjs.org/husky/-/husky-9.1.7.tgz", "resolved": "https://registry.npmjs.org/husky/-/husky-9.1.7.tgz",
@@ -3810,6 +3951,36 @@
"url": "https://opencollective.com/parcel" "url": "https://opencollective.com/parcel"
} }
}, },
"node_modules/linkedom": {
"version": "0.18.12",
"resolved": "https://registry.npmjs.org/linkedom/-/linkedom-0.18.12.tgz",
"integrity": "sha512-jalJsOwIKuQJSeTvsgzPe9iJzyfVaEJiEXl+25EkKevsULHvMJzpNqwvj1jOESWdmgKDiXObyjOYwlUqG7wo1Q==",
"license": "ISC",
"dependencies": {
"css-select": "^5.1.0",
"cssom": "^0.5.0",
"html-escaper": "^3.0.3",
"htmlparser2": "^10.0.0",
"uhyphen": "^0.2.0"
},
"engines": {
"node": ">=16"
},
"peerDependencies": {
"canvas": ">= 2"
},
"peerDependenciesMeta": {
"canvas": {
"optional": true
}
}
},
"node_modules/linkedom/node_modules/html-escaper": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-3.0.3.tgz",
"integrity": "sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==",
"license": "MIT"
},
"node_modules/lint-staged": { "node_modules/lint-staged": {
"version": "17.0.5", "version": "17.0.5",
"resolved": "https://registry.npmjs.org/lint-staged/-/lint-staged-17.0.5.tgz", "resolved": "https://registry.npmjs.org/lint-staged/-/lint-staged-17.0.5.tgz",
@@ -4249,6 +4420,18 @@
"dev": true, "dev": true,
"license": "MIT" "license": "MIT"
}, },
"node_modules/nth-check": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
"integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==",
"license": "BSD-2-Clause",
"dependencies": {
"boolbase": "^1.0.0"
},
"funding": {
"url": "https://github.com/fb55/nth-check?sponsor=1"
}
},
"node_modules/obug": { "node_modules/obug": {
"version": "2.1.1", "version": "2.1.1",
"resolved": "https://registry.npmjs.org/obug/-/obug-2.1.1.tgz", "resolved": "https://registry.npmjs.org/obug/-/obug-2.1.1.tgz",
@@ -5017,6 +5200,12 @@
"typescript": ">=4.8.4 <6.1.0" "typescript": ">=4.8.4 <6.1.0"
} }
}, },
"node_modules/uhyphen": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/uhyphen/-/uhyphen-0.2.0.tgz",
"integrity": "sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA==",
"license": "ISC"
},
"node_modules/undici": { "node_modules/undici": {
"version": "7.24.8", "version": "7.24.8",
"resolved": "https://registry.npmjs.org/undici/-/undici-7.24.8.tgz", "resolved": "https://registry.npmjs.org/undici/-/undici-7.24.8.tgz",
+3
View File
@@ -32,6 +32,7 @@
"license": "MIT", "license": "MIT",
"devDependencies": { "devDependencies": {
"@cloudflare/workers-types": "4.20260522.1", "@cloudflare/workers-types": "4.20260522.1",
"@types/escape-html": "^1.0.4",
"@types/mailparser": "^3.4.6", "@types/mailparser": "^3.4.6",
"@types/rss": "^0.0.32", "@types/rss": "^0.0.32",
"@vitest/coverage-v8": "4.1.7", "@vitest/coverage-v8": "4.1.7",
@@ -50,8 +51,10 @@
}, },
"dependencies": { "dependencies": {
"@hono/zod-validator": "^0.8.0", "@hono/zod-validator": "^0.8.0",
"escape-html": "^1.0.3",
"feed": "5.2.1", "feed": "5.2.1",
"hono": "4.12.22", "hono": "4.12.22",
"linkedom": "^0.18.12",
"postal-mime": "^2.7.4", "postal-mime": "^2.7.4",
"zod": "4.4.3" "zod": "4.4.3"
} }
+24
View File
@@ -116,6 +116,30 @@ emailsRouter.get("/feeds/:feedId/emails", async (c) => {
<CopyField label="RSS Feed:" value={rssUrl} /> <CopyField label="RSS Feed:" value={rssUrl} />
<CopyField label="Atom Feed:" value={atomUrl} /> <CopyField label="Atom Feed:" value={atomUrl} />
</div> </div>
<div class="feed-validate">
<a
href={`https://validator.w3.org/feed/check.cgi?url=${encodeURIComponent(atomUrl)}`}
target="_blank"
rel="noopener noreferrer"
>
<img
src="https://validator.w3.org/feed/images/valid-atom.png"
alt="[Valid Atom 1.0]"
title="Validate my Atom 1.0 feed"
/>
</a>
<a
href={`https://validator.w3.org/feed/check.cgi?url=${encodeURIComponent(rssUrl)}`}
target="_blank"
rel="noopener noreferrer"
>
<img
src="https://validator.w3.org/feed/images/valid-rss-rogers.png"
alt="[Valid RSS]"
title="Validate my RSS feed"
/>
</a>
</div>
</div> </div>
<h2> <h2>
+4 -1
View File
@@ -1,6 +1,7 @@
import { Context } from "hono"; import { Context } from "hono";
import { html, raw } from "hono/html"; import { html, raw } from "hono/html";
import { Env, FeedMetadata, EmailData } from "../types"; import { Env, FeedMetadata, EmailData } from "../types";
import { processEmailContent } from "../utils/html-processor";
export async function handle(c: Context<{ Bindings: Env }>): Promise<Response> { export async function handle(c: Context<{ Bindings: Env }>): Promise<Response> {
const feedId = c.req.param("feedId"); const feedId = c.req.param("feedId");
@@ -82,7 +83,9 @@ export async function handle(c: Context<{ Bindings: Env }>): Promise<Response> {
<dt>Date:</dt> <dt>Date:</dt>
<dd>${new Date(emailData.receivedAt).toUTCString()}</dd> <dd>${new Date(emailData.receivedAt).toUTCString()}</dd>
</dl> </dl>
<div class="content">${raw(emailData.content)}</div> <div class="content">
${raw(processEmailContent(emailData.content))}
</div>
</body> </body>
</html>`, </html>`,
); );
+83 -1
View File
@@ -124,6 +124,21 @@ describe("POST /hub — input validation", () => {
expect(res.status).toBe(400); expect(res.status).toBe(400);
}); });
it("returns 400 when hub.topic uses an unsupported path (not rss or atom)", async () => {
const app = makeApp();
const env = createMockEnv();
const res = await app.request(
"/hub",
hubBody({
"hub.mode": "subscribe",
"hub.topic": `https://${env.DOMAIN}/feed/feed1`,
"hub.callback": "https://cb.example/sub",
}),
env,
);
expect(res.status).toBe(400);
});
it("returns 400 when hub.secret exceeds 200 bytes", async () => { it("returns 400 when hub.secret exceeds 200 bytes", async () => {
const app = makeApp(); const app = makeApp();
const env = createMockEnv(); const env = createMockEnv();
@@ -213,10 +228,51 @@ describe("POST /hub — subscribe", () => {
); );
expect(res.status).toBe(404); expect(res.status).toBe(404);
}); });
it("returns 202 for valid Atom subscribe request", async () => {
const app = makeApp();
const env = createMockEnv();
await env.EMAIL_STORAGE.put(
"feed:feed1:config",
JSON.stringify({ title: "Feed 1" }),
);
server.use(
http.get("https://cb.example/sub", ({ request }) => {
const challenge =
new URL(request.url).searchParams.get("hub.challenge") ?? "";
return HttpResponse.text(challenge);
}),
);
const res = await app.request(
"/hub",
hubBody({
"hub.mode": "subscribe",
"hub.topic": `https://${env.DOMAIN}/atom/feed1`,
"hub.callback": "https://cb.example/sub",
}),
env,
);
expect(res.status).toBe(202);
});
it("returns 404 for Atom topic when feed does not exist", async () => {
const app = makeApp();
const env = createMockEnv();
const res = await app.request(
"/hub",
hubBody({
"hub.mode": "subscribe",
"hub.topic": `https://${env.DOMAIN}/atom/nonexistent`,
"hub.callback": "https://cb.example/sub",
}),
env,
);
expect(res.status).toBe(404);
});
}); });
describe("POST /hub — unsubscribe", () => { describe("POST /hub — unsubscribe", () => {
it("returns 202 for valid unsubscribe request", async () => { it("returns 202 for valid RSS unsubscribe request", async () => {
const app = makeApp(); const app = makeApp();
const env = createMockEnv(); const env = createMockEnv();
await env.EMAIL_STORAGE.put( await env.EMAIL_STORAGE.put(
@@ -241,4 +297,30 @@ describe("POST /hub — unsubscribe", () => {
); );
expect(res.status).toBe(202); expect(res.status).toBe(202);
}); });
it("returns 202 for valid Atom unsubscribe request", async () => {
const app = makeApp();
const env = createMockEnv();
await env.EMAIL_STORAGE.put(
"feed:feed1:config",
JSON.stringify({ title: "Feed 1" }),
);
server.use(
http.get("https://cb.example/sub", ({ request }) => {
const challenge =
new URL(request.url).searchParams.get("hub.challenge") ?? "";
return HttpResponse.text(challenge);
}),
);
const res = await app.request(
"/hub",
hubBody({
"hub.mode": "unsubscribe",
"hub.topic": `https://${env.DOMAIN}/atom/feed1`,
"hub.callback": "https://cb.example/sub",
}),
env,
);
expect(res.status).toBe(202);
});
}); });
+6 -4
View File
@@ -59,18 +59,19 @@ hubRouter.post("/", async (c) => {
return c.text("Bad Request: hub.callback must use HTTPS", 400); return c.text("Bad Request: hub.callback must use HTTPS", 400);
} }
// Validate that topic matches a known RSS feed on this hub // Validate that topic matches a known RSS or Atom feed on this hub
const topicPattern = new RegExp( const topicPattern = new RegExp(
`^https://${env.DOMAIN.replaceAll(".", "\\.")}/rss/([^/]+)$`, `^https://${env.DOMAIN.replaceAll(".", "\\.")}/(rss|atom)/([^/]+)$`,
); );
const match = topic.match(topicPattern); const match = topic.match(topicPattern);
if (!match) { if (!match) {
return c.text( return c.text(
"Bad Request: hub.topic must be an RSS feed URL on this hub", "Bad Request: hub.topic must be an RSS or Atom feed URL on this hub",
400, 400,
); );
} }
const feedId = match[1]; const format = match[1] as "rss" | "atom";
const feedId = match[2];
// Verify the feed exists before accepting any subscription // Verify the feed exists before accepting any subscription
const feedConfig = await env.EMAIL_STORAGE.get( const feedConfig = await env.EMAIL_STORAGE.get(
@@ -99,6 +100,7 @@ hubRouter.post("/", async (c) => {
callbackUrl as string, callbackUrl as string,
secret as string | undefined, secret as string | undefined,
leaseSeconds, leaseSeconds,
format,
env, env,
), ),
); );
+11
View File
@@ -954,6 +954,17 @@ table.table code {
border-color: rgba(255, 69, 58, 0.35); border-color: rgba(255, 69, 58, 0.35);
} }
/* Validation badges */
.feed-validate {
display: flex;
gap: 0.5rem;
margin-top: 1rem;
}
.feed-validate img {
display: block;
}
/* Feed and Email Lists */ /* Feed and Email Lists */
.feed-list, .feed-list,
.email-list { .email-list {
+4 -24
View File
@@ -1,5 +1,8 @@
import { Feed } from "feed"; import { Feed } from "feed";
import { FeedConfig, EmailData } from "../types"; import { FeedConfig, EmailData } from "../types";
import { processEmailContent } from "./html-processor";
export { processEmailContent as extractBodyContent };
function parseFromAddress(from: string): { name: string; email?: string } { function parseFromAddress(from: string): { name: string; email?: string } {
const match = from.match(/^(.*?)\s*<([^>]+)>\s*$/); const match = from.match(/^(.*?)\s*<([^>]+)>\s*$/);
@@ -13,29 +16,6 @@ function parseFromAddress(from: string): { name: string; email?: string } {
return { name: from.trim() }; return { name: from.trim() };
} }
// Email content is stored as a full HTML document. Feed readers expect only
// the body fragment in <description>/<content:encoded>, not a full document.
export function extractBodyContent(html: string): string {
const withClose = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
const body = withClose
? withClose[1]
: (() => {
const withoutClose = html.match(/<body[^>]*>([\s\S]*)/i);
return withoutClose
? withoutClose[1].replace(/<\/html>\s*$/i, "")
: html;
})();
// Strip mso-* properties from inline styles (Office HTML — triggers feed validator warnings)
return body.replace(/\bstyle="([^"]*)"/gi, (_match, style: string) => {
const cleaned = style
.split(";")
.map((p) => p.trim())
.filter((p) => p && !/^mso-/i.test(p))
.join("; ");
return cleaned ? `style="${cleaned}"` : "";
});
}
function buildFeed( function buildFeed(
feedConfig: FeedConfig, feedConfig: FeedConfig,
emails: EmailData[], emails: EmailData[],
@@ -70,7 +50,7 @@ function buildFeed(
for (const email of emails) { for (const email of emails) {
const entryUrl = `${baseUrl}/entries/${feedId}/${email.receivedAt}`; const entryUrl = `${baseUrl}/entries/${feedId}/${email.receivedAt}`;
const firstAttachment = email.attachments?.[0]; const firstAttachment = email.attachments?.[0];
const bodyContent = extractBodyContent(email.content); const bodyContent = processEmailContent(email.content);
feed.addItem({ feed.addItem({
title: email.subject, title: email.subject,
id: entryUrl, id: entryUrl,
+125
View File
@@ -0,0 +1,125 @@
import { describe, it, expect } from "vitest";
import { processEmailContent } from "./html-processor";
// Body extraction: only the markup inside <body> should come back, whatever
// the casing or attributes of the tag, and a bare fragment passes through.
describe("processEmailContent — body extraction", () => {
  it("extracts content inside <body> tags", () => {
    const output = processEmailContent(
      "<html><head></head><body><p>Hello</p></body></html>",
    );
    expect(output).toBe("<p>Hello</p>");
  });
  it("handles body tag with attributes", () => {
    const output = processEmailContent(
      '<html><body style="margin:0"><p>Hi</p></body></html>',
    );
    expect(output).toBe("<p>Hi</p>");
  });
  it("returns fragment unchanged when no body tags present", () => {
    // No <html>/<body> wrapper at all: the input is already a fragment.
    const output = processEmailContent("<p>Already a fragment</p>");
    expect(output).toBe("<p>Already a fragment</p>");
  });
  it("is case-insensitive for body tag matching", () => {
    const output = processEmailContent(
      "<HTML><BODY><p>content</p></BODY></HTML>",
    );
    expect(output).toBe("<p>content</p>");
  });
});
// Plain-text handling: input without any HTML tag is escaped and wrapped in a
// <pre> block; empty input yields an empty string.
describe("processEmailContent — plain text", () => {
  it("wraps plain text in <pre>", () => {
    const text = "Hello world\nSecond line";
    const result = processEmailContent(text);
    expect(result).toMatch(/^<pre /);
    expect(result).toContain("Hello world\nSecond line");
  });
  it("escapes < and > in plain text", () => {
    const text = "Price < 10 & size > 5";
    const result = processEmailContent(text);
    expect(result).toContain("&lt;");
    expect(result).toContain("&gt;");
    expect(result).toContain("&amp;");
    // The raw operators must be gone. The substrings checked here are the ones
    // that actually occur in the input ("< 10" and "> 5", with the space), so
    // the assertions can fail if escaping regresses.
    expect(result).not.toContain("< 10");
    expect(result).not.toContain("> 5");
  });
  it("returns empty string for empty input", () => {
    expect(processEmailContent("")).toBe("");
  });
});
// Element-level sanitization: active/embedding elements are dropped while the
// surrounding markup survives. Every fixture here wraps its markup in <body>.
describe("processEmailContent — dangerous element removal", () => {
  it("removes <script> tags", () => {
    const cleaned = processEmailContent(
      "<body><p>Hello</p><script>alert('xss')</script></body>",
    );
    // Both the tag and its inline code must be gone.
    expect(cleaned).not.toContain("<script");
    expect(cleaned).not.toContain("alert");
    expect(cleaned).toContain("<p>Hello</p>");
  });
  it("removes <iframe> tags", () => {
    const markup =
      "<body><iframe src='https://evil.com'></iframe><p>ok</p></body>";
    const cleaned = processEmailContent(markup);
    expect(cleaned).not.toContain("<iframe");
    expect(cleaned).toContain("<p>ok</p>");
  });
  it("removes <object> and <embed> tags", () => {
    const cleaned = processEmailContent(
      "<body><object></object><embed src='x'/><p>ok</p></body>",
    );
    expect(cleaned).not.toContain("<object");
    expect(cleaned).not.toContain("<embed");
  });
});
// Attribute-level sanitization: inline event handlers and javascript: URLs are
// stripped, ordinary http(s) href/src values are left alone.
describe("processEmailContent — attribute sanitization", () => {
  it("removes event handler attributes", () => {
    const markup =
      "<body><a href='https://x.com' onclick='evil()'>link</a></body>";
    const cleaned = processEmailContent(markup);
    expect(cleaned).not.toContain("onclick");
    // The serializer emits double-quoted attribute values.
    expect(cleaned).toContain('href="https://x.com"');
  });
  it("removes onerror on images", () => {
    const cleaned = processEmailContent(
      "<body><img src='x' onerror='evil()' /></body>",
    );
    expect(cleaned).not.toContain("onerror");
  });
  it("removes javascript: hrefs", () => {
    const cleaned = processEmailContent(
      "<body><a href='javascript:evil()'>click</a></body>",
    );
    expect(cleaned).not.toContain("javascript:");
  });
  it("preserves legitimate href and src attributes", () => {
    const markup =
      "<body><a href='https://example.com'>link</a><img src='https://example.com/img.png'/></body>";
    const cleaned = processEmailContent(markup);
    expect(cleaned).toContain("https://example.com");
  });
});
// Office HTML cleanup: mso-* declarations are removed from inline styles, the
// remaining declarations are kept, and an emptied style attribute disappears.
describe("processEmailContent — mso style cleanup", () => {
  it("strips mso-* properties from inline styles", () => {
    const markup =
      '<body><p style="mso-margin-top: 0; color: red;">text</p></body>';
    const cleaned = processEmailContent(markup);
    expect(cleaned).not.toContain("mso-margin-top");
    expect(cleaned).toContain("color: red");
  });
  it("removes style attribute entirely when only mso properties remain", () => {
    const markup =
      '<body><p style="mso-line-height-rule: exactly;">text</p></body>';
    const cleaned = processEmailContent(markup);
    expect(cleaned).not.toContain("style=");
  });
  it("preserves style attribute when non-mso properties remain", () => {
    const markup =
      '<body><p style="mso-font-size: 12pt; font-weight: bold;">text</p></body>';
    const cleaned = processEmailContent(markup);
    expect(cleaned).toContain("font-weight");
    expect(cleaned).not.toContain("mso-font-size");
  });
});
+73
View File
@@ -0,0 +1,73 @@
import { parseHTML } from "linkedom";
import escapeHtml from "escape-html";
/**
 * Removes Microsoft Office (`mso-*`) declarations from an inline style string.
 *
 * The string is split into declarations on `;`, but only where the semicolon
 * is a real separator: semicolons inside quoted strings or inside parentheses
 * (for example `url(data:image/png;base64,...)`) stay part of the value, so
 * such values are not cut in half and re-joined with a stray space.
 *
 * @param style - Raw value of a `style` attribute.
 * @returns The remaining declarations, trimmed and joined with `"; "`; an
 *   empty string when nothing is left.
 */
function cleanMsoStyles(style: string): string {
  const declarations: string[] = [];
  let current = "";
  let openQuote: string | null = null;
  let parenDepth = 0;

  for (const ch of style) {
    if (openQuote !== null) {
      // Inside a quoted string everything is literal until the closing quote.
      if (ch === openQuote) openQuote = null;
    } else if (ch === '"' || ch === "'") {
      openQuote = ch;
    } else if (ch === "(") {
      parenDepth++;
    } else if (ch === ")") {
      if (parenDepth > 0) parenDepth--;
    } else if (ch === ";" && parenDepth === 0) {
      // Top-level semicolon: end of the current declaration.
      declarations.push(current);
      current = "";
      continue;
    }
    current += ch;
  }
  declarations.push(current);

  return declarations
    .map((p) => p.trim())
    .filter((p) => p && !/^mso-/i.test(p))
    .join("; ");
}
/**
 * Heuristic check for "this content has no HTML markup".
 *
 * Content counts as HTML when it contains a `<` immediately followed by an
 * ASCII letter and, somewhere after that letter, a `>`. This gives the same
 * answer as testing `/<[a-z][\s\S]*>/i`, but in a single linear pass: the
 * regex form backtracks quadratically on input with many `<x` sequences and
 * no closing `>`, and email content is untrusted.
 *
 * @param content - Raw email content.
 * @returns `true` when no tag-like sequence is found.
 */
function isPlainText(content: string): boolean {
  const tagStart = content.search(/<[a-z]/i);
  if (tagStart === -1) return true;
  // Any ">" after the first "<x" is enough; if none follows the first
  // candidate, none follows a later one either.
  return content.indexOf(">", tagStart + 2) === -1;
}
// Attributes whose value is a URL that a browser may navigate to or load.
const URL_ATTRIBUTES = new Set([
  "href",
  "src",
  "action",
  "formaction",
  "xlink:href",
  "poster",
  "background",
]);

/**
 * Sanitizes a single element in place:
 * - removes every attribute whose name starts with `on` (event handlers),
 * - removes URL-bearing attributes whose value uses a script scheme
 *   (`javascript:` or `vbscript:`),
 * - strips `mso-*` declarations from the inline `style`, dropping the
 *   attribute when nothing remains.
 *
 * @param el - Element to clean; it is mutated, nothing is returned.
 */
function sanitizeElement(el: Element): void {
  // Snapshot attribute names before mutating (linkedom attributes is array-like)
  const attrs = Array.from(
    el.attributes as unknown as ArrayLike<{ name: string }>,
  ).map((a) => a.name);

  for (const attr of attrs) {
    // Remove event handlers (onclick, onerror, onload, …)
    if (/^on/i.test(attr)) {
      el.removeAttribute(attr);
      continue;
    }

    // Remove script-scheme URLs
    if (URL_ATTRIBUTES.has(attr.toLowerCase())) {
      const val = el.getAttribute(attr) ?? "";
      // Mirror what URL parsing does before reading the scheme: tab and
      // newline characters are dropped anywhere in the value, and leading
      // control characters / spaces are ignored. Without this,
      // "java\tscript:" or "\u0001javascript:" slips past a plain prefix test.
      const normalized = val
        .replace(/[\t\n\r]/g, "")
        .replace(/^[\u0000-\u0020]+/, "");
      if (/^(?:javascript|vbscript):/i.test(normalized)) {
        el.removeAttribute(attr);
        continue;
      }
    }
  }

  // Strip mso-* inline style properties (Office HTML noise)
  const style = el.getAttribute("style");
  if (style !== null) {
    const cleaned = cleanMsoStyles(style);
    if (cleaned) {
      el.setAttribute("style", cleaned);
    } else {
      el.removeAttribute("style");
    }
  }
}
/**
 * Processes email content for safe display in feeds and entry pages:
 * - Returns an empty string for empty input
 * - Detects plain text and wraps it, HTML-escaped, in a <pre> block
 * - Extracts the <body> fragment from full HTML documents
 * - Removes dangerous elements: <script>, <object>, <embed>, <iframe>,
 *   <frame>, <frameset>
 * - Removes event handler attributes and script-scheme URLs
 * - Strips mso-* inline style properties (Office HTML)
 *
 * The returned markup always comes from the sanitized DOM; the raw input is
 * never handed back for HTML content.
 *
 * @param content - Stored email content (full HTML document, HTML fragment,
 *   or plain text).
 * @returns An HTML fragment suitable for embedding.
 */
export function processEmailContent(content: string): string {
  if (!content) return "";

  if (isPlainText(content)) {
    return `<pre style="white-space: pre-wrap; word-break: break-word;">${escapeHtml(content)}</pre>`;
  }

  // A fragment such as "<p>…</p>" may parse into a document without a <body>
  // element. In that case parse it again inside a synthetic body so the
  // sanitized tree can be serialized, instead of falling back to the
  // unsanitized input string.
  const parsed = parseHTML(content).document;
  const document = parsed.querySelector("body")
    ? parsed
    : parseHTML(`<html><body>${content}</body></html>`).document;

  document
    .querySelectorAll("script, object, embed, iframe, frame, frameset")
    .forEach((el: Element) => el.remove());

  document.querySelectorAll("*").forEach((el: Element) => sanitizeElement(el));

  const body = document.querySelector("body");
  // Last resort: serialize the sanitized document rather than returning the
  // raw content.
  return body ? body.innerHTML : document.toString();
}
+120
View File
@@ -170,6 +170,94 @@ describe("notifySubscribers", () => {
expect(receivedSig).toBe(""); // legacy header should NOT be sent expect(receivedSig).toBe(""); // legacy header should NOT be sent
}); });
it("POSTs Atom feed XML with correct Content-Type to Atom subscriber", async () => {
const env = mockEnv();
await env.EMAIL_STORAGE.put(
"feed:feed1:metadata",
JSON.stringify({ emails: [] }),
);
await env.EMAIL_STORAGE.put(
"feed:feed1:config",
JSON.stringify({
title: "Test Feed",
language: "en",
site_url: "https://example.com",
feed_url: "https://example.com/rss/feed1",
created_at: Date.now(),
}),
);
const subs: WebSubSubscription[] = [
{
callbackUrl: "https://atom-reader.example/callback",
expiresAt: Date.now() + 60000,
format: "atom",
},
];
await saveSubscriptions("feed1", subs, env);
let receivedContentType = "";
let receivedLink = "";
server.use(
http.post("https://atom-reader.example/callback", async ({ request }) => {
receivedContentType = request.headers.get("Content-Type") ?? "";
receivedLink = request.headers.get("Link") ?? "";
return HttpResponse.text("ok");
}),
);
await notifySubscribers("feed1", env);
expect(receivedContentType).toContain("application/atom+xml");
expect(receivedLink).toContain(`/atom/feed1`);
expect(receivedLink).toContain(`rel="self"`);
});
it("notifies RSS and Atom subscribers independently with correct formats", async () => {
const env = mockEnv();
await env.EMAIL_STORAGE.put(
"feed:feed1:metadata",
JSON.stringify({ emails: [] }),
);
await env.EMAIL_STORAGE.put(
"feed:feed1:config",
JSON.stringify({
title: "Test Feed",
language: "en",
site_url: "https://example.com",
feed_url: "https://example.com/rss/feed1",
created_at: Date.now(),
}),
);
const subs: WebSubSubscription[] = [
{
callbackUrl: "https://rss-reader.example/callback",
expiresAt: Date.now() + 60000,
format: "rss",
},
{
callbackUrl: "https://atom-reader.example/callback",
expiresAt: Date.now() + 60000,
format: "atom",
},
];
await saveSubscriptions("feed1", subs, env);
const received: Record<string, string> = {};
server.use(
http.post("https://rss-reader.example/callback", async ({ request }) => {
received.rss = request.headers.get("Content-Type") ?? "";
return HttpResponse.text("ok");
}),
http.post("https://atom-reader.example/callback", async ({ request }) => {
received.atom = request.headers.get("Content-Type") ?? "";
return HttpResponse.text("ok");
}),
);
await notifySubscribers("feed1", env);
expect(received.rss).toContain("application/rss+xml");
expect(received.atom).toContain("application/atom+xml");
});
it("prunes expired subscriptions and does not notify them", async () => { it("prunes expired subscriptions and does not notify them", async () => {
const env = mockEnv(); const env = mockEnv();
await env.EMAIL_STORAGE.put( await env.EMAIL_STORAGE.put(
@@ -235,6 +323,7 @@ describe("verifyAndStoreSubscription", () => {
"https://reader.example/callback", "https://reader.example/callback",
undefined, undefined,
86400, 86400,
"rss",
env, env,
); );
@@ -245,6 +334,33 @@ describe("verifyAndStoreSubscription", () => {
expect(subs[0].expiresAt).toBeGreaterThan(Date.now()); expect(subs[0].expiresAt).toBeGreaterThan(Date.now());
}); });
it("stores format=atom and sends atom topic URL in verification request", async () => {
const env = mockEnv();
let receivedTopic = "";
server.use(
http.get("https://reader.example/callback", ({ request }) => {
const url = new URL(request.url);
receivedTopic = url.searchParams.get("hub.topic") ?? "";
const challenge = url.searchParams.get("hub.challenge") ?? "";
return HttpResponse.text(challenge);
}),
);
const result = await verifyAndStoreSubscription(
"feed1",
"https://reader.example/callback",
undefined,
86400,
"atom",
env,
);
expect(result).toBe(true);
expect(receivedTopic).toContain("/atom/feed1");
const subs = await getSubscriptions("feed1", env);
expect(subs[0].format).toBe("atom");
});
it("returns false and does not store when callback returns wrong challenge", async () => { it("returns false and does not store when callback returns wrong challenge", async () => {
const env = mockEnv(); const env = mockEnv();
server.use( server.use(
@@ -258,6 +374,7 @@ describe("verifyAndStoreSubscription", () => {
"https://reader.example/callback", "https://reader.example/callback",
undefined, undefined,
86400, 86400,
"rss",
env, env,
); );
@@ -286,6 +403,7 @@ describe("verifyAndStoreSubscription", () => {
"https://reader.example/callback", "https://reader.example/callback",
"newsecret", "newsecret",
3600, 3600,
"rss",
env, env,
); );
@@ -306,6 +424,7 @@ describe("verifyAndStoreSubscription", () => {
"https://reader.example/callback", "https://reader.example/callback",
undefined, undefined,
86400, 86400,
"rss",
env, env,
); );
@@ -329,6 +448,7 @@ describe("verifyAndStoreSubscription", () => {
"https://reader.example/callback", "https://reader.example/callback",
undefined, undefined,
86400, 86400,
"rss",
env, env,
); );
+72 -38
View File
@@ -5,7 +5,7 @@ import {
EmailData, EmailData,
WebSubSubscription, WebSubSubscription,
} from "../types"; } from "../types";
import { generateRssFeed } from "./feed-generator"; import { generateRssFeed, generateAtomFeed } from "./feed-generator";
const KV_PREFIX = "websub:subs:"; const KV_PREFIX = "websub:subs:";
@@ -54,7 +54,11 @@ export async function buildHmacSignature(
return `sha256=${hex}`; return `sha256=${hex}`;
} }
async function buildFeedXml(feedId: string, env: Env): Promise<string | null> { async function buildFeedXml(
feedId: string,
env: Env,
format: "rss" | "atom" = "rss",
): Promise<string | null> {
const [rawMetadata, rawConfig] = await Promise.all([ const [rawMetadata, rawConfig] = await Promise.all([
env.EMAIL_STORAGE.get(`feed:${feedId}:metadata`, "json"), env.EMAIL_STORAGE.get(`feed:${feedId}:metadata`, "json"),
env.EMAIL_STORAGE.get(`feed:${feedId}:config`, "json"), env.EMAIL_STORAGE.get(`feed:${feedId}:config`, "json"),
@@ -63,11 +67,12 @@ async function buildFeedXml(feedId: string, env: Env): Promise<string | null> {
const feedMetadata = rawMetadata as FeedMetadata | null; const feedMetadata = rawMetadata as FeedMetadata | null;
if (!feedMetadata) return null; if (!feedMetadata) return null;
const baseUrl = `https://${env.DOMAIN}`;
const feedConfig = (rawConfig as FeedConfig | null) ?? { const feedConfig = (rawConfig as FeedConfig | null) ?? {
title: `Newsletter Feed ${feedId}`, title: `Newsletter Feed ${feedId}`,
description: "Converted email newsletter", description: "Converted email newsletter",
site_url: `https://${env.DOMAIN}/rss/${feedId}`, site_url: `${baseUrl}/rss/${feedId}`,
feed_url: `https://${env.DOMAIN}/rss/${feedId}`, feed_url: `${baseUrl}/rss/${feedId}`,
language: "en", language: "en",
created_at: Date.now(), created_at: Date.now(),
}; };
@@ -82,12 +87,16 @@ async function buildFeedXml(feedId: string, env: Env): Promise<string | null> {
) )
).filter((d): d is EmailData => d !== null); ).filter((d): d is EmailData => d !== null);
return generateRssFeed( if (format === "atom") {
feedConfig, return generateAtomFeed(
emailsData, feedConfig,
`https://${env.DOMAIN}`, emailsData,
feedId, baseUrl,
); feedId,
`${baseUrl}/atom/${feedId}`,
);
}
return generateRssFeed(feedConfig, emailsData, baseUrl, feedId);
} }
export async function notifySubscribers( export async function notifySubscribers(
@@ -105,36 +114,59 @@ export async function notifySubscribers(
return; return;
} }
const feedXml = await buildFeedXml(feedId, env); const rssSubs = active.filter((s) => (s.format ?? "rss") === "rss");
if (!feedXml) return; const atomSubs = active.filter((s) => s.format === "atom");
const [rssFeed, atomFeed] = await Promise.all([
rssSubs.length > 0 ? buildFeedXml(feedId, env, "rss") : null,
atomSubs.length > 0 ? buildFeedXml(feedId, env, "atom") : null,
]);
if (!rssFeed && !atomFeed) return;
const baseUrl = `https://${env.DOMAIN}`; const baseUrl = `https://${env.DOMAIN}`;
const linkHeader = `<${baseUrl}/hub>; rel="hub", <${baseUrl}/rss/${feedId}>; rel="self"`;
await Promise.allSettled( const deliver = async (
active.map(async (sub) => { sub: WebSubSubscription,
const headers: Record<string, string> = { feedXml: string,
"Content-Type": "application/rss+xml", contentType: string,
Link: linkHeader, selfPath: string,
}; ) => {
if (sub.secret) { const linkHeader = `<${baseUrl}/hub>; rel="hub", <${baseUrl}${selfPath}>; rel="self"`;
headers["X-Hub-Signature-256"] = await buildHmacSignature( const headers: Record<string, string> = {
feedXml, "Content-Type": contentType,
sub.secret, Link: linkHeader,
); };
} if (sub.secret) {
const res = await fetch(sub.callbackUrl, { headers["X-Hub-Signature-256"] = await buildHmacSignature(
method: "POST", feedXml,
headers, sub.secret,
body: feedXml, );
}); }
if (!res.ok) { const res = await fetch(sub.callbackUrl, {
console.error( method: "POST",
`WebSub: delivery failed ${sub.callbackUrl}: ${res.status}`, headers,
); body: feedXml,
} });
}), if (!res.ok) {
); console.error(
`WebSub: delivery failed ${sub.callbackUrl}: ${res.status}`,
);
}
};
await Promise.allSettled([
...(rssFeed
? rssSubs.map((sub) =>
deliver(sub, rssFeed, "application/rss+xml", `/rss/${feedId}`),
)
: []),
...(atomFeed
? atomSubs.map((sub) =>
deliver(sub, atomFeed, "application/atom+xml", `/atom/${feedId}`),
)
: []),
]);
if (active.length < subs.length) { if (active.length < subs.length) {
await saveSubscriptions(feedId, active, env); await saveSubscriptions(feedId, active, env);
@@ -146,10 +178,11 @@ export async function verifyAndStoreSubscription(
callbackUrl: string, callbackUrl: string,
secret: string | undefined, secret: string | undefined,
leaseSeconds: number, leaseSeconds: number,
format: "rss" | "atom",
env: Env, env: Env,
): Promise<boolean> { ): Promise<boolean> {
const challenge = crypto.randomUUID().replace(/-/g, ""); const challenge = crypto.randomUUID().replace(/-/g, "");
const topicUrl = `https://${env.DOMAIN}/rss/${feedId}`; const topicUrl = `https://${env.DOMAIN}/${format}/${feedId}`;
const verifyUrl = new URL(callbackUrl); const verifyUrl = new URL(callbackUrl);
verifyUrl.searchParams.set("hub.mode", "subscribe"); verifyUrl.searchParams.set("hub.mode", "subscribe");
verifyUrl.searchParams.set("hub.topic", topicUrl); verifyUrl.searchParams.set("hub.topic", topicUrl);
@@ -172,6 +205,7 @@ export async function verifyAndStoreSubscription(
const entry: WebSubSubscription = { const entry: WebSubSubscription = {
callbackUrl, callbackUrl,
expiresAt: Date.now() + leaseSeconds * 1000, expiresAt: Date.now() + leaseSeconds * 1000,
format,
...(secret ? { secret } : {}), ...(secret ? { secret } : {}),
}; };
if (idx >= 0) { if (idx >= 0) {