refactor: split src into domain / application / infrastructure layers

Replace the history-driven lib/ + utils/ split with DDD layers:
- domain/: aggregate, repositories, value objects, pure parsers/format
- application/: feed-service, email-processor, feed-fetcher, stats
- infrastructure/: logging, auth, KV/R2 adapters, HTTP, framework glue

Pure file relocation; imports updated mechanically. Behaviour unchanged.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Julien Herr
2026-05-24 00:46:56 +02:00
parent ab1c15e69a
commit 7bf0f71f86
45 changed files with 90 additions and 68 deletions
+9
View File
@@ -0,0 +1,9 @@
import { Env } from "../types";
/**
 * Resolve the R2 bucket used for attachments, or `undefined` when the feature
 * is off, so callers can narrow on the result.
 *
 * Attachments count as enabled whenever a bucket is bound; the only way to
 * switch them off is the explicit opt-out `ATTACHMENTS_ENABLED="false"`.
 */
export function getAttachmentBucket(env: Env): R2Bucket | undefined {
  const explicitlyDisabled = env.ATTACHMENTS_ENABLED === "false";
  return explicitlyDisabled ? undefined : env.ATTACHMENT_BUCKET;
}
+78
View File
@@ -0,0 +1,78 @@
import { Context } from "hono";
import { Env } from "../types";
/**
 * Constant-time comparison of two strings, performed on their UTF-8 bytes.
 *
 * Prefers the runtime's native `crypto.subtle.timingSafeEqual` (Cloudflare
 * Workers) and falls back to a manual constant-time loop in environments that
 * lack it (Node test runtime) — including runtimes where the `crypto` global
 * or `crypto.subtle` does not exist at all.
 *
 * @param a - First value (e.g. the secret supplied by the caller).
 * @param b - Second value (e.g. the configured secret).
 * @returns `true` only when both strings encode to identical byte sequences.
 */
export function timingSafeEqual(a: string, b: string): boolean {
  const encoder = new TextEncoder();
  const aBytes = encoder.encode(a);
  const bBytes = encoder.encode(b);

  // `timingSafeEqual` is a runtime extension that standard SubtleCrypto
  // typings do not declare, so describe just that one optional member
  // instead of casting to `any`.
  type NativeCompare = (x: Uint8Array, y: Uint8Array) => boolean;
  const subtle =
    typeof crypto === "undefined"
      ? undefined
      : (crypto.subtle as
          | (SubtleCrypto & { timingSafeEqual?: NativeCompare })
          | undefined);

  if (typeof subtle?.timingSafeEqual === "function") {
    // The native primitive is only called with equal-length inputs.
    if (aBytes.length !== bBytes.length) return false;
    return subtle.timingSafeEqual(aBytes, bBytes);
  }

  // Fallback: seed `diff` with the length mismatch and always walk the longer
  // input, so the loop never exits early on the first differing byte.
  const len = Math.max(aBytes.length, bBytes.length);
  let diff = aBytes.length ^ bBytes.length;
  for (let i = 0; i < len; i++) {
    diff |= (aBytes[i] ?? 0) ^ (bBytes[i] ?? 0);
  }
  return diff === 0;
}
/**
 * Reverse-proxy authentication check.
 *
 * Passes only when every condition holds:
 * - both `PROXY_AUTH_SECRET` and `PROXY_TRUSTED_IPS` are configured,
 * - the `CF-Connecting-IP` header is one of the comma-separated trusted IPs,
 * - the `X-Auth-Proxy-Secret` header matches the configured secret,
 * - a non-empty `Remote-User` (or `X-Forwarded-User`) header is present.
 */
export function checkProxyAuth(c: Context, env: Env): boolean {
  const { PROXY_AUTH_SECRET: secret, PROXY_TRUSTED_IPS: ipList } = env;
  if (!secret || !ipList) return false;

  const allowedIps = ipList
    .split(",")
    .map((entry: string) => entry.trim())
    .filter(Boolean);

  // Empty entries were filtered out above, so a missing header ("") can never
  // match the allow-list.
  const clientIp = c.req.header("CF-Connecting-IP") ?? "";
  if (!allowedIps.includes(clientIp)) return false;

  const providedSecret = c.req.header("X-Auth-Proxy-Secret") ?? "";
  if (!timingSafeEqual(providedSecret, secret)) return false;

  const remoteUser =
    c.req.header("Remote-User") || c.req.header("X-Forwarded-User") || "";
  return remoteUser.length > 0;
}
/**
 * Authentication middleware for the machine-facing REST API (`/api/v1/*`).
 *
 * A request is let through when proxy auth succeeds, or when it carries
 * `Authorization: Bearer <ADMIN_PASSWORD>`. No cookie, no CSRF — token only.
 * Anything else gets a JSON `401 Unauthorized`.
 */
export async function apiAuthMiddleware(
  c: Context<{ Bindings: Env }>,
  next: () => Promise<void>,
): Promise<Response | void> {
  const { env } = c;
  if (checkProxyAuth(c, env)) {
    return next();
  }

  const bearerPrefix = "Bearer ";
  const authorization = c.req.header("Authorization") ?? "";
  let token = "";
  if (authorization.startsWith(bearerPrefix)) {
    token = authorization.slice(bearerPrefix.length);
  }

  // An empty token is rejected before it is ever compared to the password.
  const authorized =
    token !== "" && timingSafeEqual(token, env.ADMIN_PASSWORD);
  if (!authorized) {
    return c.json({ error: "Unauthorized" }, 401);
  }
  return next();
}
+126
View File
@@ -0,0 +1,126 @@
import { describe, it, expect, beforeEach } from "vitest";
import "../test/setup";
import { createMockEnv } from "../test/setup";
import { handleCloudflareEmail } from "./cloudflare-email";
// Shared fixtures: the feed the test message is addressed to and a minimal
// RFC 822 message (header section, blank line, body) using CRLF line endings.
const VALID_FEED_ID = "apple.mountain.42";
const DOMAIN = "test.getmynews.app";
const RAW_EMAIL = [
  [
    "From: Sender Name <sender@example.com>",
    `To: ${VALID_FEED_ID}@${DOMAIN}`,
    "Subject: Hello World",
    "Date: Thu, 01 Jan 2026 12:00:00 +0000",
    "MIME-Version: 1.0",
    "Content-Type: text/plain; charset=utf-8",
  ].join("\r\n"),
  "This is the email body.",
].join("\r\n\r\n");
/**
 * Build a stand-in `ForwardableEmailMessage` for the handler under test.
 *
 * `overrides` may replace the envelope sender, the envelope recipient and the
 * raw MIME text; anything omitted falls back to the shared fixtures. The raw
 * text is exposed as a single-chunk stream, and the action methods are no-ops.
 */
function makeMessage(
  overrides: Partial<{ from: string; to: string; rawText: string }> = {},
): ForwardableEmailMessage {
  const payload = new TextEncoder().encode(overrides.rawText ?? RAW_EMAIL);

  const raw = new ReadableStream<Uint8Array>({
    start(controller) {
      controller.enqueue(payload);
      controller.close();
    },
  });

  const message = {
    from: overrides.from ?? "sender@example.com",
    to: overrides.to ?? `${VALID_FEED_ID}@${DOMAIN}`,
    headers: new Headers(),
    raw,
    rawSize: payload.length,
    forward: async () => {},
    reply: async () => {},
    setReject: () => {},
  };
  // Only the members the handler touches are provided, hence the double cast.
  return message as unknown as ForwardableEmailMessage;
}
describe("handleCloudflareEmail", () => {
  let env: ReturnType<typeof createMockEnv>;

  const configKey = `feed:${VALID_FEED_ID}:config`;
  const metadataKey = `feed:${VALID_FEED_ID}:metadata`;

  // Store a feed config so the target feed exists for the handler.
  const seedConfig = (config: Record<string, unknown>) =>
    env.EMAIL_STORAGE.put(configKey, JSON.stringify(config));

  // Run the handler against the current mock env with a no-op execution context.
  const deliver = (message: ForwardableEmailMessage = makeMessage()) =>
    handleCloudflareEmail(
      message,
      env as any,
      { waitUntil: () => {} } as any,
    );

  // Read back whatever metadata the handler stored for the feed (null if none).
  const readMetadata = () => env.EMAIL_STORAGE.get(metadataKey, "json");

  beforeEach(() => {
    env = createMockEnv();
  });

  it("stores email in KV when feed exists", async () => {
    await seedConfig({});

    await deliver();

    const metadata = await readMetadata();
    expect(metadata.emails).toHaveLength(1);
    expect(metadata.emails[0].subject).toBe("Hello World");
  });

  it("does not throw when feed does not exist", async () => {
    await expect(deliver()).resolves.toBeUndefined();
  });

  it("does not throw when email is malformed", async () => {
    const msg = makeMessage({ rawText: "not a valid email" });
    await expect(deliver(msg)).resolves.toBeUndefined();
  });

  it("uses sender from message.from for allowlist check", async () => {
    await seedConfig({ allowed_senders: ["sender@example.com"] });

    await deliver();

    const metadata = await readMetadata();
    expect(metadata.emails).toHaveLength(1);
  });

  it("rejects email when sender is not in allowlist (stored nothing)", async () => {
    await seedConfig({ allowed_senders: ["other@example.com"] });

    await deliver();

    const metadata = await readMetadata();
    expect(metadata).toBeNull();
  });
});
+58
View File
@@ -0,0 +1,58 @@
import PostalMime from "postal-mime";
import { Env } from "../types";
import { processEmail, RawAttachment } from "../application/email-processor";
import { normalizeCid } from "../infrastructure/html-processor";
import { logger } from "./logger";
/**
 * Handle an inbound `ForwardableEmailMessage`: parse the raw MIME stream with
 * PostalMime, normalise it into the shape `processEmail` expects, and ingest it.
 *
 * Never rejects: a rejected ingestion is logged as a warning, and any thrown
 * error is logged and swallowed.
 *
 * @param message - The inbound message; `message.from` / `message.to` are the
 *   envelope addresses, `message.raw` the MIME stream.
 * @param env - Worker bindings forwarded to `processEmail`.
 * @param ctx - Execution context forwarded to `processEmail`.
 */
export async function handleCloudflareEmail(
  message: ForwardableEmailMessage,
  env: Env,
  ctx: ExecutionContext,
): Promise<void> {
  try {
    const email = await PostalMime.parse(message.raw);

    // Prefer the parsed From header; fall back to the envelope sender.
    const fromAddress = email.from?.address ?? message.from;
    const from =
      email.from?.name && email.from.address
        ? `${email.from.name} <${email.from.address}>`
        : fromAddress;

    // Flatten the header list into a map; a repeated key keeps its last value.
    const headers: Record<string, string> = {};
    for (const h of email.headers) {
      headers[h.key] = h.value;
    }

    // Only attachments whose content is an ArrayBuffer are forwarded.
    const rawAttachments: RawAttachment[] = (email.attachments ?? [])
      .filter((a) => a.content instanceof ArrayBuffer)
      .map((a) => ({
        filename: a.filename || "attachment",
        contentType: a.mimeType || "application/octet-stream",
        content: a.content as ArrayBuffer,
        contentId: normalizeCid(a.contentId),
      }));

    // A Date header that cannot be parsed would turn into NaN here and leak
    // into stored timestamps; treat it like a missing header instead.
    const parsedDate = email.date
      ? new Date(email.date).getTime()
      : Number.NaN;
    const receivedAt = Number.isNaN(parsedDate) ? Date.now() : parsedDate;

    const result = await processEmail(
      {
        toAddress: message.to,
        from,
        // The envelope sender is the address reported for sender checks.
        senders: [message.from],
        subject: email.subject ?? "(no subject)",
        content: email.html ?? email.text ?? "",
        receivedAt,
        headers,
        attachments: rawAttachments,
      },
      env,
      ctx,
    );

    if (!result.ok) {
      logger.warn("Inbound email rejected", {
        to: message.to,
        reason: result.reason,
      });
    }
  } catch (error) {
    console.error("Error processing Cloudflare email:", error);
  }
}
+149
View File
@@ -0,0 +1,149 @@
import { describe, it, expect } from "vitest";
import { http, HttpResponse } from "msw";
import { server, createMockEnv } from "../test/setup";
import {
cacheFaviconForDomain,
extractEmailDomain,
getCachedIcon,
} from "./favicon-fetcher";
import { MAX_ICON_BYTES } from "../config/constants";
/** Storage key these tests use to read or seed the icon record of `domain`. */
function iconKey(domain: string): string {
  return "icon:" + domain;
}
import type { Env } from "../types";
// Eight-byte stand-in icon: the first four bytes of the PNG signature
// followed by arbitrary payload bytes.
const PNG = Uint8Array.of(0x89, 0x50, 0x4e, 0x47, 1, 2, 3, 4);

/** Mock HTTP response carrying `bytes` with the given `Content-Type`. */
function imageResponse(bytes: Uint8Array, contentType = "image/png") {
  const headers = { "Content-Type": contentType };
  return new HttpResponse(bytes, { headers });
}
describe("extractEmailDomain", () => {
  // [test title, `from` value, expected domain]
  const parsable: [string, string, string][] = [
    ["parses a bare address", "news@github.com", "github.com"],
    ["parses a display-form address", "GitHub <news@GitHub.com>", "github.com"],
    ["strips a trailing dot and lowercases", "a@Example.COM.", "example.com"],
  ];

  for (const [title, from, expectedDomain] of parsable) {
    it(title, () => {
      expect(extractEmailDomain(from)).toBe(expectedDomain);
    });
  }

  it("returns null when there is no address", () => {
    expect(extractEmailDomain("not an email")).toBeNull();
  });
});
describe("cacheFaviconForDomain", () => {
  // A fresh mock environment per test, typed as the real bindings.
  const freshEnv = () => createMockEnv() as unknown as Env;

  // The two icon URLs mocked in these tests, in the order they are tried.
  const directUrl = (domain: string) => `https://${domain}/favicon.ico`;
  const duckDuckGoUrl = (domain: string) =>
    `https://icons.duckduckgo.com/ip3/${domain}.ico`;

  // MSW handler answering `url` with a 404 and the given text body.
  const notFound = (url: string, body = "") =>
    http.get(url, () => HttpResponse.text(body, { status: 404 }));

  it("caches the direct /favicon.ico when available", async () => {
    const domain = "github.com";
    const env = freshEnv();
    server.use(http.get(directUrl(domain), () => imageResponse(PNG)));

    await cacheFaviconForDomain(domain, env);

    const record = await env.EMAIL_STORAGE.get(iconKey(domain), "json");
    expect(record).toMatchObject({ contentType: "image/png" });
    expect((record as { data: string }).data).toBeTruthy();
    expect(record).not.toHaveProperty("fetchedAt");

    const icon = await getCachedIcon(domain, env);
    expect(icon?.contentType).toBe("image/png");
    expect(new Uint8Array(icon!.bytes)).toEqual(PNG);
  });

  it("falls back to DuckDuckGo when the direct icon 404s", async () => {
    const domain = "acme.test";
    const env = freshEnv();
    server.use(
      notFound(directUrl(domain), "nope"),
      http.get(duckDuckGoUrl(domain), () => imageResponse(PNG, "image/x-icon")),
    );

    await cacheFaviconForDomain(domain, env);

    const icon = await getCachedIcon(domain, env);
    expect(icon?.contentType).toBe("image/x-icon");
  });

  it("writes a negative entry when no icon is found", async () => {
    const domain = "nope.test";
    const env = freshEnv();
    server.use(notFound(directUrl(domain)), notFound(duckDuckGoUrl(domain)));

    await cacheFaviconForDomain(domain, env);

    const record = await env.EMAIL_STORAGE.get(iconKey(domain), "json");
    expect(record).toEqual({ data: null, contentType: "" });
    expect(await getCachedIcon(domain, env)).toBeNull();
  });

  it("rejects oversized responses as negative", async () => {
    const domain = "big.test";
    const env = freshEnv();
    const big = new Uint8Array(MAX_ICON_BYTES + 1);
    server.use(
      http.get(directUrl(domain), () => imageResponse(big)),
      notFound(duckDuckGoUrl(domain)),
    );

    await cacheFaviconForDomain(domain, env);

    expect(await getCachedIcon(domain, env)).toBeNull();
  });

  it("rejects non-image content types as negative", async () => {
    const domain = "html.test";
    const env = freshEnv();
    server.use(
      http.get(directUrl(domain), () =>
        HttpResponse.text("<html>", {
          headers: { "Content-Type": "text/html" },
        }),
      ),
      notFound(duckDuckGoUrl(domain)),
    );

    await cacheFaviconForDomain(domain, env);

    expect(await getCachedIcon(domain, env)).toBeNull();
  });

  it("short-circuits when an entry already exists (no outbound fetch)", async () => {
    const domain = "cached.test";
    const env = freshEnv();
    // Pre-seed a record; with MSW onUnhandledRequest:"error", any fetch fails.
    await env.EMAIL_STORAGE.put(
      iconKey(domain),
      JSON.stringify({ data: null, contentType: "" }),
    );

    await expect(cacheFaviconForDomain(domain, env)).resolves.toBeUndefined();
  });

  it("never throws on network errors", async () => {
    const domain = "err.test";
    const env = freshEnv();
    server.use(
      http.get(directUrl(domain), () => HttpResponse.error()),
      http.get(duckDuckGoUrl(domain), () => HttpResponse.error()),
    );

    await expect(cacheFaviconForDomain(domain, env)).resolves.toBeUndefined();
  });
});
+124
View File
@@ -0,0 +1,124 @@
import { Env } from "../types";
import {
ICON_FETCH_TIMEOUT_MS,
ICON_TTL_SECONDS,
MAX_ICON_BYTES,
} from "../config/constants";
import { IconRepository } from "../domain/icon-repository";
import { EmailAddress } from "../domain/value-objects/email-address";
import { logger } from "../infrastructure/logger";
/**
 * JSON shape of a cached icon entry.
 *
 * `data` holds the icon bytes as base64, or `null` for a negative cache entry
 * (lookup was attempted and nothing usable was found). `contentType` is the
 * icon's media type, and the empty string on negative entries.
 */
type IconRecord = {
  data: string | null;
  contentType: string;
};
/**
 * Pull the sender domain out of a `from` value by delegating to
 * `EmailAddress.parse`. Both a bare address (`a@b.com`) and a display form
 * (`Name <a@b.com>`) are accepted.
 *
 * @returns The parsed address's domain value, or `null` when the input does
 *   not parse to an address.
 */
export function extractEmailDomain(from: string): string | null {
  const address = EmailAddress.parse(from);
  if (address == null) return null;
  return address.domain.value ?? null;
}
/**
 * Encode the bytes of `buffer` as a base64 string.
 *
 * Bytes are converted to a binary string in 32 KiB slices so that no single
 * `String.fromCharCode` call receives an unbounded argument list.
 */
function arrayBufferToBase64(buffer: ArrayBuffer): string {
  const sliceLength = 0x8000;
  const view = new Uint8Array(buffer);
  const pieces: string[] = [];
  for (let offset = 0; offset < view.length; offset += sliceLength) {
    const slice = view.subarray(offset, offset + sliceLength);
    pieces.push(String.fromCharCode.apply(null, Array.from(slice)));
  }
  return btoa(pieces.join(""));
}
/**
 * Decode a base64 string into an `ArrayBuffer` holding the original bytes.
 * Each character of the decoded binary string maps to exactly one byte.
 */
function base64ToArrayBuffer(base64: string): ArrayBuffer {
  const decoded = atob(base64);
  const out = new Uint8Array(decoded.length);
  let index = 0;
  while (index < decoded.length) {
    out[index] = decoded.charCodeAt(index);
    index += 1;
  }
  return out.buffer;
}
/**
 * Download one icon candidate.
 *
 * Resolves to `null` when the response is not OK, is not served as `image/*`,
 * is empty, or exceeds `MAX_ICON_BYTES`. Network and timeout errors are not
 * caught here and reject the returned promise.
 *
 * @returns The icon bytes plus the media type with any parameters
 *   (e.g. `; charset=…`) stripped.
 */
async function fetchIconFrom(
  url: string,
): Promise<{ buffer: ArrayBuffer; contentType: string } | null> {
  const response = await fetch(url, {
    redirect: "follow",
    signal: AbortSignal.timeout(ICON_FETCH_TIMEOUT_MS),
    headers: { "User-Agent": "kill-the-news/1.0" },
  });
  if (!response.ok) return null;

  const rawType = response.headers.get("content-type") ?? "";
  if (!rawType.startsWith("image/")) return null;

  const body = await response.arrayBuffer();
  const size = body.byteLength;
  const acceptableSize = size !== 0 && size <= MAX_ICON_BYTES;
  if (!acceptableSize) return null;

  const mediaType = rawType.split(";")[0].trim();
  return { buffer: body, contentType: mediaType };
}
/**
 * Find an icon for `domain` by trying, in order, the site's own
 * `/favicon.ico` and then the DuckDuckGo icon service. The first candidate
 * that yields a usable icon wins; `null` means neither did.
 */
async function resolveIcon(
  domain: string,
): Promise<{ buffer: ArrayBuffer; contentType: string } | null> {
  const sources = [
    `https://${domain}/favicon.ico`,
    `https://icons.duckduckgo.com/ip3/${domain}.ico`,
  ];

  for (let i = 0; i < sources.length; i++) {
    // Network/timeout errors must never propagate: treat a failed candidate
    // as a miss and move on to the next one.
    const found = await fetchIconFrom(sources[i]).catch(() => null);
    if (found) return found;
  }
  return null;
}
/**
 * Look up the favicon of a sender domain and store it in the icon cache.
 *
 * Safe to call for every incoming email: when any entry for the domain is
 * already stored — a real icon or a negative marker — it returns without
 * fetching. A failed lookup is stored as a negative entry. Expiry is left
 * entirely to the TTL passed to the repository. Errors are logged as warnings
 * and never thrown.
 */
export async function cacheFaviconForDomain(
  domain: string,
  env: Env,
): Promise<void> {
  try {
    const icons = IconRepository.from(env);
    const alreadyCached = (await icons.getText(domain)) !== null;
    if (alreadyCached) return;

    const resolved = await resolveIcon(domain);
    let record: IconRecord;
    if (resolved) {
      record = {
        data: arrayBufferToBase64(resolved.buffer),
        contentType: resolved.contentType,
      };
    } else {
      // Negative entry: remember that nothing was found.
      record = { data: null, contentType: "" };
    }
    await icons.put(domain, JSON.stringify(record), ICON_TTL_SECONDS);
  } catch (error) {
    logger.warn("Favicon cache failed", { domain, error: String(error) });
  }
}
/**
 * Fetch the cached icon of a domain.
 *
 * @returns The decoded icon bytes and their content type, or `null` when
 *   nothing is cached for the domain or the cached entry is a negative one.
 */
export async function getCachedIcon(
  domain: string,
  env: Env,
): Promise<{ bytes: ArrayBuffer; contentType: string } | null> {
  const stored = await IconRepository.from(env).getJson<IconRecord>(domain);
  if (stored && stored.data !== null) {
    return {
      bytes: base64ToArrayBuffer(stored.data),
      contentType: stored.contentType,
    };
  }
  return null;
}
+337
View File
@@ -0,0 +1,337 @@
import { describe, it, expect } from "vitest";
import {
generateRssFeed,
generateAtomFeed,
extractBodyContent,
} from "./feed-generator";
import { FeedConfig, EmailData } from "../types";
// Shared fixtures for the feed generator tests.
const BASE_URL = "https://test.getmynews.app";
const FEED_ID = "abc123";

const mockFeedConfig: FeedConfig = {
  title: "Test Newsletter",
  description: "A test feed",
  language: "en",
  created_at: 1700000000000,
};

// A single email without attachments; the attachment variant builds on it.
const plainEmail: EmailData = {
  subject: "Hello World",
  from: "Alice <alice@example.com>",
  content: "<p>Hello from Alice</p>",
  receivedAt: 1700000001000,
  headers: {},
};

const mockEmails: EmailData[] = [plainEmail];

const mockEmailWithAttachment: EmailData = {
  ...plainEmail,
  attachments: [
    {
      id: "550e8400-e29b-41d4-a716-446655440000",
      filename: "report.pdf",
      contentType: "application/pdf",
      size: 12345,
    },
  ],
};
describe("extractBodyContent", () => {
  // [test title, input html, expected output]
  const cases: [string, string, string][] = [
    [
      "extracts content inside <body> tags",
      "<html><head></head><body><p>Hello</p></body></html>",
      "<p>Hello</p>",
    ],
    [
      "handles body tag with attributes",
      '<html><body style="margin:0"><p>Hi</p></body></html>',
      "<p>Hi</p>",
    ],
    [
      "returns html unchanged when no body tags present",
      "<p>Already a fragment</p>",
      "<p>Already a fragment</p>",
    ],
    [
      "is case-insensitive for body tag matching",
      "<HTML><BODY><p>content</p></BODY></HTML>",
      "<p>content</p>",
    ],
  ];

  for (const [title, html, expected] of cases) {
    it(title, () => {
      expect(extractBodyContent(html)).toBe(expected);
    });
  }
});
describe("generateRssFeed", () => {
  // Render the RSS document for the shared fixtures; only the emails vary.
  const renderRss = (emails: EmailData[] = mockEmails): string =>
    generateRssFeed(mockFeedConfig, emails, BASE_URL, FEED_ID);

  it("returns RSS 2.0 with channel element", () => {
    const result = renderRss();
    expect(result).toContain("<channel>");
    expect(result).toContain("<title>Test Newsletter</title>");
  });

  it("includes the per-feed icon as the channel <image>", () => {
    const result = renderRss();
    expect(result).toContain("<image>");
    expect(result).toContain(`${BASE_URL}/favicon/${FEED_ID}`);
  });

  it("includes <enclosure> element for email with attachment", () => {
    const result = renderRss([mockEmailWithAttachment]);
    expect(result).toContain("<enclosure");
    expect(result).toContain("550e8400-e29b-41d4-a716-446655440000");
    expect(result).toContain("report.pdf");
    expect(result).toContain("application/pdf");
    expect(result).toContain("12345");
  });

  it("does not include <enclosure> for email without attachments", () => {
    expect(renderRss()).not.toContain("<enclosure");
  });

  it("enclosure URL uses /files/{id}/{filename} scheme", () => {
    const result = renderRss([mockEmailWithAttachment]);
    expect(result).toContain(
      `${BASE_URL}/files/550e8400-e29b-41d4-a716-446655440000/report.pdf`,
    );
  });

  it("includes rss self-link in RSS output", () => {
    expect(renderRss()).toContain(`${BASE_URL}/rss/${FEED_ID}`);
  });

  it("includes email entries as <item> elements", () => {
    const result = renderRss();
    expect(result).toContain("<item>");
    expect(result).toContain("Hello World");
  });

  it("works with empty emails array", () => {
    const result = renderRss([]);
    expect(result).toContain("<channel>");
    expect(result).not.toContain("<item>");
  });

  it("feed link points to admin emails page", () => {
    expect(renderRss()).toContain(`${BASE_URL}/admin/feeds/${FEED_ID}/emails`);
  });

  it("strips html/head/body wrapper from item description", () => {
    const emailWithFullHtml: EmailData = {
      ...mockEmails[0],
      content: "<html><head></head><body><p>Body only</p></body></html>",
    };
    const result = renderRss([emailWithFullHtml]);
    expect(result).toContain("<p>Body only</p>");
    expect(result).not.toContain("<html>");
  });
});
describe("generateAtomFeed", () => {
  // Render the Atom document; emails and config default to the shared fixtures.
  const renderAtom = (
    emails: EmailData[] = mockEmails,
    config: FeedConfig = mockFeedConfig,
  ): string => generateAtomFeed(config, emails, BASE_URL, FEED_ID);

  it("returns Atom 1.0 namespace", () => {
    expect(renderAtom()).toContain('xmlns="http://www.w3.org/2005/Atom"');
  });

  it("includes the per-feed icon as <icon> and <logo>", () => {
    const result = renderAtom();
    const iconUrl = `${BASE_URL}/favicon/${FEED_ID}`;
    expect(result).toContain(`<icon>${iconUrl}</icon>`);
    expect(result).toContain(`<logo>${iconUrl}</logo>`);
  });

  it("contains <feed> root element", () => {
    const result = renderAtom();
    expect(result).toContain("<feed");
    expect(result).toContain("</feed>");
  });

  it("includes feed title", () => {
    expect(renderAtom()).toContain("Test Newsletter");
  });

  it("includes <entry> elements for each email", () => {
    const result = renderAtom();
    expect(result).toContain("<entry>");
    expect(result).toContain("Hello World");
  });

  it("includes author information", () => {
    expect(renderAtom()).toContain("Alice");
  });

  it("self-link points to atom URL", () => {
    expect(renderAtom()).toContain(`${BASE_URL}/atom/${FEED_ID}`);
  });

  it("includes rss alternate link", () => {
    expect(renderAtom()).toContain(`${BASE_URL}/rss/${FEED_ID}`);
  });

  it("works with empty emails array", () => {
    const result = renderAtom([]);
    expect(result).toContain("<feed");
    expect(result).not.toContain("<entry>");
  });

  it("feed link points to admin emails page", () => {
    expect(renderAtom()).toContain(`${BASE_URL}/admin/feeds/${FEED_ID}/emails`);
  });

  it("strips html/head/body wrapper from entry content", () => {
    const emailWithFullHtml: EmailData = {
      ...mockEmails[0],
      content: "<html><head></head><body><p>Body only</p></body></html>",
    };
    const result = renderAtom([emailWithFullHtml]);
    expect(result).toContain("<p>Body only</p>");
    expect(result).not.toContain("<html>");
  });

  it("handles config without description", () => {
    const configNoDesc: FeedConfig = {
      ...mockFeedConfig,
      description: undefined,
    };
    const result = renderAtom(mockEmails, configNoDesc);
    expect(result).toContain('xmlns="http://www.w3.org/2005/Atom"');
  });

  it("handles config with author field", () => {
    const configWithAuthor: FeedConfig = { ...mockFeedConfig, author: "Bob" };
    const result = renderAtom(mockEmails, configWithAuthor);
    expect(result).toContain("Bob");
  });

  it("includes enclosure link for email with attachment in Atom feed", () => {
    const result = renderAtom([mockEmailWithAttachment]);
    expect(result).toContain('rel="enclosure"');
    expect(result).toContain("550e8400-e29b-41d4-a716-446655440000");
    expect(result).toContain("report.pdf");
  });

  it("does not include enclosure link for email without attachments in Atom feed", () => {
    expect(renderAtom()).not.toContain('rel="enclosure"');
  });
});
+114
View File
@@ -0,0 +1,114 @@
import { Feed } from "feed";
import { FeedConfig, EmailData } from "../types";
import { processEmailContent } from "./html-processor";
// Re-export html-processor's `processEmailContent` from this module under the
// name `extractBodyContent`.
export { processEmailContent as extractBodyContent };
/**
 * Split a `from` value into the author fields used for feed entries.
 *
 * Recognised forms, after trimming surrounding whitespace:
 * - `Name <addr>`: name and email; an empty name falls back to the address.
 * - bare `addr@host.tld`: the address is used as both name and email.
 * - anything else: returned as a name only, with no `email` key.
 *
 * The input is trimmed before matching so that a padded bare address is still
 * recognised as an email, and every returned value is free of padding.
 */
function parseFromAddress(from: string): { name: string; email?: string } {
  const trimmed = from.trim();

  const displayForm = trimmed.match(/^(.*?)\s*<([^>]+)>\s*$/);
  if (displayForm) {
    const email = displayForm[2].trim();
    return { name: displayForm[1].trim() || email, email };
  }

  if (/^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(trimmed)) {
    return { email: trimmed, name: trimmed };
  }

  return { name: trimmed };
}
/**
 * Assemble the `Feed` object shared by the RSS and Atom generators.
 *
 * @param feedConfig - Stored feed settings (title, description, language, author).
 * @param emails - Emails to publish, one feed item each.
 * @param baseUrl - Public origin used to build every absolute URL.
 * @param feedId - Identifier of the feed within `baseUrl`.
 * @param selfUrl - Optional overrides for the advertised RSS/Atom self links.
 */
function buildFeed(
  feedConfig: FeedConfig,
  emails: EmailData[],
  baseUrl: string,
  feedId: string,
  selfUrl?: { rss?: string; atom?: string },
): Feed {
  const iconUrl = `${baseUrl}/favicon/${feedId}`;
  const rssUrl = `${baseUrl}/rss/${feedId}`;
  const atomUrl = `${baseUrl}/atom/${feedId}`;

  // A feed-level author is only emitted when one is configured; its address
  // is a synthetic noreply@ on the host serving the feed.
  let author: { name: string; email: string } | undefined;
  if (feedConfig.author) {
    author = {
      name: feedConfig.author,
      email: `noreply@${new URL(baseUrl).hostname}`,
    };
  }

  const feed = new Feed({
    title: feedConfig.title,
    description: feedConfig.description || "",
    // Per-feed icon URL. image → RSS <image>/Atom <logo>; favicon → Atom <icon>.
    image: iconUrl,
    favicon: iconUrl,
    // Built from the current base URL on every render, so the id stays
    // canonical no matter which domain was in use when the feed was created.
    id: rssUrl,
    // The admin emails page is the "website" this feed represents.
    link: `${baseUrl}/admin/feeds/${feedId}/emails`,
    language: feedConfig.language,
    updated: new Date(),
    generator: "kill-the-news",
    copyright: `Copyright © ${new Date().getFullYear()} ${feedConfig.title}`,
    feedLinks: {
      rss: selfUrl?.rss ?? rssUrl,
      atom: selfUrl?.atom ?? atomUrl,
    },
    author,
  });

  emails.forEach((email) => {
    // The receive timestamp identifies the entry within its feed.
    const entryUrl = `${baseUrl}/entries/${feedId}/${email.receivedAt}`;
    const body = processEmailContent(
      email.content,
      email.attachments,
      baseUrl,
    );

    // Only the first attachment is published as the item's enclosure.
    const leadAttachment = email.attachments?.[0];
    const enclosure = leadAttachment
      ? {
          url: `${baseUrl}/files/${leadAttachment.id}/${encodeURIComponent(leadAttachment.filename)}`,
          type: leadAttachment.contentType,
          length: leadAttachment.size,
        }
      : undefined;

    feed.addItem({
      title: email.subject,
      id: entryUrl,
      link: entryUrl,
      description: body,
      content: body,
      author: [parseFromAddress(email.from)],
      date: new Date(email.receivedAt),
      enclosure,
    });
  });

  return feed;
}
/**
 * Render a feed and its emails as an RSS 2.0 document.
 *
 * @param selfUrl - Optional URL to advertise as the RSS self link instead of
 *   the default `{baseUrl}/rss/{feedId}`.
 */
export function generateRssFeed(
  feedConfig: FeedConfig,
  emails: EmailData[],
  baseUrl: string,
  feedId: string,
  selfUrl?: string,
): string {
  const linkOverride = selfUrl ? { rss: selfUrl } : undefined;
  const feed = buildFeed(feedConfig, emails, baseUrl, feedId, linkOverride);
  return feed.rss2();
}
/**
 * Render a feed and its emails as an Atom 1.0 document.
 *
 * @param selfUrl - Optional URL to advertise as the Atom self link instead of
 *   the default `{baseUrl}/atom/{feedId}`.
 */
export function generateAtomFeed(
  feedConfig: FeedConfig,
  emails: EmailData[],
  baseUrl: string,
  feedId: string,
  selfUrl?: string,
): string {
  const linkOverride = selfUrl ? { atom: selfUrl } : undefined;
  const feed = buildFeed(feedConfig, emails, baseUrl, feedId, linkOverride);
  return feed.atom1();
}
+127
View File
@@ -0,0 +1,127 @@
import { EmailParser } from "../domain/email-parser";
import { Env } from "../types";
import {
processEmail,
IngestResult,
RawAttachment,
} from "../application/email-processor";
import { normalizeCid } from "../infrastructure/html-processor";
/**
 * Translate an ingestion outcome into the HTTP response sent back to
 * ForwardEmail: 200 on success, otherwise a status chosen by the rejection
 * reason (400 invalid address, 404 unknown feed, 410 expired feed,
 * 403 blocked sender).
 */
export function ingestResultToResponse(result: IngestResult): Response {
  const reply = (body: string, status: number): Response =>
    new Response(body, { status });

  if (result.ok) {
    return reply("Email processed successfully", 200);
  }

  switch (result.reason) {
    case "invalid_address":
      return reply("Invalid email address format", 400);
    case "feed_not_found":
      return reply("Feed does not exist", 404);
    case "feed_expired":
      return reply("Feed has expired", 410);
    case "sender_blocked":
      return reply("Sender not allowed for this feed", 403);
  }
}
/**
 * One attachment entry of a ForwardEmail payload. Every field is optional;
 * `handleForwardEmail` supplies defaults for the ones it needs.
 */
export interface ForwardEmailAttachment {
// File name; "attachment" is substituted when missing.
filename?: string;
// Media type; "application/octet-stream" is substituted when missing.
contentType?: string;
// Declared size. Not read by the code in this file.
size?: number;
// Content-ID of an inline part; `cid` takes precedence over `contentId`.
cid?: string;
contentId?: string;
// Attachment bytes: a JSON-serialised Buffer ({ type: "Buffer", data }), an
// ArrayBuffer, or a typed-array/DataView. Attachments whose content cannot be
// converted to an ArrayBuffer are dropped.
content?: { type: "Buffer"; data: number[] } | ArrayBuffer | ArrayBufferView;
}
/**
 * Payload accepted by `handleForwardEmail`. It is passed whole to
 * `EmailParser.parseForwardEmailPayload`; the notes below cover only the
 * fields this file reads directly.
 */
export interface ForwardEmailPayload {
// Recipient addresses; the first one is used as the target address.
recipients?: string[];
// Parsed From header. Sender addresses come from `value` when it holds any
// address, otherwise they are extracted from `text` with a regex.
from?: {
value?: Array<{ address?: string; name?: string }>;
text?: string;
html?: string;
};
subject?: string;
text?: string;
html?: string;
date?: string;
messageId?: string;
headerLines?: Array<{ key: string; line: string }>;
headers?: string;
raw?: string;
// Attachments; see ForwardEmailAttachment.
attachments?: ForwardEmailAttachment[];
}
/** Canonical form of an address for comparison: trimmed and lowercased. */
function normalizeEmail(value: string): string {
  const withoutPadding = value.trim();
  return withoutPadding.toLowerCase();
}
/**
 * Collect the distinct, normalised sender addresses of a payload.
 *
 * Structured `from.value` entries win when at least one carries an address;
 * otherwise addresses are scraped from the free-form `from.text` with a regex.
 * First-seen order is preserved and duplicates are removed.
 */
function extractSenderAddresses(payload: ForwardEmailPayload): string[] {
  const structured = new Set<string>();
  for (const entry of payload.from?.value || []) {
    const address = normalizeEmail(entry.address || "");
    if (address) structured.add(address);
  }
  if (structured.size > 0) {
    return [...structured];
  }

  const fromText = payload.from?.text || "";
  const found =
    fromText.match(/[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}/gi) || [];
  return [...new Set(found.map(normalizeEmail))];
}
/**
 * Convert any supported attachment content representation to an `ArrayBuffer`.
 *
 * - `ArrayBuffer`: returned as is.
 * - Typed array / `DataView`: only the bytes the view covers are returned. A
 *   view spanning its whole backing buffer returns that buffer without
 *   copying; a partial view (e.g. a `subarray`, or a view into a shared pool)
 *   is copied out so unrelated neighbouring bytes never leak into the result.
 * - JSON-serialised Buffer (`{ type: "Buffer", data: number[] }`): rebuilt
 *   from its byte array.
 *
 * @returns The bytes, or `null` when the content is missing or unrecognised.
 */
function toArrayBuffer(
  content: ForwardEmailAttachment["content"],
): ArrayBuffer | null {
  if (!content) return null;
  if (content instanceof ArrayBuffer) return content;

  if (ArrayBuffer.isView(content)) {
    const { buffer, byteOffset, byteLength } = content;
    const coversWholeBuffer =
      byteOffset === 0 && byteLength === buffer.byteLength;
    if (coversWholeBuffer) return buffer as ArrayBuffer;
    return buffer.slice(byteOffset, byteOffset + byteLength) as ArrayBuffer;
  }

  if (
    typeof content === "object" &&
    content.type === "Buffer" &&
    Array.isArray(content.data)
  ) {
    return Uint8Array.from(content.data).buffer as ArrayBuffer;
  }
  return null;
}
/**
 * Ingest a ForwardEmail payload and answer with the matching HTTP response.
 *
 * The message fields come from `EmailParser.parseForwardEmailPayload`;
 * attachments are converted to raw buffers (unconvertible ones are skipped),
 * and the first recipient is used as the target address.
 *
 * @param payload - Payload to ingest.
 * @param env - Worker bindings forwarded to `processEmail`.
 * @param ctx - Optional execution context forwarded to `processEmail`.
 */
export async function handleForwardEmail(
  payload: ForwardEmailPayload,
  env: Env,
  ctx?: ExecutionContext,
): Promise<Response> {
  const parsed = EmailParser.parseForwardEmailPayload(payload);

  const attachments: RawAttachment[] = [];
  for (const item of payload.attachments ?? []) {
    const bytes = toArrayBuffer(item.content);
    if (!bytes) continue;
    attachments.push({
      filename: item.filename || "attachment",
      contentType: item.contentType || "application/octet-stream",
      content: bytes,
      contentId: normalizeCid(item.cid ?? item.contentId),
    });
  }

  const outcome = await processEmail(
    {
      toAddress: payload.recipients?.[0] || "",
      from: parsed.from,
      senders: extractSenderAddresses(payload),
      subject: parsed.subject,
      content: parsed.content,
      receivedAt: parsed.receivedAt,
      headers: parsed.headers,
      attachments,
    },
    env,
    ctx,
  );

  return ingestResultToResponse(outcome);
}
+198
View File
@@ -0,0 +1,198 @@
import { describe, it, expect } from "vitest";
import { processEmailContent } from "./html-processor";
import type { AttachmentData } from "../types";
describe("processEmailContent — body extraction", () => {
  // [test title, input html, expected output]
  const cases: [string, string, string][] = [
    [
      "extracts content inside <body> tags",
      "<html><head></head><body><p>Hello</p></body></html>",
      "<p>Hello</p>",
    ],
    [
      "handles body tag with attributes",
      '<html><body style="margin:0"><p>Hi</p></body></html>',
      "<p>Hi</p>",
    ],
    [
      "returns fragment unchanged when no body tags present",
      "<p>Already a fragment</p>",
      "<p>Already a fragment</p>",
    ],
    [
      "is case-insensitive for body tag matching",
      "<HTML><BODY><p>content</p></BODY></HTML>",
      "<p>content</p>",
    ],
  ];

  for (const [title, html, expected] of cases) {
    it(title, () => {
      expect(processEmailContent(html)).toBe(expected);
    });
  }
});
describe("processEmailContent — plain text", () => {
  it("wraps plain text in <pre>", () => {
    const text = "Hello world\nSecond line";
    const result = processEmailContent(text);
    expect(result).toMatch(/^<pre /);
    expect(result).toContain("Hello world\nSecond line");
  });

  it("escapes < and > in plain text", () => {
    const text = "Price < 10 & size > 5";
    const result = processEmailContent(text);
    expect(result).toContain("&lt;");
    expect(result).toContain("&gt;");
    expect(result).toContain("&amp;");
    // The input reads "< 10" with a space, so that is the raw sequence that
    // must not survive escaping (a check for "<10" could never fail).
    expect(result).not.toContain("< 10");
  });

  it("returns empty string for empty input", () => {
    expect(processEmailContent("")).toBe("");
  });
});
describe("processEmailContent — dangerous element removal", () => {
  // Run the processor on `inner` wrapped in a <body> element.
  const processBody = (inner: string): string =>
    processEmailContent(`<body>${inner}</body>`);

  it("removes <script> tags", () => {
    const result = processBody("<p>Hello</p><script>alert('xss')</script>");
    expect(result).not.toContain("<script");
    expect(result).not.toContain("alert");
    expect(result).toContain("<p>Hello</p>");
  });

  it("removes <iframe> tags", () => {
    const result = processBody(
      "<iframe src='https://evil.com'></iframe><p>ok</p>",
    );
    expect(result).not.toContain("<iframe");
    expect(result).toContain("<p>ok</p>");
  });

  it("removes <object> and <embed> tags", () => {
    const result = processBody("<object></object><embed src='x'/><p>ok</p>");
    expect(result).not.toContain("<object");
    expect(result).not.toContain("<embed");
  });
});
// Suite: per-attribute sanitisation. Event handlers and javascript: URLs
// must be stripped; ordinary https links and image sources must be kept.
describe("processEmailContent — attribute sanitization", () => {
// The handler is removed but the safe href on the same element stays
// (serialised with double quotes).
it("removes event handler attributes", () => {
const html =
"<body><a href='https://x.com' onclick='evil()'>link</a></body>";
const result = processEmailContent(html);
expect(result).not.toContain("onclick");
expect(result).toContain('href="https://x.com"');
});
it("removes onerror on images", () => {
const html = "<body><img src='x' onerror='evil()' /></body>";
const result = processEmailContent(html);
expect(result).not.toContain("onerror");
});
it("removes javascript: hrefs", () => {
const html = "<body><a href='javascript:evil()'>click</a></body>";
const result = processEmailContent(html);
expect(result).not.toContain("javascript:");
});
// Guards against over-eager sanitisation of legitimate URLs.
it("preserves legitimate href and src attributes", () => {
const html =
"<body><a href='https://example.com'>link</a><img src='https://example.com/img.png'/></body>";
const result = processEmailContent(html);
expect(result).toContain("https://example.com");
});
});
// Suite: removal of Microsoft Office "mso-*" declarations from inline
// style attributes, keeping any other declarations intact.
describe("processEmailContent — mso style cleanup", () => {
it("strips mso-* properties from inline styles", () => {
const html =
'<body><p style="mso-margin-top: 0; color: red;">text</p></body>';
const result = processEmailContent(html);
expect(result).not.toContain("mso-margin-top");
expect(result).toContain("color: red");
});
// When nothing is left after filtering, the attribute itself must go.
it("removes style attribute entirely when only mso properties remain", () => {
const html =
'<body><p style="mso-line-height-rule: exactly;">text</p></body>';
const result = processEmailContent(html);
expect(result).not.toContain("style=");
});
it("preserves style attribute when non-mso properties remain", () => {
const html =
'<body><p style="mso-font-size: 12pt; font-weight: bold;">text</p></body>';
const result = processEmailContent(html);
expect(result).toContain("font-weight");
expect(result).not.toContain("mso-font-size");
});
});
// Suite: rewriting of inline "cid:" image references to the URL of the
// stored attachment, i.e. <baseUrl>/files/<attachment id>/<encoded filename>.
describe("processEmailContent — inline cid: rewriting", () => {
// Fixture factory: a PNG attachment whose filename contains a space (so URL
// encoding is exercised) and whose contentId matches the cid used below.
// Individual fields can be overridden per test.
const attachment = (
overrides: Partial<AttachmentData> = {},
): AttachmentData => ({
id: "att-123",
filename: "chicken big.png",
contentType: "image/png",
size: 100,
contentId: "ii_mpi85rqy0",
...overrides,
});
// Without a baseUrl argument the rewritten URL is relative.
it("rewrites cid: src to a relative /files URL when no baseUrl", () => {
const html = '<body><img src="cid:ii_mpi85rqy0" alt="x"/></body>';
const result = processEmailContent(html, [attachment()]);
expect(result).toContain('src="/files/att-123/chicken%20big.png"');
expect(result).not.toContain("cid:");
});
// With a baseUrl the same path is prefixed to form an absolute URL.
it("rewrites cid: src to an absolute URL when baseUrl is given", () => {
const html = '<body><img src="cid:ii_mpi85rqy0"/></body>';
const result = processEmailContent(
html,
[attachment()],
"https://feed.example",
);
expect(result).toContain(
'src="https://feed.example/files/att-123/chicken%20big.png"',
);
});
// A Content-ID stored as "<id>" must still match the bare "cid:id" reference.
it("matches a stored Content-ID that has angle brackets", () => {
const html = '<body><img src="cid:ii_mpi85rqy0"/></body>';
const result = processEmailContent(html, [
attachment({ contentId: "<ii_mpi85rqy0>" }),
]);
expect(result).toContain('src="/files/att-123/chicken%20big.png"');
});
it("is case-insensitive on the cid: scheme", () => {
const html = '<body><img src="CID:ii_mpi85rqy0"/></body>';
const result = processEmailContent(html, [attachment()]);
expect(result).toContain('src="/files/att-123/chicken%20big.png"');
});
// References with no matching attachment are left as they are.
it("leaves unknown cid references unchanged", () => {
const html = '<body><img src="cid:unknown"/></body>';
const result = processEmailContent(html, [attachment()]);
expect(result).toContain('src="cid:unknown"');
});
it("leaves cid references unchanged when no attachments are provided", () => {
const html = '<body><img src="cid:ii_mpi85rqy0"/></body>';
const result = processEmailContent(html);
expect(result).toContain('src="cid:ii_mpi85rqy0"');
});
// An attachment lacking a contentId can never be the target of a cid: ref.
it("ignores attachments without a contentId", () => {
const html = '<body><img src="cid:ii_mpi85rqy0"/></body>';
const result = processEmailContent(html, [
attachment({ contentId: undefined }),
]);
expect(result).toContain('src="cid:ii_mpi85rqy0"');
});
// Non-cid sources must be untouched even when attachments are supplied.
it("does not touch normal http image sources", () => {
const html = '<body><img src="https://example.com/a.png"/></body>';
const result = processEmailContent(html, [attachment()]);
expect(result).toContain('src="https://example.com/a.png"');
});
});
+121
View File
@@ -0,0 +1,121 @@
import { parseHTML } from "linkedom";
import escapeHtml from "escape-html";
import type { AttachmentData } from "../types";
/**
 * Normalises a MIME Content-ID so it can be used as a lookup key.
 *
 * Surrounding whitespace, one leading `<` and one trailing `>` are removed,
 * so a stored value such as "<ii_mpi85rqy0>" compares equal to the id taken
 * from an HTML reference like "cid:ii_mpi85rqy0".
 *
 * @param cid Raw Content-ID; may be null, undefined or empty.
 * @returns The bare id, or `undefined` when nothing remains after stripping.
 */
export function normalizeCid(
  cid: string | null | undefined,
): string | undefined {
  if (cid === null || cid === undefined || cid === "") return undefined;

  let bare = cid.trim();
  if (bare.startsWith("<")) bare = bare.slice(1);
  if (bare.endsWith(">")) bare = bare.slice(0, -1);
  // Whitespace that sat just inside the angle brackets is dropped as well.
  bare = bare.trim();

  return bare === "" ? undefined : bare;
}
/**
 * Removes Microsoft Office `mso-*` declarations from an inline style value.
 *
 * The value is split into declarations at top-level semicolons only: a `;`
 * inside parentheses (e.g. `url(data:image/png;base64,...)`) or inside a
 * quoted string belongs to the declaration's value and must not be treated
 * as a separator, otherwise re-joining would insert a space into the value
 * and corrupt it.
 *
 * @param style Raw content of a `style` attribute.
 * @returns The remaining declarations, trimmed and joined with "; "
 *   (no trailing semicolon); an empty string when nothing remains.
 */
function cleanMsoStyles(style: string): string {
  const declarations: string[] = [];
  let current = "";
  let parenDepth = 0;
  let openQuote: string | null = null;

  for (const ch of style) {
    if (openQuote !== null) {
      // Inside a string: only the matching quote character ends it.
      if (ch === openQuote) openQuote = null;
    } else if (ch === '"' || ch === "'") {
      openQuote = ch;
    } else if (ch === "(") {
      parenDepth += 1;
    } else if (ch === ")") {
      if (parenDepth > 0) parenDepth -= 1;
    } else if (ch === ";" && parenDepth === 0) {
      // Top-level separator: close the current declaration.
      declarations.push(current);
      current = "";
      continue;
    }
    current += ch;
  }
  declarations.push(current);

  return declarations
    .map((declaration) => declaration.trim())
    .filter((declaration) => declaration && !/^mso-/i.test(declaration))
    .join("; ");
}
/**
 * Heuristic check for content that carries no HTML markup.
 *
 * Content counts as markup when it contains a `<` immediately followed by an
 * ASCII letter and, anywhere later, a `>`. Everything else (including the
 * empty string and text such as "a < b") is treated as plain text.
 *
 * @param content Raw email body.
 * @returns `true` when no tag-like sequence is present.
 */
function isPlainText(content: string): boolean {
  const tagLike = /<[a-z][\s\S]*>/i;
  return content.search(tagLike) === -1;
}
/**
 * Rewrites an element's `cid:` source to the URL of the matching attachment.
 *
 * The `src` attribute is left untouched when it is not a `cid:` reference
 * (scheme matched case-insensitively, leading whitespace allowed) or when no
 * attachment is registered for the referenced Content-ID.
 *
 * cid: URLs may carry percent-encoded characters (for example `%40` for `@`),
 * whereas the map is keyed by the Content-ID header value. The id is
 * therefore looked up as written first and, if that fails, once more in its
 * percent-decoded form.
 *
 * @param el Element carrying a `src` attribute; mutated in place.
 * @param cidMap Attachments keyed by normalised Content-ID.
 * @param baseUrl Prefix for the generated URL; "" produces a relative URL.
 */
function rewriteCidSrc(
  el: Element,
  cidMap: Map<string, AttachmentData>,
  baseUrl: string,
): void {
  const src = el.getAttribute("src") ?? "";
  const match = src.match(/^\s*cid:(.+)$/i);
  if (!match) return;

  const referenced = normalizeCid(match[1]) ?? "";
  let attachment = cidMap.get(referenced);

  if (!attachment && referenced.includes("%")) {
    try {
      const decoded = normalizeCid(decodeURIComponent(referenced)) ?? "";
      attachment = cidMap.get(decoded);
    } catch {
      // Malformed percent-escape: treat the reference as unknown.
    }
  }
  if (!attachment) return;

  el.setAttribute(
    "src",
    `${baseUrl}/files/${attachment.id}/${encodeURIComponent(attachment.filename)}`,
  );
}
// Attributes whose value is interpreted as a URL and can therefore carry a
// script-executing scheme.
const SCRIPTABLE_URL_ATTRIBUTES = new Set([
  "href",
  "src",
  "action",
  "formaction",
  "xlink:href",
]);

/**
 * Sanitises a single element in place:
 * - removes every attribute whose name starts with "on" (event handlers),
 * - removes URL attributes that resolve to a `javascript:` URL,
 * - strips `mso-*` declarations from the `style` attribute, dropping the
 *   attribute entirely when nothing is left.
 *
 * @param el Element to clean; its attributes are mutated.
 */
function sanitizeElement(el: Element): void {
  // Snapshot attribute names before mutating (linkedom attributes is array-like)
  const attrs = Array.from(
    el.attributes as unknown as ArrayLike<{ name: string }>,
  ).map((a) => a.name);

  for (const attr of attrs) {
    // Remove event handlers (onclick, onerror, onload, …)
    if (/^on/i.test(attr)) {
      el.removeAttribute(attr);
      continue;
    }

    if (SCRIPTABLE_URL_ATTRIBUTES.has(attr.toLowerCase())) {
      const val = el.getAttribute(attr) ?? "";
      // URL parsers discard leading control characters/spaces and any tab or
      // newline inside the URL, so "java\tscript:" still runs as script.
      // Compare against a copy with all such characters removed.
      const compact = val.replace(/[\u0000-\u0020]/g, "");
      if (/^javascript:/i.test(compact)) {
        el.removeAttribute(attr);
      }
    }
  }

  // Strip mso-* inline style properties (Office HTML noise)
  const style = el.getAttribute("style");
  if (style !== null) {
    const cleaned = cleanMsoStyles(style);
    if (cleaned) {
      el.setAttribute("style", cleaned);
    } else {
      el.removeAttribute("style");
    }
  }
}
/**
 * Turns raw email content into HTML that is safe to embed in feeds and entry
 * pages.
 *
 * Steps, in order:
 * 1. Empty input yields an empty string.
 * 2. Content without any tag-like sequence is HTML-escaped and wrapped in a
 *    `<pre>` block.
 * 3. Otherwise the content is parsed and `<script>`, `<object>`, `<embed>`,
 *    `<iframe>`, `<frame>` and `<frameset>` elements are removed.
 * 4. Every remaining element is sanitised (event handlers, `javascript:`
 *    URLs, `mso-*` inline styles).
 * 5. `cid:` sources are rewritten to the stored attachment URL when at least
 *    one attachment has a Content-ID.
 *
 * @param content Raw email body (HTML or plain text).
 * @param attachments Attachments of the email; only those with a Content-ID
 *   take part in `cid:` rewriting.
 * @param baseUrl Prefix for rewritten attachment URLs. "" produces relative
 *   URLs; a non-empty value produces absolute ones.
 * @returns The inner HTML of `<body>` when the parsed document has one,
 *   otherwise the serialised document.
 */
export function processEmailContent(
  content: string,
  attachments?: AttachmentData[],
  baseUrl = "",
): string {
  if (!content) return "";

  if (isPlainText(content)) {
    const escaped = escapeHtml(content);
    return `<pre style="white-space: pre-wrap; word-break: break-word;">${escaped}</pre>`;
  }

  // Index attachments by normalised Content-ID; later duplicates win.
  const byContentId = new Map<string, AttachmentData>();
  (attachments ?? []).forEach((candidate) => {
    const key = normalizeCid(candidate.contentId);
    if (key) byContentId.set(key, candidate);
  });

  const { document } = parseHTML(content);

  const activeContent = document.querySelectorAll(
    "script, object, embed, iframe, frame, frameset",
  );
  activeContent.forEach((node: Element) => node.remove());

  const everyElement = document.querySelectorAll("*");
  everyElement.forEach((node: Element) => sanitizeElement(node));

  if (byContentId.size > 0) {
    const withSource = document.querySelectorAll("[src]");
    withSource.forEach((node: Element) =>
      rewriteCidSrc(node, byContentId, baseUrl),
    );
  }

  // A parsed full document has a <body>; a bodyless fragment is serialised
  // as a whole so the sanitising above still shows up in the result.
  const bodyElement = document.querySelector("body");
  if (bodyElement) return bodyElement.innerHTML;
  return document.toString();
}
+26
View File
@@ -0,0 +1,26 @@
/** Severity levels understood by the logger. */
type LogLevel = "info" | "warn" | "error" | "debug";

/**
 * Writes one JSON-encoded log line.
 *
 * The line contains `level` and `message` followed by the keys of `data`.
 * Because `data` is spread last, keys named `level` or `message` in `data`
 * replace the built-in ones. "error" and "warn" lines go to `console.error`,
 * all others to `console.log`.
 *
 * Logging must never break the caller: if the entry cannot be serialised
 * (circular reference, BigInt value, throwing `toJSON`), a reduced line with
 * a `logError` field describing the failure is written instead.
 *
 * @param level Severity of the entry.
 * @param message Human-readable message.
 * @param data Optional structured fields merged into the entry.
 */
function log(
  level: LogLevel,
  message: string,
  data?: Record<string, unknown>,
): void {
  const entry = data ? { level, message, ...data } : { level, message };

  let line: string;
  try {
    line = JSON.stringify(entry);
  } catch (serializationError) {
    line = JSON.stringify({
      level,
      message,
      logError: `unserializable log data: ${String(serializationError)}`,
    });
  }

  if (level === "error" || level === "warn") {
    console.error(line);
  } else {
    console.log(line);
  }
}
/**
 * Structured logger with one method per severity level. Each method forwards
 * its message and optional structured data to `log` with the matching level.
 */
export const logger = {
  /** Logs at "info" level. */
  info(message: string, data?: Record<string, unknown>): void {
    log("info", message, data);
  },
  /** Logs at "warn" level. */
  warn(message: string, data?: Record<string, unknown>): void {
    log("warn", message, data);
  },
  /** Logs at "error" level. */
  error(message: string, data?: Record<string, unknown>): void {
    log("error", message, data);
  },
  /** Logs at "debug" level. */
  debug(message: string, data?: Record<string, unknown>): void {
    log("debug", message, data);
  },
};
+184
View File
@@ -0,0 +1,184 @@
import { describe, it, expect } from "vitest";
import { http, HttpResponse } from "msw";
import { server, createMockEnv } from "../test/setup";
import {
parseOneClickUnsubscribe,
sendOneClickUnsubscribe,
sendUnsubscribes,
} from "./unsubscribe";
import { getCounters } from "../application/stats";
import type { Env } from "../types";
// Value of the List-Unsubscribe-Post header used throughout these tests; the
// sendOneClickUnsubscribe suite also expects it as the POST body.
const POST_HEADER = "List-Unsubscribe=One-Click";
// Suite: header parsing. A URL is only returned for an https: entry in
// List-Unsubscribe combined with the one-click Post header.
describe("parseOneClickUnsubscribe", () => {
it("returns the https URL when the one-click Post header is present", () => {
expect(
parseOneClickUnsubscribe({
"list-unsubscribe": "<https://news.example.com/u?t=abc>",
"list-unsubscribe-post": POST_HEADER,
}),
).toBe("https://news.example.com/u?t=abc");
});
// The mailto entry comes first in the header but must be skipped.
it("prefers the https URL when both https and mailto are present", () => {
expect(
parseOneClickUnsubscribe({
"list-unsubscribe":
"<mailto:unsub@example.com>, <https://example.com/u/1>",
"list-unsubscribe-post": POST_HEADER,
}),
).toBe("https://example.com/u/1");
});
it("returns null for a mailto-only header", () => {
expect(
parseOneClickUnsubscribe({
"list-unsubscribe": "<mailto:unsub@example.com>",
"list-unsubscribe-post": POST_HEADER,
}),
).toBeNull();
});
// Without the Post header a one-click POST is not authorised.
it("returns null when the Post header is missing", () => {
expect(
parseOneClickUnsubscribe({
"list-unsubscribe": "<https://example.com/u/1>",
}),
).toBeNull();
});
it("returns null when the Post header has the wrong value", () => {
expect(
parseOneClickUnsubscribe({
"list-unsubscribe": "<https://example.com/u/1>",
"list-unsubscribe-post": "List-Unsubscribe=Something",
}),
).toBeNull();
});
// Both the header names and the Post value are compared ignoring case.
it("matches headers and Post value case-insensitively", () => {
expect(
parseOneClickUnsubscribe({
"List-Unsubscribe": "<https://example.com/u/1>",
"List-Unsubscribe-Post": "list-unsubscribe=ONE-CLICK",
}),
).toBe("https://example.com/u/1");
});
it("ignores plaintext http URLs", () => {
expect(
parseOneClickUnsubscribe({
"list-unsubscribe": "<http://example.com/u/1>",
"list-unsubscribe-post": POST_HEADER,
}),
).toBeNull();
});
it("returns null when there are no headers", () => {
expect(parseOneClickUnsubscribe({})).toBeNull();
});
});
// Suite: the outgoing one-click POST, with the HTTP layer mocked via msw.
describe("sendOneClickUnsubscribe", () => {
// Captures the request seen by the mock endpoint and checks method,
// content type and body.
it("POSTs the one-click body and returns true on success", async () => {
let captured: { method: string; contentType: string; body: string } | null =
null;
server.use(
http.post("https://example.com/u/1", async ({ request }) => {
captured = {
method: request.method,
contentType: request.headers.get("content-type") ?? "",
body: await request.text(),
};
return HttpResponse.text("ok");
}),
);
const ok = await sendOneClickUnsubscribe("https://example.com/u/1");
expect(ok).toBe(true);
expect(captured).toEqual({
method: "POST",
contentType: "application/x-www-form-urlencoded",
body: POST_HEADER,
});
});
it("returns false on a non-ok response", async () => {
server.use(
http.post("https://example.com/u/1", () =>
HttpResponse.text("nope", { status: 404 }),
),
);
expect(await sendOneClickUnsubscribe("https://example.com/u/1")).toBe(
false,
);
});
// A network-level failure must resolve to false instead of rejecting.
it("returns false (no throw) on a network error", async () => {
server.use(
http.post("https://example.com/u/1", () => HttpResponse.error()),
);
expect(await sendOneClickUnsubscribe("https://example.com/u/1")).toBe(
false,
);
});
});
// Suite: batch sending. Checks URL de-duplication and that the
// unsubscribes_sent counter read back via getCounters reflects only the
// successful requests.
describe("sendUnsubscribes", () => {
it("de-dupes URLs and bumps unsubscribes_sent by the success count", async () => {
const env = createMockEnv() as unknown as Env;
// Per-endpoint hit counters to prove each distinct URL is requested once.
let hitsOne = 0;
let hitsTwo = 0;
server.use(
http.post("https://example.com/a", () => {
hitsOne += 1;
return HttpResponse.text("ok");
}),
http.post("https://example.com/b", () => {
hitsTwo += 1;
return HttpResponse.text("ok");
}),
);
await sendUnsubscribes(
[
"https://example.com/a",
"https://example.com/a",
"https://example.com/b",
],
env,
);
expect(hitsOne).toBe(1);
expect(hitsTwo).toBe(1);
const counters = await getCounters(env.EMAIL_STORAGE);
expect(counters.unsubscribes_sent).toBe(2);
});
// One endpoint answers 500, so only one of the two requests counts.
it("only counts successful requests", async () => {
const env = createMockEnv() as unknown as Env;
server.use(
http.post("https://example.com/ok", () => HttpResponse.text("ok")),
http.post("https://example.com/bad", () =>
HttpResponse.text("no", { status: 500 }),
),
);
await sendUnsubscribes(
["https://example.com/ok", "https://example.com/bad"],
env,
);
const counters = await getCounters(env.EMAIL_STORAGE);
expect(counters.unsubscribes_sent).toBe(1);
});
it("does nothing for an empty list", async () => {
const env = createMockEnv() as unknown as Env;
await sendUnsubscribes([], env);
const counters = await getCounters(env.EMAIL_STORAGE);
expect(counters.unsubscribes_sent).toBe(0);
});
});
+83
View File
@@ -0,0 +1,83 @@
import { Env } from "../types";
import { UNSUBSCRIBE_TIMEOUT_MS } from "../config/constants";
import { bumpCounters } from "../application/stats";
import { logger } from "../infrastructure/logger";
/**
 * Finds the one-click unsubscribe URL (RFC 8058) in a set of email headers.
 *
 * A URL is returned only when the `List-Unsubscribe-Post` header equals
 * `List-Unsubscribe=One-Click` (compared case-insensitively after trimming);
 * that header is what authorises an unattended POST. The result is then the
 * first angle-bracketed entry of `List-Unsubscribe` that starts with
 * `https://`. `mailto:` and plain `http:` entries are skipped.
 *
 * Header names are compared case-insensitively. If the same header occurs
 * under several spellings, the one iterated last wins.
 *
 * @param headers Header name/value pairs of the email.
 * @returns The https unsubscribe URL, or `null` when none qualifies.
 */
export function parseOneClickUnsubscribe(
  headers: Record<string, string>,
): string | null {
  let unsubscribeHeader = "";
  let postHeader = "";

  for (const [name, value] of Object.entries(headers)) {
    switch (name.toLowerCase()) {
      case "list-unsubscribe":
        unsubscribeHeader = value;
        break;
      case "list-unsubscribe-post":
        postHeader = value;
        break;
      default:
        break;
    }
  }

  const oneClickAuthorised =
    postHeader.trim().toLowerCase() === "list-unsubscribe=one-click";
  if (!oneClickAuthorised) return null;

  // Each entry is written as <uri>; the capture group is the text between
  // the brackets.
  for (const bracketed of unsubscribeHeader.matchAll(/<([^>]+)>/g)) {
    const candidate = bracketed[1].trim();
    if (/^https:\/\//i.test(candidate)) return candidate;
  }
  return null;
}
/**
 * Sends one RFC 8058 one-click unsubscribe request.
 *
 * The request is a form-encoded POST with the body
 * `List-Unsubscribe=One-Click`, follows redirects and is aborted after
 * `UNSUBSCRIBE_TIMEOUT_MS`. Failures of the fetch itself (network error,
 * timeout) are logged as a warning and reported as `false`, so callers can
 * carry on with the next URL.
 *
 * @param url Unsubscribe endpoint to call.
 * @returns `true` when the endpoint answered with an ok status.
 */
export async function sendOneClickUnsubscribe(url: string): Promise<boolean> {
  let response: Response;
  try {
    response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/x-www-form-urlencoded",
        "User-Agent": "kill-the-news/1.0",
      },
      body: "List-Unsubscribe=One-Click",
      redirect: "follow",
      signal: AbortSignal.timeout(UNSUBSCRIBE_TIMEOUT_MS),
    });
  } catch (cause) {
    logger.warn("One-click unsubscribe failed", { url, error: String(cause) });
    return false;
  }
  return response.ok;
}
/**
 * Sends one-click unsubscribe requests for a batch of URLs and records how
 * many succeeded in the `unsubscribes_sent` counter.
 *
 * Empty entries are dropped and duplicates are requested only once. All
 * requests run concurrently. Never throws: intended to run in the background
 * via ctx.waitUntil on feed deletion, so a failure to persist the counter is
 * logged as a warning instead of rejecting the returned promise.
 *
 * @param urls Unsubscribe URLs; may contain duplicates or empty strings.
 * @param env Worker environment providing the `EMAIL_STORAGE` binding.
 */
export async function sendUnsubscribes(
  urls: string[],
  env: Env,
): Promise<void> {
  const unique = Array.from(new Set(urls.filter(Boolean)));
  if (unique.length === 0) return;

  const results = await Promise.allSettled(
    unique.map((url) => sendOneClickUnsubscribe(url)),
  );
  // A request counts only when it settled successfully AND reported true.
  const succeeded = results.filter(
    (r) => r.status === "fulfilled" && r.value,
  ).length;
  if (succeeded === 0) return;

  try {
    await bumpCounters(env.EMAIL_STORAGE, { unsubscribes_sent: succeeded });
  } catch (error) {
    // The unsubscribes themselves already went out; losing the statistic
    // must not surface as a rejection of the background task.
    logger.warn("Failed to record unsubscribes_sent counter", {
      succeeded,
      error: String(error),
    });
  }
}
+30
View File
@@ -0,0 +1,30 @@
import { Env } from "../types";
/**
 * Builds the public origin of the service.
 *
 * @param env Worker environment; `DOMAIN` is used as the host.
 * @returns `https://` followed by the configured domain, without a trailing
 *   slash.
 */
export function baseUrl(env: Env): string {
  const host = env.DOMAIN;
  return `https://${host}`;
}
/**
 * Builds the public URL of a feed's RSS document.
 *
 * @param feedId Identifier of the feed.
 * @param env Worker environment used to derive the base URL.
 * @returns `<base URL>/rss/<feedId>`.
 */
export function feedRssUrl(feedId: string, env: Env): string {
  const origin = baseUrl(env);
  return `${origin}/rss/${feedId}`;
}
/**
 * Builds the public URL of a feed's Atom document.
 *
 * @param feedId Identifier of the feed.
 * @param env Worker environment used to derive the base URL.
 * @returns `<base URL>/atom/<feedId>`.
 */
export function feedAtomUrl(feedId: string, env: Env): string {
  const origin = baseUrl(env);
  return `${origin}/atom/${feedId}`;
}
/**
 * Builds the public URL of a feed in the requested format.
 *
 * @param format "rss" selects the RSS URL; any other value ("atom") selects
 *   the Atom URL.
 * @param feedId Identifier of the feed.
 * @param env Worker environment used to derive the base URL.
 * @returns The format-specific feed URL.
 */
export function feedUrl(
  format: "rss" | "atom",
  feedId: string,
  env: Env,
): string {
  if (format === "rss") {
    return feedRssUrl(feedId, env);
  }
  return feedAtomUrl(feedId, env);
}
/**
 * Builds the inbound email address that belongs to a feed.
 *
 * @param feedId Identifier of the feed; used as the local part.
 * @param env Worker environment. `EMAIL_DOMAIN` is used as the mail domain;
 *   `DOMAIN` is the fallback when `EMAIL_DOMAIN` is null or undefined.
 * @returns `<feedId>@<mail domain>`.
 */
export function feedEmailAddress(feedId: string, env: Env): string {
  const mailDomain = env.EMAIL_DOMAIN ?? env.DOMAIN;
  return `${feedId}@${mailDomain}`;
}
/**
 * Builds the pattern that recognises this service's feed URLs.
 *
 * The configured domain is matched literally: every regular-expression
 * metacharacter in it is escaped, not just ".", so an unusual DOMAIN value
 * can neither widen the match nor produce an invalid pattern.
 *
 * @param env Worker environment; `DOMAIN` is the expected host.
 * @returns A RegExp matching `https://<DOMAIN>/(rss|atom)/<id>` exactly.
 *   Capture group 1 is the format, group 2 the path segment after it.
 */
export function feedTopicPattern(env: Env): RegExp {
  const escaped = env.DOMAIN.replaceAll(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return new RegExp(`^https://${escaped}/(rss|atom)/([^/]+)$`);
}
+541
View File
@@ -0,0 +1,541 @@
import { describe, it, expect } from "vitest";
import { http, HttpResponse } from "msw";
import { server, createMockEnv } from "../test/setup";
import {
buildHmacSignature,
getSubscriptions,
saveSubscriptions,
notifySubscribers,
verifyAndStoreSubscription,
verifyAndDeleteSubscription,
} from "./websub";
import type { Env, WebSubSubscription } from "../types";
// Creates a fresh mock environment per test, cast to the Env type the
// functions under test expect.
const mockEnv = () => createMockEnv() as unknown as Env;
// Suite: shape and determinism of the HMAC signature string.
describe("buildHmacSignature", () => {
// Expected format: "sha256=" followed by 64 lower-case hex digits.
it("returns sha256= prefixed hex", async () => {
const sig = await buildHmacSignature("hello", "secret");
expect(sig).toMatch(/^sha256=[0-9a-f]{64}$/);
});
it("produces different sigs for different secrets", async () => {
const a = await buildHmacSignature("body", "secret1");
const b = await buildHmacSignature("body", "secret2");
expect(a).not.toBe(b);
});
it("produces the same sig for same inputs", async () => {
const a = await buildHmacSignature("body", "secret");
const b = await buildHmacSignature("body", "secret");
expect(a).toBe(b);
});
});
// Suite: persistence of subscription lists, including the storage key used.
describe("getSubscriptions / saveSubscriptions", () => {
it("returns empty array when no subs exist", async () => {
const env = mockEnv();
expect(await getSubscriptions("feed1", env)).toEqual([]);
});
it("round-trips stored subscriptions", async () => {
const env = mockEnv();
const subs: WebSubSubscription[] = [
{
callbackUrl: "https://reader.example/sub",
expiresAt: Date.now() + 60000,
},
];
await saveSubscriptions("feed1", subs, env);
expect(await getSubscriptions("feed1", env)).toEqual(subs);
});
// Pins the key layout: subscriptions live under "websub:subs:<feedId>".
it("uses the correct KV key", async () => {
const env = mockEnv();
await saveSubscriptions("abc", [], env);
expect(
await env.EMAIL_STORAGE.get("websub:subs:abc", { type: "json" }),
).toEqual([]);
});
});
// Suite: content distribution to subscribers. Most cases seed the mock
// storage with feed metadata ("feed:feed1:metadata") and config
// ("feed:feed1:config"), store subscriptions, and then inspect the request
// received by the msw-mocked callback endpoint.
describe("notifySubscribers", () => {
it("does nothing when no subscriptions exist", async () => {
const env = mockEnv();
let called = false;
server.use(
http.post("https://reader.example/callback", () => {
called = true;
return HttpResponse.text("ok");
}),
);
await notifySubscribers("feed1", env);
expect(called).toBe(false);
});
// A subscription exists but no feed metadata was stored, so nothing can be
// delivered.
it("does nothing when feed metadata missing", async () => {
const env = mockEnv();
const subs: WebSubSubscription[] = [
{
callbackUrl: "https://reader.example/callback",
expiresAt: Date.now() + 60000,
},
];
await saveSubscriptions("feed1", subs, env);
let called = false;
server.use(
http.post("https://reader.example/callback", () => {
called = true;
return HttpResponse.text("ok");
}),
);
await notifySubscribers("feed1", env);
expect(called).toBe(false);
});
// A subscription without an explicit format receives the RSS document.
it("POSTs feed XML to subscriber callback", async () => {
const env = mockEnv();
await env.EMAIL_STORAGE.put(
"feed:feed1:metadata",
JSON.stringify({ emails: [] }),
);
await env.EMAIL_STORAGE.put(
"feed:feed1:config",
JSON.stringify({
title: "Test Feed",
language: "en",
created_at: Date.now(),
}),
);
const subs: WebSubSubscription[] = [
{
callbackUrl: "https://reader.example/callback",
expiresAt: Date.now() + 60000,
},
];
await saveSubscriptions("feed1", subs, env);
let receivedBody = "";
let receivedContentType = "";
server.use(
http.post("https://reader.example/callback", async ({ request }) => {
receivedBody = await request.text();
receivedContentType = request.headers.get("Content-Type") ?? "";
return HttpResponse.text("ok");
}),
);
await notifySubscribers("feed1", env);
expect(receivedBody).toContain("<?xml");
expect(receivedContentType).toContain("application/rss+xml");
});
// With a secret, only the SHA-256 signature header is expected; the legacy
// X-Hub-Signature header must be absent.
it("includes X-Hub-Signature-256 header when secret set (no X-Hub-Signature)", async () => {
const env = mockEnv();
await env.EMAIL_STORAGE.put(
"feed:feed1:metadata",
JSON.stringify({ emails: [] }),
);
await env.EMAIL_STORAGE.put(
"feed:feed1:config",
JSON.stringify({
title: "Test Feed",
language: "en",
created_at: Date.now(),
}),
);
const subs: WebSubSubscription[] = [
{
callbackUrl: "https://reader.example/callback",
expiresAt: Date.now() + 60000,
secret: "mysecret",
},
];
await saveSubscriptions("feed1", subs, env);
let receivedSig256 = "";
let receivedSig = "";
server.use(
http.post("https://reader.example/callback", async ({ request }) => {
receivedSig256 = request.headers.get("X-Hub-Signature-256") ?? "";
receivedSig = request.headers.get("X-Hub-Signature") ?? "";
return HttpResponse.text("ok");
}),
);
await notifySubscribers("feed1", env);
expect(receivedSig256).toMatch(/^sha256=[0-9a-f]{64}$/);
expect(receivedSig).toBe(""); // legacy header should NOT be sent
});
// Atom subscribers get the Atom content type and a Link header whose
// rel="self" entry points at the /atom/ URL.
it("POSTs Atom feed XML with correct Content-Type to Atom subscriber", async () => {
const env = mockEnv();
await env.EMAIL_STORAGE.put(
"feed:feed1:metadata",
JSON.stringify({ emails: [] }),
);
await env.EMAIL_STORAGE.put(
"feed:feed1:config",
JSON.stringify({
title: "Test Feed",
language: "en",
created_at: Date.now(),
}),
);
const subs: WebSubSubscription[] = [
{
callbackUrl: "https://atom-reader.example/callback",
expiresAt: Date.now() + 60000,
format: "atom",
},
];
await saveSubscriptions("feed1", subs, env);
let receivedContentType = "";
let receivedLink = "";
server.use(
http.post("https://atom-reader.example/callback", async ({ request }) => {
receivedContentType = request.headers.get("Content-Type") ?? "";
receivedLink = request.headers.get("Link") ?? "";
return HttpResponse.text("ok");
}),
);
await notifySubscribers("feed1", env);
expect(receivedContentType).toContain("application/atom+xml");
expect(receivedLink).toContain(`/atom/feed1`);
expect(receivedLink).toContain(`rel="self"`);
});
// Mixed subscriber list: each callback must receive its own format.
it("notifies RSS and Atom subscribers independently with correct formats", async () => {
const env = mockEnv();
await env.EMAIL_STORAGE.put(
"feed:feed1:metadata",
JSON.stringify({ emails: [] }),
);
await env.EMAIL_STORAGE.put(
"feed:feed1:config",
JSON.stringify({
title: "Test Feed",
language: "en",
created_at: Date.now(),
}),
);
const subs: WebSubSubscription[] = [
{
callbackUrl: "https://rss-reader.example/callback",
expiresAt: Date.now() + 60000,
format: "rss",
},
{
callbackUrl: "https://atom-reader.example/callback",
expiresAt: Date.now() + 60000,
format: "atom",
},
];
await saveSubscriptions("feed1", subs, env);
// Content-Type seen by each callback, keyed by format.
const received: Record<string, string> = {};
server.use(
http.post("https://rss-reader.example/callback", async ({ request }) => {
received.rss = request.headers.get("Content-Type") ?? "";
return HttpResponse.text("ok");
}),
http.post("https://atom-reader.example/callback", async ({ request }) => {
received.atom = request.headers.get("Content-Type") ?? "";
return HttpResponse.text("ok");
}),
);
await notifySubscribers("feed1", env);
expect(received.rss).toContain("application/rss+xml");
expect(received.atom).toContain("application/atom+xml");
});
// One subscription is already expired: it must not be called and must be
// gone from storage afterwards.
it("prunes expired subscriptions and does not notify them", async () => {
const env = mockEnv();
await env.EMAIL_STORAGE.put(
"feed:feed1:metadata",
JSON.stringify({ emails: [] }),
);
await env.EMAIL_STORAGE.put(
"feed:feed1:config",
JSON.stringify({
title: "Test Feed",
language: "en",
created_at: Date.now(),
}),
);
const subs: WebSubSubscription[] = [
{
callbackUrl: "https://expired.example/callback",
expiresAt: Date.now() - 1000,
},
{
callbackUrl: "https://active.example/callback",
expiresAt: Date.now() + 60000,
},
];
await saveSubscriptions("feed1", subs, env);
const notified: string[] = [];
server.use(
http.post("https://expired.example/callback", () => {
notified.push("expired");
return HttpResponse.text("ok");
}),
http.post("https://active.example/callback", () => {
notified.push("active");
return HttpResponse.text("ok");
}),
);
await notifySubscribers("feed1", env);
expect(notified).toEqual(["active"]);
const remaining = await getSubscriptions("feed1", env);
expect(remaining).toHaveLength(1);
expect(remaining[0].callbackUrl).toBe("https://active.example/callback");
});
});
// Suite: subscription verification. The mocked callback receives a GET with
// hub.* query parameters; a subscription is stored only when the callback
// answers successfully with the hub.challenge value.
describe("verifyAndStoreSubscription", () => {
it("stores subscription and returns true when callback echoes challenge", async () => {
const env = mockEnv();
server.use(
http.get("https://reader.example/callback", ({ request }) => {
const url = new URL(request.url);
const challenge = url.searchParams.get("hub.challenge") ?? "";
return HttpResponse.text(challenge);
}),
);
const result = await verifyAndStoreSubscription(
"feed1",
"https://reader.example/callback",
undefined,
86400,
"rss",
env,
);
expect(result).toBe(true);
const subs = await getSubscriptions("feed1", env);
expect(subs).toHaveLength(1);
expect(subs[0].callbackUrl).toBe("https://reader.example/callback");
expect(subs[0].expiresAt).toBeGreaterThan(Date.now());
});
// The hub.topic sent to the callback must be the Atom URL, and the stored
// subscription must remember the format.
it("stores format=atom and sends atom topic URL in verification request", async () => {
const env = mockEnv();
let receivedTopic = "";
server.use(
http.get("https://reader.example/callback", ({ request }) => {
const url = new URL(request.url);
receivedTopic = url.searchParams.get("hub.topic") ?? "";
const challenge = url.searchParams.get("hub.challenge") ?? "";
return HttpResponse.text(challenge);
}),
);
const result = await verifyAndStoreSubscription(
"feed1",
"https://reader.example/callback",
undefined,
86400,
"atom",
env,
);
expect(result).toBe(true);
expect(receivedTopic).toContain("/atom/feed1");
const subs = await getSubscriptions("feed1", env);
expect(subs[0].format).toBe("atom");
});
it("returns false and does not store when callback returns wrong challenge", async () => {
const env = mockEnv();
server.use(
http.get("https://reader.example/callback", () =>
HttpResponse.text("wrong"),
),
);
const result = await verifyAndStoreSubscription(
"feed1",
"https://reader.example/callback",
undefined,
86400,
"rss",
env,
);
expect(result).toBe(false);
const subs = await getSubscriptions("feed1", env);
expect(subs).toHaveLength(0);
});
// Re-subscribing with the same callback must replace the entry, not add a
// second one.
it("updates existing subscription with same callback", async () => {
const env = mockEnv();
const existing: WebSubSubscription[] = [
{ callbackUrl: "https://reader.example/callback", expiresAt: 1000 },
];
await saveSubscriptions("feed1", existing, env);
server.use(
http.get("https://reader.example/callback", ({ request }) => {
const challenge =
new URL(request.url).searchParams.get("hub.challenge") ?? "";
return HttpResponse.text(challenge);
}),
);
const result = await verifyAndStoreSubscription(
"feed1",
"https://reader.example/callback",
"newsecret",
3600,
"rss",
env,
);
expect(result).toBe(true);
const subs = await getSubscriptions("feed1", env);
expect(subs).toHaveLength(1);
expect(subs[0].secret).toBe("newsecret");
});
it("returns false when callback fetch fails", async () => {
const env = mockEnv();
server.use(
http.get("https://reader.example/callback", () => HttpResponse.error()),
);
const result = await verifyAndStoreSubscription(
"feed1",
"https://reader.example/callback",
undefined,
86400,
"rss",
env,
);
expect(result).toBe(false);
const subs = await getSubscriptions("feed1", env);
expect(subs).toHaveLength(0);
});
// A correct challenge echoed with a 500 status must still be rejected.
it("returns false when callback returns non-ok HTTP status", async () => {
const env = mockEnv();
server.use(
http.get("https://reader.example/callback", ({ request }) => {
const challenge =
new URL(request.url).searchParams.get("hub.challenge") ?? "";
return HttpResponse.text(challenge, { status: 500 });
}),
);
const result = await verifyAndStoreSubscription(
"feed1",
"https://reader.example/callback",
undefined,
86400,
"rss",
env,
);
expect(result).toBe(false);
const subs = await getSubscriptions("feed1", env);
expect(subs).toHaveLength(0);
});
});
// Suite: unsubscribe verification. Each case starts with one stored
// subscription; it may only be removed when the callback echoes the
// hub.challenge value.
describe("verifyAndDeleteSubscription", () => {
it("removes subscription and returns true when callback echoes challenge", async () => {
const env = mockEnv();
await saveSubscriptions(
"feed1",
[
{
callbackUrl: "https://reader.example/callback",
expiresAt: Date.now() + 60000,
},
],
env,
);
server.use(
http.get("https://reader.example/callback", ({ request }) => {
const challenge =
new URL(request.url).searchParams.get("hub.challenge") ?? "";
return HttpResponse.text(challenge);
}),
);
const result = await verifyAndDeleteSubscription(
"feed1",
"https://reader.example/callback",
env,
);
expect(result).toBe(true);
const subs = await getSubscriptions("feed1", env);
expect(subs).toHaveLength(0);
});
// A wrong challenge means the subscriber did not confirm: keep the entry.
it("returns false and leaves subscription intact when callback returns wrong challenge", async () => {
const env = mockEnv();
await saveSubscriptions(
"feed1",
[
{
callbackUrl: "https://reader.example/callback",
expiresAt: Date.now() + 60000,
},
],
env,
);
server.use(
http.get("https://reader.example/callback", () =>
HttpResponse.text("nope"),
),
);
const result = await verifyAndDeleteSubscription(
"feed1",
"https://reader.example/callback",
env,
);
expect(result).toBe(false);
const subs = await getSubscriptions("feed1", env);
expect(subs).toHaveLength(1);
});
// A network failure must not delete anything either.
it("returns false and leaves subscription intact when callback fetch fails", async () => {
const env = mockEnv();
await saveSubscriptions(
"feed1",
[
{
callbackUrl: "https://reader.example/callback",
expiresAt: Date.now() + 60000,
},
],
env,
);
server.use(
http.get("https://reader.example/callback", () => HttpResponse.error()),
);
const result = await verifyAndDeleteSubscription(
"feed1",
"https://reader.example/callback",
env,
);
expect(result).toBe(false);
const subs = await getSubscriptions("feed1", env);
expect(subs).toHaveLength(1);
});
});
+228
View File
@@ -0,0 +1,228 @@
import { Env, FeedConfig, EmailData, WebSubSubscription } from "../types";
import { generateRssFeed, generateAtomFeed } from "./feed-generator";
import { baseUrl, feedRssUrl, feedAtomUrl, feedUrl } from "./urls";
import { FeedRepository } from "../domain/feed-repository";
import { WebSubSubscriptionRepository } from "../domain/websub-subscription-repository";
import { FeedId } from "../domain/value-objects/feed-id";
/**
 * Loads the WebSub subscriptions stored for a feed.
 *
 * Thin wrapper that delegates to `WebSubSubscriptionRepository`.
 *
 * @param feedId Identifier of the feed.
 * @param env Worker environment from which the repository is created.
 * @returns The subscriptions returned by the repository for this feed.
 */
export async function getSubscriptions(
  feedId: string,
  env: Env,
): Promise<WebSubSubscription[]> {
  const repository = WebSubSubscriptionRepository.from(env);
  return repository.get(feedId);
}
/**
 * Stores the given list as the WebSub subscriptions of a feed.
 *
 * Thin wrapper that delegates to `WebSubSubscriptionRepository`.
 *
 * @param feedId Identifier of the feed.
 * @param subscriptions Subscription list handed to the repository.
 * @param env Worker environment from which the repository is created.
 */
export async function saveSubscriptions(
  feedId: string,
  subscriptions: WebSubSubscription[],
  env: Env,
): Promise<void> {
  const repository = WebSubSubscriptionRepository.from(env);
  await repository.save(feedId, subscriptions);
}
/**
 * Computes the HMAC-SHA256 signature of a payload.
 *
 * Both the secret and the body are encoded as UTF-8 before signing.
 *
 * @param body Payload to sign.
 * @param secret Shared secret used as the HMAC key.
 * @returns `sha256=` followed by the digest as 64 lower-case hex digits.
 */
export async function buildHmacSignature(
  body: string,
  secret: string,
): Promise<string> {
  const utf8 = new TextEncoder();

  const hmacKey = await crypto.subtle.importKey(
    "raw",
    utf8.encode(secret),
    { name: "HMAC", hash: "SHA-256" },
    false, // the key never needs to be exported
    ["sign"],
  );
  const digest = new Uint8Array(
    await crypto.subtle.sign("HMAC", hmacKey, utf8.encode(body)),
  );

  let hex = "";
  for (const byte of digest) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return `sha256=${hex}`;
}
/**
 * Renders the current feed document for distribution to subscribers.
 *
 * Metadata and config are loaded in parallel. When no config is stored, a
 * generic one (title derived from the feed id, language "en", created now)
 * is used. Only the first 20 entries of the metadata's email list are
 * loaded, and entries whose email resolves to null are skipped.
 *
 * @param feedId Identifier of the feed.
 * @param env Worker environment (storage bindings, domain).
 * @param format Output format; defaults to "rss".
 * @returns The feed XML, or `null` when the feed has no metadata.
 */
async function buildFeedXml(
  feedId: string,
  env: Env,
  format: "rss" | "atom" = "rss",
): Promise<string | null> {
  const feeds = FeedRepository.from(env);
  const trustedId = FeedId.fromTrusted(feedId);

  const [metadata, storedConfig] = await Promise.all([
    feeds.getMetadata(trustedId),
    feeds.getConfig(trustedId),
  ]);
  if (!metadata) return null;

  const siteBase = baseUrl(env);

  let config: FeedConfig;
  if (storedConfig !== null && storedConfig !== undefined) {
    config = storedConfig;
  } else {
    config = {
      title: `Newsletter Feed ${feedId}`,
      description: "Converted email newsletter",
      language: "en",
      created_at: Date.now(),
    };
  }

  const recentEntries = metadata.emails.slice(0, 20);
  const loaded = await Promise.all(
    recentEntries.map((entry) => feeds.getEmail(entry.key)),
  );
  const emailsData = loaded.filter(
    (item): item is EmailData => item !== null,
  );

  if (format !== "atom") {
    return generateRssFeed(config, emailsData, siteBase, feedId);
  }
  return generateAtomFeed(
    config,
    emailsData,
    siteBase,
    feedId,
    feedAtomUrl(feedId, env),
  );
}
/**
 * Pushes the current feed document to every active WebSub subscriber.
 *
 * Subscriptions whose `expiresAt` is not in the future are ignored and, once
 * delivery has been attempted (or when no active subscription remains), the
 * pruned list is written back. Subscriptions without a `format` are treated
 * as RSS. Each format's XML is built at most once and only when at least one
 * subscriber wants it.
 *
 * Delivery is best-effort: a failure for one subscriber never prevents
 * delivery to the others and never rejects this function. Both non-2xx
 * responses and thrown errors (network failure, signing failure) are logged.
 *
 * @param feedId - Identifier of the feed that changed.
 * @param env - Worker bindings used for storage access and URL building.
 */
export async function notifySubscribers(
  feedId: string,
  env: Env,
): Promise<void> {
  const subs = await getSubscriptions(feedId, env);
  const now = Date.now();
  const active = subs.filter((s) => s.expiresAt > now);
  const hasExpired = active.length < subs.length;

  if (active.length === 0) {
    // Nothing to deliver; still persist the pruned (empty) list if needed.
    if (hasExpired) {
      await saveSubscriptions(feedId, active, env);
    }
    return;
  }

  const rssSubs = active.filter((s) => (s.format ?? "rss") === "rss");
  const atomSubs = active.filter((s) => s.format === "atom");
  const [rssFeed, atomFeed] = await Promise.all([
    rssSubs.length > 0 ? buildFeedXml(feedId, env, "rss") : null,
    atomSubs.length > 0 ? buildFeedXml(feedId, env, "atom") : null,
  ]);
  // No document could be built: there is nothing to send. Note that this
  // path returns without persisting the pruned subscription list.
  if (!rssFeed && !atomFeed) return;

  const base = baseUrl(env);

  // Sends one notification. Never rejects: every failure is logged instead,
  // so a broken subscriber cannot go unnoticed or affect the others.
  const deliver = async (
    sub: WebSubSubscription,
    feedXml: string,
    contentType: string,
    selfPath: string,
  ): Promise<void> => {
    try {
      const linkHeader = `<${base}/hub>; rel="hub", <${base}${selfPath}>; rel="self"`;
      const headers: Record<string, string> = {
        "Content-Type": contentType,
        Link: linkHeader,
      };
      // Sign the payload only for subscribers that registered a secret.
      if (sub.secret) {
        headers["X-Hub-Signature-256"] = await buildHmacSignature(
          feedXml,
          sub.secret,
        );
      }
      const res = await fetch(sub.callbackUrl, {
        method: "POST",
        headers,
        body: feedXml,
      });
      if (!res.ok) {
        console.error(
          `WebSub: delivery failed ${sub.callbackUrl}: ${res.status}`,
        );
      }
    } catch (err) {
      // Previously these rejections were silently dropped by allSettled.
      const reason = err instanceof Error ? err.message : String(err);
      console.error(`WebSub: delivery error ${sub.callbackUrl}: ${reason}`);
    }
  };

  await Promise.allSettled([
    ...(rssFeed
      ? rssSubs.map((sub) =>
          deliver(sub, rssFeed, "application/rss+xml", `/rss/${feedId}`),
        )
      : []),
    ...(atomFeed
      ? atomSubs.map((sub) =>
          deliver(sub, atomFeed, "application/atom+xml", `/atom/${feedId}`),
        )
      : []),
  ]);

  if (hasExpired) {
    await saveSubscriptions(feedId, active, env);
  }
}
/**
 * Performs a challenge/echo verification request against a subscriber callback.
 *
 * A random challenge is appended to the callback URL as `hub.challenge`
 * together with `params` (existing query parameters of the callback are kept,
 * same-named ones are overwritten). The callback is verified only when it
 * answers with a successful status and a body that, once trimmed, equals the
 * challenge.
 *
 * Any failure — malformed callback URL, network error, non-2xx status,
 * unreadable body, wrong echo — yields `false`; this function never rejects.
 *
 * @param callbackUrl - Subscriber callback URL to verify.
 * @param params - Extra query parameters to send (e.g. `hub.mode`, `hub.topic`).
 * @returns `true` when the callback echoed the challenge, otherwise `false`.
 */
async function verifyCallback(
  callbackUrl: string,
  params: Record<string, string>,
): Promise<boolean> {
  const challenge = crypto.randomUUID().replace(/-/g, "");
  try {
    // `new URL` throws on malformed input; keep it inside the try so a bad
    // callback is reported as "not verified" rather than as an exception.
    const url = new URL(callbackUrl);
    for (const [k, v] of Object.entries(params)) url.searchParams.set(k, v);
    url.searchParams.set("hub.challenge", challenge);

    const res = await fetch(url.toString());
    if (!res.ok) return false;
    // Reading the body can fail too (connection dropped mid-response).
    return (await res.text()).trim() === challenge;
  } catch {
    return false;
  }
}
/**
 * Verifies a subscribe request with the subscriber and, on success, stores it.
 *
 * The callback is asked to confirm `hub.mode=subscribe` for the feed's topic
 * URL in the requested format and the given lease. When confirmed, the
 * subscription is upserted: an entry with the same callback URL is replaced,
 * otherwise a new one is appended. The secret is stored only when provided.
 *
 * @param feedId - Identifier of the feed being subscribed to.
 * @param callbackUrl - Subscriber callback URL.
 * @param secret - Optional shared secret stored with the subscription.
 * @param leaseSeconds - Lease duration; expiry is now + `leaseSeconds` seconds.
 * @param format - Feed format the subscriber wants (`"rss"` or `"atom"`).
 * @param env - Worker bindings used for storage access and URL building.
 * @returns `true` when verified and stored, `false` when verification failed.
 */
export async function verifyAndStoreSubscription(
  feedId: string,
  callbackUrl: string,
  secret: string | undefined,
  leaseSeconds: number,
  format: "rss" | "atom",
  env: Env,
): Promise<boolean> {
  const confirmed = await verifyCallback(callbackUrl, {
    "hub.mode": "subscribe",
    "hub.topic": feedUrl(format, feedId, env),
    "hub.lease_seconds": String(leaseSeconds),
  });
  if (!confirmed) {
    return false;
  }

  const stored = await getSubscriptions(feedId, env);
  const position = stored.findIndex(
    (candidate) => candidate.callbackUrl === callbackUrl,
  );
  const optionalSecret = secret ? { secret } : {};
  const record: WebSubSubscription = {
    callbackUrl,
    expiresAt: Date.now() + leaseSeconds * 1000,
    format,
    ...optionalSecret,
  };

  // Upsert keyed by callback URL.
  if (position === -1) {
    stored.push(record);
  } else {
    stored[position] = record;
  }

  await saveSubscriptions(feedId, stored, env);
  return true;
}
/**
 * Verifies an unsubscribe request with the subscriber and, on success,
 * removes the subscription for that callback URL.
 *
 * The `hub.topic` sent for verification is the topic the subscription was
 * created for: the stored subscription's format selects the topic URL. When
 * no subscription or no format is recorded for the callback, the RSS topic is
 * used. (Previously the RSS topic was always sent, so an Atom subscriber was
 * asked to confirm a topic it had never subscribed to.)
 *
 * @param feedId - Identifier of the feed being unsubscribed from.
 * @param callbackUrl - Subscriber callback URL to verify and remove.
 * @param env - Worker bindings used for storage access and URL building.
 * @returns `true` when verified and removed, `false` when verification failed
 *   (the stored subscriptions are then left untouched).
 */
export async function verifyAndDeleteSubscription(
  feedId: string,
  callbackUrl: string,
  env: Env,
): Promise<boolean> {
  const existing = (await getSubscriptions(feedId, env)).find(
    (s) => s.callbackUrl === callbackUrl,
  );
  const topic = existing?.format
    ? feedUrl(existing.format, feedId, env)
    : feedRssUrl(feedId, env);

  const verified = await verifyCallback(callbackUrl, {
    "hub.mode": "unsubscribe",
    "hub.topic": topic,
  });
  if (!verified) return false;

  // Re-read after the network round trip so the write is based on the
  // latest stored list rather than the pre-verification snapshot.
  const subs = await getSubscriptions(feedId, env);
  await saveSubscriptions(
    feedId,
    subs.filter((s) => s.callbackUrl !== callbackUrl),
    env,
  );
  return true;
}
+10
View File
@@ -0,0 +1,10 @@
import { Context } from "hono";
/**
 * Registers `promise` with the request's ExecutionContext via `waitUntil`,
 * swallowing any error raised while doing so (e.g. when the ExecutionContext
 * is absent, as in Node tests).
 *
 * @param c - Hono request context.
 * @param promise - Background work to register.
 */
export function waitUntilSafe(c: Context, promise: Promise<unknown>): void {
  try {
    const executionContext = c.executionCtx;
    executionContext.waitUntil(promise);
  } catch {
    // ExecutionContext unavailable in Node test environment — ignore.
  }
}