From 0abd5f306cd15a566f9d1a3501b39e19e494a112 Mon Sep 17 00:00:00 2001 From: Julien Herr Date: Sun, 24 May 2026 20:47:54 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20reader-compat=20batch=20=E2=80=94=20JSO?= =?UTF-8?q?N=20Feed,=20OPML=20export,=20conditional=20GET,=20dedup?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Batch of four reader-facing improvements (TODO "Compat lecteurs + dedup"): - JSON Feed at /json/:feedId (feed lib .json1()); all formats cross-link - OPML export at /admin/opml (admin-protected; the registry lists every feed URL, so it must not be public) - Conditional GET on /rss + /atom: strong ETag + Last-Modified, 304 on If-None-Match/If-Modified-Since, validators shared via http-cache.ts - Duplicate-send dedup in ingestion: match by Message-ID, fall back to a SHA-256 of normalized subject+content; a duplicate is a no-op and bumps the new emails_deduplicated counter (status page + /api/v1/stats) 429 tests green, tsc clean, build dry-run OK. Docs (README/CLAUDE/TODO + landing cards) updated. 
Co-Authored-By: Claude Opus 4.7 --- CLAUDE.md | 11 +- README.md | 10 +- TODO.md | 8 +- docs/index.html | 16 ++ src/application/email-processor.test.ts | 131 ++++++++++++++++ src/application/email-processor.ts | 55 ++++++- src/application/stats.ts | 2 + src/domain/feed.aggregate.ts | 24 +++ src/index.ts | 6 + .../counters-repository.test.ts | 1 + src/infrastructure/feed-generator.ts | 19 ++- src/infrastructure/http-cache.ts | 68 +++++++++ src/routes/admin.tsx | 4 + src/routes/atom.test.ts | 113 ++++++++++++++ src/routes/atom.ts | 18 +++ src/routes/home.tsx | 1 + src/routes/json.test.ts | 143 ++++++++++++++++++ src/routes/json.ts | 51 +++++++ src/routes/opml.test.ts | 139 +++++++++++++++++ src/routes/opml.ts | 56 +++++++ src/routes/rss.test.ts | 129 ++++++++++++++++ src/routes/rss.ts | 18 +++ src/types/index.ts | 3 + 23 files changed, 1015 insertions(+), 11 deletions(-) create mode 100644 src/infrastructure/http-cache.ts create mode 100644 src/routes/json.test.ts create mode 100644 src/routes/json.ts create mode 100644 src/routes/opml.test.ts create mode 100644 src/routes/opml.ts diff --git a/CLAUDE.md b/CLAUDE.md index 640228f..5d99759 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -46,11 +46,13 @@ Single Cloudflare Worker built with Hono. 
Routes: | `GET /api/v1/stats` | Public monitoring counters (JSON, CORS); canonical stats endpoint | | `GET /api/openapi.json` | OpenAPI 3.1 spec (public) | | `GET /api/docs` | Rendered API reference (Scalar, public) | -| `GET /rss/:feedId` | Public RSS 2.0 feed | -| `GET /atom/:feedId` | Public Atom feed (with WebSub hub header) | +| `GET /rss/:feedId` | Public RSS 2.0 feed (conditional GET: ETag/Last-Modified/304) | +| `GET /atom/:feedId` | Public Atom feed (WebSub hub header; conditional GET ETag/304) | +| `GET /json/:feedId` | Public JSON Feed | | `GET /entries/:feedId/:entryId` | Individual email HTML view | | `GET /files/:attachmentId/:filename` | R2 attachment serving | | `GET /admin` | Password-protected admin UI | +| `GET /admin/opml` | OPML export of all feeds (admin-protected) | | `/hub` | WebSub hub (subscribe/publish) | | `GET /favicon.svg`, `/favicon.ico` | Project favicon (envelope logo); fallback for per-feed favicons | | `GET /favicon/:feedId` | Per-feed favicon from the last sender's domain (falls back to project) | @@ -94,7 +96,8 @@ src/ worker.ts # Typed worker / waitUntil helper attachments.ts # R2 bucket accessor favicon-fetcher.ts # Outbound favicon fetch + cache (uses IconRepository) - feed-generator.ts # RSS/Atom XML generation + feed-generator.ts # RSS/Atom/JSON Feed XML+JSON generation + http-cache.ts # Conditional-GET validators (ETag/Last-Modified) for feed routes html-processor.ts # Email HTML sanitization / inline cid: rewriting websub.ts # WebSub subscription management + delivery unsubscribe.ts # RFC 8058 one-click unsubscribe dispatch @@ -103,6 +106,8 @@ src/ inbound.ts # ForwardEmail webhook handler rss.ts # RSS feed renderer atom.ts # Atom feed renderer + json.ts # JSON Feed renderer + opml.ts # OPML export of all feeds (admin-protected handler) entries.ts # Single email HTML view files.ts # R2 attachment serving hub.ts # WebSub hub diff --git a/README.md b/README.md index 0b077e9..c31a3f2 100644 --- a/README.md +++ b/README.md @@ 
-22,6 +22,10 @@ kill-the-news keeps the same workflow while avoiding shared domains and shared d - Optional per-feed sender allowlist (`email@domain.com` or `domain.com`) - RSS generation on demand (`/rss/:feedId`) - Atom feed at `/atom/:feedId` +- JSON Feed at `/json/:feedId` (natively consumed by NetNewsWire, Reeder, NewsBlur, Feedly) +- Bandwidth-friendly polling: RSS/Atom send a strong `ETag` + `Last-Modified` and answer `304 Not Modified` on conditional requests +- Duplicate-send dedup: a newsletter delivered twice (matched by `Message-ID`, then by a content hash) is stored once +- OPML export of all feeds at `/admin/opml` (admin-protected) for one-click bulk import into any reader - Reader-friendly output: relative links/images absolutized to the sender's site, lazy-loaded images promoted (`data-src` → `src`), plain-text feed titles, and XML-illegal control characters stripped so feeds parse in strict readers - Per-feed favicon derived from the last sender's domain (`/favicon/:feedId`), cached and shown in feeds + admin - Automatic RFC 8058 one-click unsubscribe when a feed is deleted — stops newsletters from mailing the now-dead address @@ -51,8 +55,10 @@ Main routes: - `src/lib/cloudflare-email.ts`: Cloudflare Email Workers ingestion - `src/routes/inbound.ts`: ForwardEmail webhook ingestion -- `src/routes/rss.ts`: RSS rendering -- `src/routes/atom.ts`: Atom feed rendering +- `src/routes/rss.ts`: RSS rendering (with conditional-GET / ETag support) +- `src/routes/atom.ts`: Atom feed rendering (with conditional-GET / ETag support) +- `src/routes/json.ts`: JSON Feed rendering +- `src/routes/opml.ts`: OPML export of all feeds (admin-protected, mounted at `/admin/opml`) - `src/routes/files.ts`: attachment file serving from R2 - `src/routes/admin.tsx`: admin UI + feed CRUD - `src/routes/api/`: versioned REST API + OpenAPI spec/docs (`/api/v1/*`, `/api/openapi.json`, `/api/docs`) diff --git a/TODO.md b/TODO.md index 17c6bd2..2e941bc 100644 --- a/TODO.md +++ 
b/TODO.md @@ -70,7 +70,7 @@ Ideas from competitors (Feedbin, Readwise Reader, Inoreader, Omnivore, LetterFee ### Feed-output enrichments (small XML wins — we use the `feed` lib, which already emits `content:encoded`, `atom:link rel="self"`, stable `<guid>`) -- [ ] `P2·S` **JSON Feed 1.1 endpoint** `GET /json/:feedId` **[differentiating, cheap]** — the `feed` lib already supports `.json1()`; we only expose `.rss2()`/`.atom1()` (`src/infrastructure/feed-generator.ts`). Natively consumed by NetNewsWire, Reeder, NewsBlur, Feedly. ~1 route + 1 generator fn. — _origin: [JSON Feed 1.1 spec](https://www.jsonfeed.org/version/1.1/) (reader ecosystem)_ +- [x] `P2·S` **JSON Feed endpoint** `GET /json/:feedId` **[differentiating, cheap]** — the `feed` lib's `.json1()` (emits JSON Feed v1) wired via `generateJsonFeed` in `src/infrastructure/feed-generator.ts`, served at `/json/:feedId` (`src/routes/json.ts`) with `Content-Type: application/feed+json` + WebSub hub `Link`. All three formats cross-link via `feedLinks`. Natively consumed by NetNewsWire, Reeder, NewsBlur, Feedly. — _origin: [JSON Feed 1.1 spec](https://www.jsonfeed.org/version/1.1/) (reader ecosystem)_ - [ ] `P2·M` **Per-item `<category>` + per-feed tags/categories** **[differentiating]** — we set no categories today. Tag entries by sender (or a user-set feed category) so readers (Inoreader, Feedly, NewsBlur) can filter/mute subsets. Pairs with the filtering item below; touches `FeedState`, `feed-generator.ts`. — _origin: [RSS best practices (kevincox)](https://kevincox.ca/2022/05/06/rss-feed-best-practices/); Inoreader/Feedly filtering_ @@ -88,7 +88,7 @@ Ideas from competitors (Feedbin, Readwise Reader, Inoreader, Omnivore, LetterFee ### Reading experience -- [ ] `P2·S` **OPML export** `GET /opml` **[table-stakes, easy]** — export all feeds as an OPML outline so users can bulk-import every feed into their reader in one shot. Every reader imports OPML; strong onboarding/migration win. Pure read over the feed registry. 
— _origin: reader ecosystem ([NetNewsWire](https://github.com/Ranchero-Software/NetNewsWire/)); Feedbin OPML export_ +- [x] `P2·S` **OPML export** `GET /admin/opml` **[table-stakes, easy]** — export all feeds as an OPML 2.0 outline (`<outline>` per feed, XML-attr-escaped) so users can bulk-import every feed into their reader in one shot. Mounted on the admin Hono app (inherits the admin auth middleware) rather than public, because the registry lists every feed's RSS URL — a public endpoint would leak them all. Returns `Content-Disposition: attachment; filename="feeds.opml"`. Implemented in `src/routes/opml.ts` over `FeedRepository.listFeeds()`. — _origin: reader ecosystem ([NetNewsWire](https://github.com/Ranchero-Software/NetNewsWire/)); Feedbin OPML export_ - [ ] `P2·L` **Full-text search across received emails** **[differentiating]** — admin-side search over subjects + bodies (Omnivore/Feedbin have this). On KV this means an index or scan; consider scope (subject-only first) before building. — _origin: [Omnivore](https://www.timeatlas.com/omnivore-newsletters/); Feedbin search_ @@ -106,13 +106,13 @@ Verified-missing in our code, deduplicated against the sections above. From a co ### Delivery / bandwidth -- [ ] `P2·S` **Conditional GET on feeds (ETag + Last-Modified + 304)** **[table-stakes, easy]** — `rss.ts`/`atom.ts` only send `Cache-Control: max-age=1800`; no validators. Emit a strong `ETag` (hash of the latest entry id + count) and `Last-Modified` (newest `receivedAt`), and return `304 Not Modified` on `If-None-Match`/`If-Modified-Since`. Cuts bandwidth for every polling reader. Generate the ETag _before_ compression. 
— _origin: internal code audit ([RFC 9110 conditional requests](https://www.rfc-editor.org/rfc/rfc9110#name-conditional-requests))_ +- [x] `P2·S` **Conditional GET on feeds (ETag + Last-Modified + 304)** **[table-stakes, easy]** — `rss.ts`/`atom.ts` now emit a strong `ETag` (`"<format>-<feedId>-<count>-<maxReceivedAt>"`) and `Last-Modified` (newest `receivedAt`), and return `304 Not Modified` on matching `If-None-Match`/`If-Modified-Since` before generating any XML. Validators are computed from the loaded `FeedData` (not the rendered bytes) in `src/infrastructure/http-cache.ts` (`computeFeedValidators`/`isNotModified`/`notModifiedResponse`), shared by both routes; rss vs atom get distinct ETags via the format prefix. Cuts bandwidth for every polling reader. — _origin: internal code audit ([RFC 9110 conditional requests](https://www.rfc-editor.org/rfc/rfc9110#name-conditional-requests))_ - [ ] `P3·L` **RFC 5005 paged / archived feeds** **[differentiating, niche]** — readers only ever see the capped current window; older entries vanish. Mark the subscription document `fh:complete` and expose `prev-archive` pages so readers can backfill history. Pairs naturally with our expiring-feed model (an expired feed = a sealed archive). ([RFC 5005](https://www.rfc-editor.org/rfc/rfc5005.html)) ### Ingestion robustness -- [ ] `P1·M` **Duplicate-send dedup** **[differentiating]** — the same newsletter resent (or delivered twice) creates two entries today (key = `receivedAt`). Dedup by `Message-ID` first, then a SHA-256 of normalized subject+body within a short window, in `src/application/email-processor.ts`. Fixes the upstream "duplicate posts" complaint ([#31](https://github.com/leafac/kill-the-newsletter/issues/31), [#6](https://github.com/leafac/kill-the-newsletter/issues/6)). +- [x] `P1·M` **Duplicate-send dedup** **[differentiating]** — a newsletter resent (or delivered twice) is now stored once. 
`storeEmail` (`src/application/email-processor.ts`) computes the `Message-ID` (case-insensitive header lookup) and a SHA-256 of normalized `subject+content`, then asks the aggregate `feed.hasDuplicate(messageId, dedupHash)` (`src/domain/feed.aggregate.ts`): primary match on `Message-ID`, fallback to the content hash when neither side has a Message-ID. A duplicate is a successful no-op (`{ ok: true }`, nothing stored/dispatched) and bumps a new `emails_deduplicated` counter (status page + `/api/v1/stats`). `EmailMetadata` gained additive `messageId?`/`dedupHash?` fields, so pre-feature entries never false-match. Fixes the upstream "duplicate posts" complaint ([#31](https://github.com/leafac/kill-the-newsletter/issues/31), [#6](https://github.com/leafac/kill-the-newsletter/issues/6)). - [ ] `P3·M` **Calendar (.ics) invite extraction** **[differentiating, novel]** — no email→feed tool does this. Detect `text/calendar` parts, parse the event, and surface it in the entry (summary + an `.ics` enclosure / add-to-calendar link). Useful for event/booking newsletters. — _origin: internal (novel; no external requester)_ diff --git a/docs/index.html b/docs/index.html index 9a5d77a..9d6368f 100644 --- a/docs/index.html +++ b/docs/index.html @@ -858,6 +858,22 @@

Automate feeds and emails through a versioned REST API, documented with an OpenAPI 3.1 spec and a live interactive reference.

+
+
+ +
+

RSS, Atom & JSON Feed

+

Every feed is served in all three formats — RSS 2.0, Atom, and JSON Feed — so it just works in NetNewsWire, Reeder, Feedly, NewsBlur and any other reader. Conditional requests (ETag / 304) keep polling cheap.

+
+ +
+
+ +
+

One-Click OPML Export

+

Export all your feeds as an OPML file and bulk-import them into any reader in one shot — easy onboarding, and no lock-in if you ever want to move.

+
+ diff --git a/src/application/email-processor.test.ts b/src/application/email-processor.test.ts index 4eb349a..bd0e6c0 100644 --- a/src/application/email-processor.test.ts +++ b/src/application/email-processor.test.ts @@ -595,6 +595,134 @@ describe("processEmail — attachments", () => { }); }); +describe("processEmail — deduplication", () => { + let env: ReturnType; + + beforeEach(async () => { + env = createMockEnv(); + await env.EMAIL_STORAGE.put( + `feed:${VALID_FEED_ID}:config`, + JSON.stringify({}), + ); + }); + + it("stores only one email when the same Message-ID is delivered twice", async () => { + const headers = { "Message-ID": "" }; + await processEmail(makeInput({ headers }), env as any); + await processEmail(makeInput({ headers }), env as any); + + const metadata = await env.EMAIL_STORAGE.get( + `feed:${VALID_FEED_ID}:metadata`, + "json", + ); + expect(metadata.emails).toHaveLength(1); + }); + + it("increments emails_deduplicated counter on the second delivery", async () => { + const headers = { "Message-ID": "" }; + await processEmail(makeInput({ headers }), env as any); + await processEmail(makeInput({ headers }), env as any); + + const counters = await getCounters(env.EMAIL_STORAGE as any); + expect(counters.emails_deduplicated).toBe(1); + }); + + it("deduplicates by hash when no Message-ID header is present", async () => { + const input = makeInput({ + subject: "Weekly Digest", + content: "

Same content

", + }); + await processEmail(input, env as any); + await processEmail(input, env as any); + + const metadata = await env.EMAIL_STORAGE.get( + `feed:${VALID_FEED_ID}:metadata`, + "json", + ); + expect(metadata.emails).toHaveLength(1); + + const counters = await getCounters(env.EMAIL_STORAGE as any); + expect(counters.emails_deduplicated).toBe(1); + }); + + it("does not deduplicate emails with different subjects (no Message-ID)", async () => { + await processEmail( + makeInput({ subject: "First", content: "

body

" }), + env as any, + ); + await processEmail( + makeInput({ subject: "Second", content: "

body

" }), + env as any, + ); + + const metadata = await env.EMAIL_STORAGE.get( + `feed:${VALID_FEED_ID}:metadata`, + "json", + ); + expect(metadata.emails).toHaveLength(2); + + const counters = await getCounters(env.EMAIL_STORAGE as any); + expect(counters.emails_deduplicated).toBe(0); + }); + + it("does not false-positive against pre-feature entries lacking messageId/dedupHash", async () => { + // Seed a legacy metadata entry with no messageId or dedupHash + await env.EMAIL_STORAGE.put( + `feed:${VALID_FEED_ID}:metadata`, + JSON.stringify({ + emails: [ + { + key: `feed:${VALID_FEED_ID}:999`, + subject: "Old Subject", + receivedAt: 999, + size: 50, + // intentionally no messageId, no dedupHash + }, + ], + }), + ); + + // A new, distinct email should be stored without triggering false dedup + const res = await processEmail( + makeInput({ subject: "New Distinct Email", content: "

fresh

" }), + env as any, + ); + expect(res.ok).toBe(true); + + const metadata = await env.EMAIL_STORAGE.get( + `feed:${VALID_FEED_ID}:metadata`, + "json", + ); + expect(metadata.emails).toHaveLength(2); + + const counters = await getCounters(env.EMAIL_STORAGE as any); + expect(counters.emails_deduplicated).toBe(0); + }); + + it("returns { ok: true } for a genuine duplicate (not a rejection)", async () => { + const headers = { "Message-ID": "" }; + await processEmail(makeInput({ headers }), env as any); + const res = await processEmail(makeInput({ headers }), env as any); + expect(res).toMatchObject({ ok: true }); + }); + + it("stores messageId and dedupHash in the email metadata entry", async () => { + const headers = { "Message-ID": "" }; + await processEmail( + makeInput({ subject: "Sub", content: "

c

", headers }), + env as any, + ); + + const metadata = await env.EMAIL_STORAGE.get( + `feed:${VALID_FEED_ID}:metadata`, + "json", + ); + expect(metadata.emails[0].messageId).toBe(""); + expect(typeof metadata.emails[0].dedupHash).toBe("string"); + expect(metadata.emails[0].dedupHash).toHaveLength(64); // SHA-256 hex + }); +}); + describe("processEmail — monitoring counters", () => { it("increments emails_received and sets last_email_at on success", async () => { const env = createMockEnv(); @@ -709,6 +837,7 @@ describe("processEmail — unsubscribe capture", () => { it("keeps one entry per sender and overwrites with the latest URL", async () => { await processEmail( makeInput({ + subject: "Issue 1 from A", senders: ["a@one.com"], headers: { "list-unsubscribe": "", @@ -719,6 +848,7 @@ describe("processEmail — unsubscribe capture", () => { ); await processEmail( makeInput({ + subject: "Issue 1 from B", senders: ["b@two.com"], headers: { "list-unsubscribe": "", @@ -729,6 +859,7 @@ describe("processEmail — unsubscribe capture", () => { ); await processEmail( makeInput({ + subject: "Issue 2 from A", senders: ["a@one.com"], headers: { "list-unsubscribe": "", diff --git a/src/application/email-processor.ts b/src/application/email-processor.ts index 1648b14..6e231ef 100644 --- a/src/application/email-processor.ts +++ b/src/application/email-processor.ts @@ -109,12 +109,62 @@ async function loadAcceptingFeed( return { ok: true, feed }; } +/** + * Compute a SHA-256 hex digest of a normalised string combining subject and + * content. Used as a dedup fallback when no Message-ID header is present. + * "Normalised" means lower-cased and all whitespace runs collapsed to a single + * space — so minor whitespace differences in re-sent mails still match. 
+ */ +async function computeDedupHash( + subject: string, + content: string, +): Promise { + const normalize = (s: string) => s.toLowerCase().replace(/\s+/g, " ").trim(); + const raw = `${normalize(subject)}\n${normalize(content)}`; + const buf = await crypto.subtle.digest( + "SHA-256", + new TextEncoder().encode(raw), + ); + return Array.from(new Uint8Array(buf)) + .map((b) => b.toString(16).padStart(2, "0")) + .join(""); +} + +/** + * Extract the Message-ID from request headers (case-insensitive key lookup). + * Returns undefined when absent or empty. + */ +function extractMessageId( + headers: Record | undefined, +): string | undefined { + if (!headers) return undefined; + const value = Object.entries(headers).find( + ([k]) => k.toLowerCase() === "message-id", + )?.[1]; + const trimmed = value?.trim(); + return trimmed || undefined; +} + async function storeEmail( feed: Feed, input: ProcessEmailInput, env: Env, ctx?: ExecutionContext, -): Promise { +): Promise { + // ── Dedup check ────────────────────────────────────────────────────────── + // Compute both dedup signals up-front (hash is async) so we only do it once. + const messageId = extractMessageId(input.headers); + const dedupHash = await computeDedupHash(input.subject, input.content); + + if (feed.hasDuplicate(messageId, dedupHash)) { + logger.info("Duplicate email skipped", { + feedId: feed.id.value, + ...(messageId ? { messageId } : { dedupHash }), + }); + await bumpCounters(env.EMAIL_STORAGE, { emails_deduplicated: 1 }); + return false; // signal: skipped (not stored) + } + const attachmentBucket = getAttachmentBucket(env); const inlineCids = extractInlineCids(input.content); const storedAttachments: AttachmentData[] = @@ -149,6 +199,8 @@ async function storeEmail( size: serialisedSize, ...(downloadableIds.length > 0 ? { attachmentIds: downloadableIds } : {}), ...(inlineIds.length > 0 ? { inlineAttachmentIds: inlineIds } : {}), + ...(messageId ? 
{ messageId } : {}), + dedupHash, }; // Track the latest sender's domain (feed icon) and capture the RFC 8058 @@ -198,6 +250,7 @@ async function storeEmail( ? (p) => ctx.waitUntil(p) : () => {}; await dispatchFeedEvents(feed, env, schedule); + return true; // signal: stored } export async function processEmail( diff --git a/src/application/stats.ts b/src/application/stats.ts index 57ee938..0e4818e 100644 --- a/src/application/stats.ts +++ b/src/application/stats.ts @@ -12,6 +12,7 @@ const EMPTY_COUNTERS: Counters = { emails_received: 0, emails_rejected: 0, emails_forwarded: 0, + emails_deduplicated: 0, unsubscribes_sent: 0, }; @@ -43,6 +44,7 @@ export async function bumpCounters( current.emails_received += changes.emails_received ?? 0; current.emails_rejected += changes.emails_rejected ?? 0; current.emails_forwarded += changes.emails_forwarded ?? 0; + current.emails_deduplicated += changes.emails_deduplicated ?? 0; current.unsubscribes_sent += changes.unsubscribes_sent ?? 0; if (changes.last_email_at) current.last_email_at = changes.last_email_at; if (changes.last_feed_created_at) diff --git a/src/domain/feed.aggregate.ts b/src/domain/feed.aggregate.ts index 487ee59..0a9ffc5 100644 --- a/src/domain/feed.aggregate.ts +++ b/src/domain/feed.aggregate.ts @@ -203,6 +203,30 @@ export class Feed { ).decide(senders); } + /** + * Check whether the email index already contains a duplicate of the incoming + * email. Dedup uses `messageId` as the primary key (when both sides have one) + * and falls back to `dedupHash` (SHA-256 of normalised subject+content). + * Old entries that predate the feature and carry neither field are never + * matched — they cannot cause false positives. 
+ */ + hasDuplicate(messageId?: string, dedupHash?: string): boolean { + for (const entry of this._metadata.emails) { + if (messageId && entry.messageId && entry.messageId === messageId) { + return true; + } + if ( + !messageId && + dedupHash && + entry.dedupHash && + entry.dedupHash === dedupHash + ) { + return true; + } + } + return false; + } + /** * Add an email to the front of the index, refresh the icon domain and the * per-sender unsubscribe link, then trim the oldest entries back under the diff --git a/src/index.ts b/src/index.ts index e49fc55..d4ca679 100644 --- a/src/index.ts +++ b/src/index.ts @@ -3,6 +3,7 @@ import { cors } from "hono/cors"; import { handle as handleInbound } from "./routes/inbound"; import { handle as handleRSS } from "./routes/rss"; import { handle as handleAtom } from "./routes/atom"; +import { handle as handleJSON } from "./routes/json"; import { handle as handleAdmin } from "./routes/admin"; import { handle as handleEntry } from "./routes/entries"; import { handle as handleFiles } from "./routes/files"; @@ -116,6 +117,7 @@ app.use( const api = new Hono(); const rss = new Hono(); const atom = new Hono(); +const json = new Hono(); const entries = new Hono(); const files = new Hono(); const admin = new Hono(); @@ -151,6 +153,9 @@ rss.get("/:feedId", handleRSS); // Atom feed routes (public) atom.get("/:feedId", handleAtom); +// JSON Feed routes (public) +json.get("/:feedId", handleJSON); + // Email entry HTML view (public) entries.get("/:feedId/:entryId", handleEntry); @@ -166,6 +171,7 @@ app.route("/api", api); app.route("/api", apiApp); app.route("/rss", rss); app.route("/atom", atom); +app.route("/json", json); app.route("/entries", entries); app.route("/files", files); app.route("/admin", admin); diff --git a/src/infrastructure/counters-repository.test.ts b/src/infrastructure/counters-repository.test.ts index c00c77d..61f482a 100644 --- a/src/infrastructure/counters-repository.test.ts +++ 
b/src/infrastructure/counters-repository.test.ts @@ -15,6 +15,7 @@ describe("CountersRepository", () => { emails_received: 2, emails_rejected: 0, emails_forwarded: 0, + emails_deduplicated: 0, unsubscribes_sent: 0, }); expect(await repo.getRaw()).toMatchObject({ emails_received: 2 }); diff --git a/src/infrastructure/feed-generator.ts b/src/infrastructure/feed-generator.ts index 2850924..4413afe 100644 --- a/src/infrastructure/feed-generator.ts +++ b/src/infrastructure/feed-generator.ts @@ -30,7 +30,7 @@ function buildFeed( emails: EmailData[], baseUrl: string, feedId: string, - selfUrl?: { rss?: string; atom?: string }, + selfUrl?: { rss?: string; atom?: string; json?: string }, ): Feed { const iconUrl = `${baseUrl}/favicon/${feedId}`; const feed = new Feed({ @@ -52,6 +52,7 @@ function buildFeed( feedLinks: { rss: selfUrl?.rss ?? `${baseUrl}/rss/${feedId}`, atom: selfUrl?.atom ?? `${baseUrl}/atom/${feedId}`, + json: selfUrl?.json ?? `${baseUrl}/json/${feedId}`, }, author: feedConfig.author ? { @@ -127,3 +128,19 @@ export function generateAtomFeed( ).atom1(), ); } + +export function generateJsonFeed( + feedConfig: FeedConfig, + emails: EmailData[], + baseUrl: string, + feedId: string, + selfUrl?: string, +): string { + return buildFeed( + feedConfig, + emails, + baseUrl, + feedId, + selfUrl ? { json: selfUrl } : undefined, + ).json1(); +} diff --git a/src/infrastructure/http-cache.ts b/src/infrastructure/http-cache.ts new file mode 100644 index 0000000..a9839be --- /dev/null +++ b/src/infrastructure/http-cache.ts @@ -0,0 +1,68 @@ +import { FeedConfig, EmailData } from "../types"; + +export interface FeedValidators { + etag: string; + lastModified: string; + maxReceivedAt: number; +} + +/** + * Compute HTTP cache validators (ETag + Last-Modified) for a feed. + * The ETag is derived from the feed format prefix, feedId, email count, and max + * receivedAt, making it a strong deterministic validator that changes whenever + * the feed content changes. 
+ */ +export function computeFeedValidators( + format: "rss" | "atom", + feedId: string, + feedConfig: FeedConfig, + emails: EmailData[], +): FeedValidators { + const maxReceivedAt = + emails.length > 0 + ? Math.max(...emails.map((e) => e.receivedAt)) + : (feedConfig.created_at ?? 0); + + const hash = `${format}-${feedId}-${emails.length}-${maxReceivedAt}`; + const etag = `"${hash}"`; + const lastModified = new Date(maxReceivedAt).toUTCString(); + + return { etag, lastModified, maxReceivedAt }; +} + +/** + * Returns true if the request carries a matching conditional GET header, + * meaning a 304 Not Modified response is appropriate. + */ +export function isNotModified( + req: Request, + validators: FeedValidators, +): boolean { + const ifNoneMatch = req.headers.get("If-None-Match"); + if (ifNoneMatch !== null) { + return ifNoneMatch === validators.etag; + } + + const ifModifiedSince = req.headers.get("If-Modified-Since"); + if (ifModifiedSince !== null) { + const clientTime = new Date(ifModifiedSince).getTime(); + return !isNaN(clientTime) && clientTime >= validators.maxReceivedAt; + } + + return false; +} + +/** + * Build a 304 Not Modified response with the standard cache validator headers. 
+ */ +export function notModifiedResponse(validators: FeedValidators): Response { + return new Response(null, { + status: 304, + headers: { + ETag: validators.etag, + "Last-Modified": validators.lastModified, + "Cache-Control": "max-age=1800", + "X-Robots-Tag": "noindex", + }, + }); +} diff --git a/src/routes/admin.tsx b/src/routes/admin.tsx index 8740b0d..55c9974 100644 --- a/src/routes/admin.tsx +++ b/src/routes/admin.tsx @@ -18,6 +18,7 @@ import { } from "../infrastructure/urls"; import { feedsRouter } from "./admin/feeds"; import { emailsRouter } from "./admin/emails"; +import { handleOpml } from "./opml"; import { dashboardScript } from "../scripts/generated/dashboard"; type AppEnv = { Bindings: Env }; @@ -975,6 +976,9 @@ app.get("/", async (c) => { ); }); +// OPML export (admin-protected) +app.get("/opml", handleOpml); + // Mount sub-routers app.route("/feeds", feedsRouter); app.route("/", emailsRouter); diff --git a/src/routes/atom.test.ts b/src/routes/atom.test.ts index e1edd8a..b50299b 100644 --- a/src/routes/atom.test.ts +++ b/src/routes/atom.test.ts @@ -144,4 +144,117 @@ describe("Atom Feed Route", () => { expect(body).toContain('xmlns="http://www.w3.org/2005/Atom"'); }); }); + + describe("conditional GET (ETag + Last-Modified)", () => { + const FEED_ID = "test-feed-atom-cget"; + const EMAIL_RECEIVED_AT = 1700000001000; + + beforeEach(async () => { + const emailKey = `feed:${FEED_ID}:${EMAIL_RECEIVED_AT}`; + await mockEnv.EMAIL_STORAGE.put( + emailKey, + JSON.stringify({ + subject: "Atom Subject", + from: "Sender ", + content: "

Body

", + receivedAt: EMAIL_RECEIVED_AT, + headers: {}, + }), + ); + await mockEnv.EMAIL_STORAGE.put( + `feed:${FEED_ID}:metadata`, + JSON.stringify({ + emails: [ + { + key: emailKey, + subject: "Atom Subject", + receivedAt: EMAIL_RECEIVED_AT, + }, + ], + }), + ); + await mockEnv.EMAIL_STORAGE.put( + `feed:${FEED_ID}:config`, + JSON.stringify({ + title: "Atom Cget Feed", + language: "en", + created_at: 1700000000000, + }), + ); + }); + + it("first GET returns 200 with ETag and Last-Modified headers", async () => { + const res = await testApp.request(`/${FEED_ID}`, {}, mockEnv); + expect(res.status).toBe(200); + expect(res.headers.get("ETag")).toBeTruthy(); + expect(res.headers.get("Last-Modified")).toBeTruthy(); + }); + + it("GET with matching If-None-Match returns 304 with empty body", async () => { + const first = await testApp.request(`/${FEED_ID}`, {}, mockEnv); + const etag = first.headers.get("ETag")!; + + const res = await testApp.request( + `/${FEED_ID}`, + { headers: { "If-None-Match": etag } }, + mockEnv, + ); + expect(res.status).toBe(304); + expect(await res.text()).toBe(""); + }); + + it("GET with If-Modified-Since in the future returns 304", async () => { + const future = new Date(EMAIL_RECEIVED_AT + 1000).toUTCString(); + const res = await testApp.request( + `/${FEED_ID}`, + { headers: { "If-Modified-Since": future } }, + mockEnv, + ); + expect(res.status).toBe(304); + }); + + it("stale If-None-Match after new email results in 200", async () => { + const first = await testApp.request(`/${FEED_ID}`, {}, mockEnv); + const oldEtag = first.headers.get("ETag")!; + + const newReceivedAt = EMAIL_RECEIVED_AT + 5000; + const newEmailKey = `feed:${FEED_ID}:${newReceivedAt}`; + await mockEnv.EMAIL_STORAGE.put( + newEmailKey, + JSON.stringify({ + subject: "Newer Atom Email", + from: "Sender ", + content: "

New body

", + receivedAt: newReceivedAt, + headers: {}, + }), + ); + await mockEnv.EMAIL_STORAGE.put( + `feed:${FEED_ID}:metadata`, + JSON.stringify({ + emails: [ + { + key: newEmailKey, + subject: "Newer Atom Email", + receivedAt: newReceivedAt, + }, + { + key: `feed:${FEED_ID}:${EMAIL_RECEIVED_AT}`, + subject: "Atom Subject", + receivedAt: EMAIL_RECEIVED_AT, + }, + ], + }), + ); + + const res = await testApp.request( + `/${FEED_ID}`, + { headers: { "If-None-Match": oldEtag } }, + mockEnv, + ); + expect(res.status).toBe(200); + const newEtag = res.headers.get("ETag"); + expect(newEtag).not.toBe(oldEtag); + }); + }); }); diff --git a/src/routes/atom.ts b/src/routes/atom.ts index f024ff8..a4c7a72 100644 --- a/src/routes/atom.ts +++ b/src/routes/atom.ts @@ -5,6 +5,11 @@ import { fetchFeedData } from "../application/feed-fetcher"; import { baseUrl, feedAtomUrl } from "../infrastructure/urls"; import { isExpired } from "../domain/feed"; import { FeedId } from "../domain/value-objects/feed-id"; +import { + computeFeedValidators, + isNotModified, + notModifiedResponse, +} from "../infrastructure/http-cache"; export async function handle(c: Context<{ Bindings: Env }>): Promise { try { @@ -21,6 +26,17 @@ export async function handle(c: Context<{ Bindings: Env }>): Promise { return new Response("Feed has expired", { status: 410 }); } + const validators = computeFeedValidators( + "atom", + feedId, + feedData.feedConfig, + feedData.emails, + ); + + if (isNotModified(c.req.raw, validators)) { + return notModifiedResponse(validators); + } + const base = baseUrl(c.env); const selfUrl = new URL(c.req.url).origin + `/atom/${feedId}`; const atomXml = generateAtomFeed( @@ -42,6 +58,8 @@ export async function handle(c: Context<{ Bindings: Env }>): Promise { "Cache-Control": "max-age=1800", "X-Robots-Tag": "noindex", Link: linkHeader, + ETag: validators.etag, + "Last-Modified": validators.lastModified, }, }); } catch (error) { diff --git a/src/routes/home.tsx b/src/routes/home.tsx index 
564149c..3f7c17e 100644 --- a/src/routes/home.tsx +++ b/src/routes/home.tsx @@ -166,6 +166,7 @@ export async function handle(c: Context<{ Bindings: Env }>): Promise { label="Forwarded (catch-all)" value={stats.emails_forwarded} /> + { + let testApp: Hono; + let mockEnv: Env; + + beforeEach(() => { + mockEnv = createMockEnv() as unknown as Env; + testApp = new Hono(); + testApp.get("/:feedId", handle); + }); + + describe("unknown feed", () => { + it("returns 404 when no metadata exists in KV", async () => { + const res = await testApp.request("/nonexistent-feed", {}, mockEnv); + expect(res.status).toBe(404); + expect(await res.text()).toBe("Feed not found"); + }); + }); + + describe("valid feed with no emails", () => { + beforeEach(async () => { + await mockEnv.EMAIL_STORAGE.put( + "feed:empty-feed:metadata", + JSON.stringify({ emails: [] }), + ); + }); + + it("returns 200 with application/feed+json content type", async () => { + const res = await testApp.request("/empty-feed", {}, mockEnv); + expect(res.status).toBe(200); + expect(res.headers.get("Content-Type")).toContain( + "application/feed+json", + ); + }); + + it("includes Cache-Control header", async () => { + const res = await testApp.request("/empty-feed", {}, mockEnv); + expect(res.headers.get("Cache-Control")).toBe("max-age=1800"); + }); + + it("sets X-Robots-Tag: noindex", async () => { + const res = await testApp.request("/empty-feed", {}, mockEnv); + expect(res.headers.get("X-Robots-Tag")).toBe("noindex"); + }); + + it("Link header advertises hub and self", async () => { + const res = await testApp.request("/empty-feed", {}, mockEnv); + const link = res.headers.get("Link") ?? 
""; + expect(link).toContain(`rel="hub"`); + expect(link).toContain(`rel="self"`); + }); + + it("body parses as JSON with jsonfeed version 1.1", async () => { + const res = await testApp.request("/empty-feed", {}, mockEnv); + const body = (await res.json()) as { version: string; items: unknown[] }; + expect(body.version).toBe("https://jsonfeed.org/version/1"); + expect(Array.isArray(body.items)).toBe(true); + expect(body.items).toHaveLength(0); + }); + }); + + describe("valid feed with emails", () => { + const FEED_ID = "test-feed-json"; + const EMAIL_RECEIVED_AT = 1700000001000; + + beforeEach(async () => { + const emailKey = `feed:${FEED_ID}:${EMAIL_RECEIVED_AT}`; + await mockEnv.EMAIL_STORAGE.put( + emailKey, + JSON.stringify({ + subject: "JSON Feed Subject", + from: "Sender ", + content: "

Body content

", + receivedAt: EMAIL_RECEIVED_AT, + headers: {}, + }), + ); + await mockEnv.EMAIL_STORAGE.put( + `feed:${FEED_ID}:metadata`, + JSON.stringify({ + emails: [ + { + key: emailKey, + subject: "JSON Feed Subject", + receivedAt: EMAIL_RECEIVED_AT, + }, + ], + }), + ); + await mockEnv.EMAIL_STORAGE.put( + `feed:${FEED_ID}:config`, + JSON.stringify({ + title: "My JSON Feed", + language: "en", + created_at: 1700000000000, + }), + ); + }); + + it("returns 200 with items containing the seeded email", async () => { + const res = await testApp.request(`/${FEED_ID}`, {}, mockEnv); + expect(res.status).toBe(200); + const body = (await res.json()) as { + version: string; + items: Array<{ title: string }>; + }; + expect(body.version).toBe("https://jsonfeed.org/version/1"); + expect(Array.isArray(body.items)).toBe(true); + expect(body.items).toHaveLength(1); + expect(body.items[0].title).toBe("JSON Feed Subject"); + }); + }); + + describe("expired feed", () => { + beforeEach(async () => { + const pastTimestamp = Date.now() - 1000 * 60 * 60 * 24; // 1 day ago + await mockEnv.EMAIL_STORAGE.put( + "feed:expired-feed:metadata", + JSON.stringify({ emails: [] }), + ); + await mockEnv.EMAIL_STORAGE.put( + "feed:expired-feed:config", + JSON.stringify({ + title: "Expired Feed", + language: "en", + created_at: pastTimestamp, + expires_at: pastTimestamp, + }), + ); + }); + + it("returns 410 for expired feed", async () => { + const res = await testApp.request("/expired-feed", {}, mockEnv); + expect(res.status).toBe(410); + expect(await res.text()).toBe("Feed has expired"); + }); + }); +}); diff --git a/src/routes/json.ts b/src/routes/json.ts new file mode 100644 index 0000000..0a74af2 --- /dev/null +++ b/src/routes/json.ts @@ -0,0 +1,51 @@ +import { Context } from "hono"; +import { Env } from "../types"; +import { generateJsonFeed } from "../infrastructure/feed-generator"; +import { fetchFeedData } from "../application/feed-fetcher"; +import { baseUrl } from "../infrastructure/urls"; +import 
{ isExpired } from "../domain/feed"; +import { FeedId } from "../domain/value-objects/feed-id"; + +export async function handle(c: Context<{ Bindings: Env }>): Promise { + try { + const feedId = c.req.param("feedId"); + if (!feedId) { + return new Response("Feed ID is required", { status: 400 }); + } + + const feedData = await fetchFeedData(FeedId.unchecked(feedId), c.env); + if (!feedData) { + return new Response("Feed not found", { status: 404 }); + } + if (isExpired(feedData.feedConfig)) { + return new Response("Feed has expired", { status: 410 }); + } + + const base = baseUrl(c.env); + const selfUrl = new URL(c.req.url).origin + `/json/${feedId}`; + const jsonFeed = generateJsonFeed( + feedData.feedConfig, + feedData.emails, + base, + feedId, + selfUrl, + ); + const linkHeader = [ + `<${base}/hub>; rel="hub"`, + `<${selfUrl}>; rel="self"`, + ].join(", "); + + return new Response(jsonFeed, { + status: 200, + headers: { + "Content-Type": "application/feed+json", + "Cache-Control": "max-age=1800", + "X-Robots-Tag": "noindex", + Link: linkHeader, + }, + }); + } catch (error) { + console.error("Error generating JSON feed:", error); + return new Response("Internal Server Error", { status: 500 }); + } +} diff --git a/src/routes/opml.test.ts b/src/routes/opml.test.ts new file mode 100644 index 0000000..4fc9ea7 --- /dev/null +++ b/src/routes/opml.test.ts @@ -0,0 +1,139 @@ +import { describe, it, expect, beforeEach } from "vitest"; +import { Hono } from "hono"; +import app from "./admin"; +import { createMockEnv } from "../test/setup"; +import { Env } from "../types"; + +describe("OPML export — GET /admin/opml", () => { + let testApp: Hono; + let mockEnv: Env; + let request: (path: string, init?: RequestInit) => Promise; + let loginAndGetCookie: () => Promise; + + beforeEach(() => { + mockEnv = createMockEnv() as unknown as Env; + testApp = new Hono(); + testApp.route("/admin", app); + request = (path, init = {}) => + Promise.resolve(testApp.request(path, init, 
mockEnv)); + loginAndGetCookie = async () => { + const formData = new FormData(); + formData.append("password", "test-password"); + const response = await request("/admin/login", { + method: "POST", + body: formData, + }); + expect(response.status).toBe(302); + const setCookie = response.headers.get("Set-Cookie"); + expect(setCookie).toBeTruthy(); + return (setCookie as string).split(";")[0]; + }; + }); + + it("should return 302 redirect to login when not authenticated", async () => { + const res = await request("/admin/opml"); + expect(res.status).toBe(302); + expect(res.headers.get("Location")).toBe("/admin/login"); + }); + + it("should return 200 with OPML content when authenticated", async () => { + // Seed two feeds in the registry + await mockEnv.EMAIL_STORAGE.put( + "feeds:list", + JSON.stringify({ + feeds: [ + { id: "feed-abc", title: "My Newsletter", description: "Daily news" }, + { id: "feed-xyz", title: "Tech Digest" }, + ], + }), + ); + + const authCookie = await loginAndGetCookie(); + const res = await request("/admin/opml", { + headers: { + Cookie: authCookie, + Origin: "https://test.getmynews.app", + }, + }); + + expect(res.status).toBe(200); + const contentType = res.headers.get("Content-Type") ?? 
""; + expect(contentType).toContain("text/x-opml"); + expect(res.headers.get("Content-Disposition")).toBe( + 'attachment; filename="feeds.opml"', + ); + expect(res.headers.get("X-Robots-Tag")).toBe("noindex"); + + const body = await res.text(); + + // Valid OPML 2.0 structure + expect(body).toContain(''); + expect(body).toContain(''); + expect(body).toContain(""); + expect(body).toContain("kill-the-news feeds"); + expect(body).toContain(""); + + // One outline per feed with correct xmlUrl + expect(body).toContain('type="rss"'); + expect(body).toContain('text="My Newsletter"'); + expect(body).toContain('title="My Newsletter"'); + expect(body).toContain('xmlUrl="https://test.getmynews.app/rss/feed-abc"'); + expect(body).toContain('description="Daily news"'); + + expect(body).toContain('text="Tech Digest"'); + expect(body).toContain('xmlUrl="https://test.getmynews.app/rss/feed-xyz"'); + + // feed-xyz has no description — attribute must not appear + const feedXyzLine = + body.split("\n").find((l) => l.includes("feed-xyz")) ?? ""; + expect(feedXyzLine).not.toContain("description="); + }); + + it("should XML-escape special characters in title and description", async () => { + await mockEnv.EMAIL_STORAGE.put( + "feeds:list", + JSON.stringify({ + feeds: [ + { + id: "feed-special", + title: "News & ", + description: 'Say "hello" & goodbye', + }, + ], + }), + ); + + const authCookie = await loginAndGetCookie(); + const res = await request("/admin/opml", { + headers: { + Cookie: authCookie, + Origin: "https://test.getmynews.app", + }, + }); + + expect(res.status).toBe(200); + const body = await res.text(); + + // Raw special chars must not appear unescaped in attribute values + const outlineLine = + body.split("\n").find((l) => l.includes("feed-special")) ?? 
""; + expect(outlineLine).toContain("News & <Updates>"); + expect(outlineLine).toContain("Say "hello" & goodbye"); + expect(outlineLine).not.toContain('title="News & <'); + }); + + it("should return empty body element when there are no feeds", async () => { + const authCookie = await loginAndGetCookie(); + const res = await request("/admin/opml", { + headers: { + Cookie: authCookie, + Origin: "https://test.getmynews.app", + }, + }); + + expect(res.status).toBe(200); + const body = await res.text(); + expect(body).toContain(""); + expect(body).not.toContain(", and " with their XML entity equivalents. + */ +function escapeXmlAttr(value: string): string { + return value + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """); +} + +/** + * Handler for GET /admin/opml + * Exports all feeds as an OPML 2.0 document. + * Protected by the admin auth middleware (inherits from admin Hono app). + */ +export async function handleOpml(c: Context<{ Bindings: Env }>) { + const env = c.env; + const feeds = await FeedRepository.from(env).listFeeds(); + + const outlines = feeds + .map((feed) => { + const title = escapeXmlAttr(feed.title); + const xmlUrl = escapeXmlAttr(feedRssUrl(feed.id, env)); + const descAttr = feed.description + ? 
` description="${escapeXmlAttr(feed.description)}"` + : ""; + return ` `; + }) + .join("\n"); + + const opml = ` + + + kill-the-news feeds + + +${outlines} + +`; + + return new Response(opml, { + status: 200, + headers: { + "Content-Type": "text/x-opml; charset=utf-8", + "Content-Disposition": 'attachment; filename="feeds.opml"', + "X-Robots-Tag": "noindex", + }, + }); +} diff --git a/src/routes/rss.test.ts b/src/routes/rss.test.ts index 68fd480..b18a4a2 100644 --- a/src/routes/rss.test.ts +++ b/src/routes/rss.test.ts @@ -53,4 +53,133 @@ describe("RSS Feed Route", () => { expect(link).toContain(`rel="self"`); }); }); + + describe("conditional GET (ETag + Last-Modified)", () => { + const FEED_ID = "test-feed-rss-cget"; + const EMAIL_RECEIVED_AT = 1700000001000; + + beforeEach(async () => { + const emailKey = `feed:${FEED_ID}:${EMAIL_RECEIVED_AT}`; + await mockEnv.EMAIL_STORAGE.put( + emailKey, + JSON.stringify({ + subject: "RSS Subject", + from: "Sender ", + content: "

Body

", + receivedAt: EMAIL_RECEIVED_AT, + headers: {}, + }), + ); + await mockEnv.EMAIL_STORAGE.put( + `feed:${FEED_ID}:metadata`, + JSON.stringify({ + emails: [ + { + key: emailKey, + subject: "RSS Subject", + receivedAt: EMAIL_RECEIVED_AT, + }, + ], + }), + ); + await mockEnv.EMAIL_STORAGE.put( + `feed:${FEED_ID}:config`, + JSON.stringify({ + title: "RSS Cget Feed", + language: "en", + created_at: 1700000000000, + }), + ); + }); + + it("first GET returns 200 with ETag and Last-Modified headers", async () => { + const res = await testApp.request(`/${FEED_ID}`, {}, mockEnv); + expect(res.status).toBe(200); + expect(res.headers.get("ETag")).toBeTruthy(); + expect(res.headers.get("Last-Modified")).toBeTruthy(); + }); + + it("GET with matching If-None-Match returns 304 with empty body", async () => { + const first = await testApp.request(`/${FEED_ID}`, {}, mockEnv); + const etag = first.headers.get("ETag")!; + + const res = await testApp.request( + `/${FEED_ID}`, + { headers: { "If-None-Match": etag } }, + mockEnv, + ); + expect(res.status).toBe(304); + expect(await res.text()).toBe(""); + }); + + it("GET with If-Modified-Since in the future returns 304", async () => { + const future = new Date(EMAIL_RECEIVED_AT + 1000).toUTCString(); + const res = await testApp.request( + `/${FEED_ID}`, + { headers: { "If-Modified-Since": future } }, + mockEnv, + ); + expect(res.status).toBe(304); + }); + + it("stale If-None-Match after new email results in 200", async () => { + // Get ETag before new email + const first = await testApp.request(`/${FEED_ID}`, {}, mockEnv); + const oldEtag = first.headers.get("ETag")!; + + // Add a newer email + const newReceivedAt = EMAIL_RECEIVED_AT + 5000; + const newEmailKey = `feed:${FEED_ID}:${newReceivedAt}`; + await mockEnv.EMAIL_STORAGE.put( + newEmailKey, + JSON.stringify({ + subject: "Newer Email", + from: "Sender ", + content: "

New body

", + receivedAt: newReceivedAt, + headers: {}, + }), + ); + await mockEnv.EMAIL_STORAGE.put( + `feed:${FEED_ID}:metadata`, + JSON.stringify({ + emails: [ + { + key: newEmailKey, + subject: "Newer Email", + receivedAt: newReceivedAt, + }, + { + key: `feed:${FEED_ID}:${EMAIL_RECEIVED_AT}`, + subject: "RSS Subject", + receivedAt: EMAIL_RECEIVED_AT, + }, + ], + }), + ); + + const res = await testApp.request( + `/${FEED_ID}`, + { headers: { "If-None-Match": oldEtag } }, + mockEnv, + ); + expect(res.status).toBe(200); + const newEtag = res.headers.get("ETag"); + expect(newEtag).not.toBe(oldEtag); + }); + + it("RSS and Atom ETags for the same feed differ", async () => { + const rssRes = await testApp.request(`/${FEED_ID}`, {}, mockEnv); + const rssEtag = rssRes.headers.get("ETag")!; + + // Use a separate atom app to get the atom ETag + const { handle: atomHandle } = await import("./atom"); + const atomApp = new Hono(); + atomApp.get("/:feedId", atomHandle); + const atomRes = await atomApp.request(`/${FEED_ID}`, {}, mockEnv); + const atomEtag = atomRes.headers.get("ETag")!; + + expect(rssEtag).not.toBe(atomEtag); + }); + }); }); diff --git a/src/routes/rss.ts b/src/routes/rss.ts index c90f6ab..4ebd1d1 100644 --- a/src/routes/rss.ts +++ b/src/routes/rss.ts @@ -5,6 +5,11 @@ import { fetchFeedData } from "../application/feed-fetcher"; import { baseUrl, feedRssUrl } from "../infrastructure/urls"; import { isExpired } from "../domain/feed"; import { FeedId } from "../domain/value-objects/feed-id"; +import { + computeFeedValidators, + isNotModified, + notModifiedResponse, +} from "../infrastructure/http-cache"; export async function handle(c: Context<{ Bindings: Env }>): Promise { try { @@ -21,6 +26,17 @@ export async function handle(c: Context<{ Bindings: Env }>): Promise { return new Response("Feed has expired", { status: 410 }); } + const validators = computeFeedValidators( + "rss", + feedId, + feedData.feedConfig, + feedData.emails, + ); + + if (isNotModified(c.req.raw, 
validators)) { + return notModifiedResponse(validators); + } + const base = baseUrl(c.env); const selfUrl = new URL(c.req.url).origin + `/rss/${feedId}`; const rssXml = generateRssFeed( @@ -42,6 +58,8 @@ export async function handle(c: Context<{ Bindings: Env }>): Promise { "Cache-Control": "max-age=1800", "X-Robots-Tag": "noindex", Link: linkHeader, + ETag: validators.etag, + "Last-Modified": validators.lastModified, }, }); } catch (error) { diff --git a/src/types/index.ts b/src/types/index.ts index b9986ca..be654a4 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -68,6 +68,8 @@ export interface EmailMetadata { size?: number; attachmentIds?: string[]; // Downloadable attachments (shown to the user) inlineAttachmentIds?: string[]; // Inline images: hidden from lists, still cleaned up + messageId?: string; // RFC 2822 Message-ID header (dedup primary key) + dedupHash?: string; // SHA-256 hex of normalized subject+content (dedup fallback) } // Feed list interface @@ -92,6 +94,7 @@ export interface Counters { // Subset of emails_rejected: non-feed mail forwarded to FALLBACK_FORWARD_ADDRESS // instead of dropped. Dropped count = emails_rejected − emails_forwarded. emails_forwarded: number; + emails_deduplicated: number; // Duplicate deliveries silently skipped (not stored) unsubscribes_sent: number; last_email_at?: string; // ISO 8601 last_feed_created_at?: string; // ISO 8601