feat: reader-compat batch — JSON Feed, OPML export, conditional GET, dedup

Batch of four reader-facing improvements (TODO "Compat lecteurs + dedup"):

- JSON Feed at /json/:feedId (feed lib .json1()); all formats cross-link
- OPML export at /admin/opml (admin-protected; the registry lists every
  feed URL, so it must not be public)
- Conditional GET on /rss + /atom: strong ETag + Last-Modified, 304 on
  If-None-Match/If-Modified-Since, validators shared via http-cache.ts
- Duplicate-send dedup in ingestion: match by Message-ID, fall back to a
  SHA-256 of normalized subject+content; a duplicate is a no-op and bumps
  the new emails_deduplicated counter (status page + /api/v1/stats)

429 tests green, tsc clean, build dry-run OK. Docs (README/CLAUDE/TODO +
landing cards) updated.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Julien Herr
2026-05-24 20:47:54 +02:00
parent 334713fbd9
commit 0abd5f306c
23 changed files with 1015 additions and 11 deletions
+54 -1
View File
@@ -109,12 +109,62 @@ async function loadAcceptingFeed(
return { ok: true, feed };
}
/**
 * Build the content fingerprint used for duplicate detection when an email
 * carries no usable Message-ID.
 *
 * Subject and content are each normalised (lower-cased, every whitespace run
 * collapsed to one space, ends trimmed) so that re-sent mails differing only
 * in case or spacing produce the same fingerprint. The two normalised parts
 * are joined with a newline — which cannot occur inside a normalised part —
 * and the UTF-8 bytes of the result are hashed with SHA-256.
 *
 * @param subject - Email subject line.
 * @param content - Email body.
 * @returns The digest as a 64-character lower-case hexadecimal string.
 */
async function computeDedupHash(
  subject: string,
  content: string,
): Promise<string> {
  const normalisedParts = [subject, content].map((field) =>
    field.toLowerCase().replace(/\s+/g, " ").trim(),
  );
  const payload = new TextEncoder().encode(normalisedParts.join("\n"));
  const digest = await crypto.subtle.digest("SHA-256", payload);

  // Render each byte as two hex digits, zero-padded.
  let hex = "";
  for (const byte of new Uint8Array(digest)) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
/**
 * Look up the Message-ID among the supplied headers.
 *
 * Header names are compared case-insensitively. The first matching entry (in
 * the object's own enumeration order) decides the result: its value is
 * trimmed and returned, or `undefined` is returned if nothing is left after
 * trimming. Later entries with a different casing are not consulted.
 *
 * @param headers - Header name/value map; may be `undefined`.
 * @returns The trimmed Message-ID, or `undefined` when there are no headers,
 *   no matching header, or the matching value is empty/whitespace-only.
 */
function extractMessageId(
  headers: Record<string, string> | undefined,
): string | undefined {
  if (!headers) {
    return undefined;
  }
  for (const [name, rawValue] of Object.entries(headers)) {
    if (name.toLowerCase() !== "message-id") {
      continue;
    }
    // Optional chaining keeps a missing runtime value from throwing.
    const cleaned = rawValue?.trim();
    return cleaned ? cleaned : undefined;
  }
  return undefined;
}
async function storeEmail(
feed: Feed,
input: ProcessEmailInput,
env: Env,
ctx?: ExecutionContext,
): Promise<void> {
): Promise<boolean> {
// ── Dedup check ──────────────────────────────────────────────────────────
// Compute both dedup signals up-front (hash is async) so we only do it once.
const messageId = extractMessageId(input.headers);
const dedupHash = await computeDedupHash(input.subject, input.content);
if (feed.hasDuplicate(messageId, dedupHash)) {
logger.info("Duplicate email skipped", {
feedId: feed.id.value,
...(messageId ? { messageId } : { dedupHash }),
});
await bumpCounters(env.EMAIL_STORAGE, { emails_deduplicated: 1 });
return false; // signal: skipped (not stored)
}
const attachmentBucket = getAttachmentBucket(env);
const inlineCids = extractInlineCids(input.content);
const storedAttachments: AttachmentData[] =
@@ -149,6 +199,8 @@ async function storeEmail(
size: serialisedSize,
...(downloadableIds.length > 0 ? { attachmentIds: downloadableIds } : {}),
...(inlineIds.length > 0 ? { inlineAttachmentIds: inlineIds } : {}),
...(messageId ? { messageId } : {}),
dedupHash,
};
// Track the latest sender's domain (feed icon) and capture the RFC 8058
@@ -198,6 +250,7 @@ async function storeEmail(
? (p) => ctx.waitUntil(p)
: () => {};
await dispatchFeedEvents(feed, env, schedule);
return true; // signal: stored
}
export async function processEmail(