diff --git a/CLAUDE.md b/CLAUDE.md index 1fbb8a3..3c6f272 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -126,15 +126,15 @@ src/ All data lives in the `EMAIL_STORAGE` KV namespace: -| Key | Value | -| --------------------------- | ------------------------------------------------------------------------ | -| `feeds:list` | `{ feeds: Array<{ id, title, description?, expires_at? }> }` | -| `feed:<feedId>:config` | `FeedConfig` | -| `feed:<feedId>:metadata` | `{ emails: Array<{ key, subject, receivedAt, size?, attachmentIds? }> }` | -| `feed:<feedId>:<timestamp>` | Full `EmailData` | -| `websub:subs:<feedId>` | `WebSubSubscription[]` (per-feed subscriber list) | -| `icon:<domain>` | Cached favicon record (base64 + content type; negative entries allowed) | -| `stats:counters` | `Counters` (cumulative monitoring counters singleton) | +| Key | Value | +| --------------------------- | ---------------------------------------------------------------------------------------------- | +| `feeds:list` | `{ feeds: Array<{ id, title, description?, expires_at? }> }` | +| `feed:<feedId>:config` | `FeedConfig` | +| `feed:<feedId>:metadata` | `{ emails: Array<{ key, subject, receivedAt, size?, attachmentIds?, inlineAttachmentIds? }> }` | +| `feed:<feedId>:<timestamp>` | Full `EmailData` | +| `websub:subs:<feedId>` | `WebSubSubscription[]` (per-feed subscriber list) | +| `icon:<domain>` | Cached favicon record (base64 + content type; negative entries allowed) | +| `stats:counters` | `Counters` (cumulative monitoring counters singleton) | The KV key schema lives in `src/domain/feed-keys.ts` (pure, framework-agnostic) — never inline a `feed:`/`feeds:list`/`websub:`/`icon:`/`stats:counters` key string anywhere else. KV access is owned by four repository **adapters** in `src/infrastructure/`, each for one concern: `FeedRepository` (the Feed aggregate + global list + email bodies), `IconRepository` (`icon:*`), `WebSubSubscriptionRepository` (`websub:subs:*`), and `CountersRepository` (`stats:counters`). Go through a repository, never `env.EMAIL_STORAGE.get/put` directly. 
The domain depends only on the key schema, not on these adapters. diff --git a/INSTALL.md b/INSTALL.md index fa090c2..e6e489f 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -128,6 +128,8 @@ FEED_MAX_SIZE_BYTES = "524288" # 512 KB — adjust as needed When an incoming email contains attachments, the Worker can store them in a Cloudflare R2 bucket and expose them as `<enclosure>` elements in the RSS feed (and `<link rel="enclosure">` in Atom). Each attachment is served at `/files/{id}/{filename}` with an immutable cache header. Attachments are also listed with download links on the admin email detail page and the public entry view. +Inline images (the ones an email references with `src="cid:…"`) are handled separately: they are still stored in R2 (and deleted with the email), but instead of appearing in the attachment list they render in place — the `cid:` reference is rewritten to the stored `/files/{id}/{filename}` URL in the feed, the admin preview, and the public entry view. + This feature is **optional**. If no R2 bucket is bound, attachments are silently ignored and nothing else changes. **Setup (automated):** `setup.sh` now asks _"Enable email attachments stored in R2?"_. Answer yes and it creates the buckets (`-attachments` and `-attachments-preview`) and wires the binding into the generated `wrangler.toml` for you. 
diff --git a/src/application/email-processor.test.ts b/src/application/email-processor.test.ts index fce48ff..4eb349a 100644 --- a/src/application/email-processor.test.ts +++ b/src/application/email-processor.test.ts @@ -437,6 +437,104 @@ describe("processEmail — attachments", () => { expect(typeof metadata.emails[0].attachmentIds[0]).toBe("string"); }); + it("classifies a cid-referenced image as inline, not a downloadable attachment", async () => { + const env = createMockEnv({ withR2: true }); + await env.EMAIL_STORAGE.put( + `feed:${VALID_FEED_ID}:config`, + JSON.stringify({}), + ); + + const inlineImage: RawAttachment = { + filename: "logo.png", + contentType: "image/png", + content: new TextEncoder().encode("PNG").buffer as ArrayBuffer, + contentId: "logo123", + }; + + await processEmail( + makeInput({ + content: '

Hi

', + attachments: [inlineImage, pdfAttachment], + }), + env as any, + ); + + const metadata = await env.EMAIL_STORAGE.get( + `feed:${VALID_FEED_ID}:metadata`, + "json", + ); + const emailData = await env.EMAIL_STORAGE.get( + metadata.emails[0].key, + "json", + ); + + const inline = emailData.attachments.find( + (a: any) => a.filename === "logo.png", + ); + const pdf = emailData.attachments.find( + (a: any) => a.filename === "report.pdf", + ); + expect(inline.inline).toBe(true); + expect(pdf.inline).toBeUndefined(); + + // Metadata splits ids: the pdf is downloadable, the logo is inline-only. + expect(metadata.emails[0].attachmentIds).toEqual([pdf.id]); + expect(metadata.emails[0].inlineAttachmentIds).toEqual([inline.id]); + }); + + it("deletes inline image R2 objects when a trimmed email had them", async () => { + const env = createMockEnv({ withR2: true }); + const mockR2 = (env as any).ATTACHMENT_BUCKET as unknown as MockR2; + await env.EMAIL_STORAGE.put( + `feed:${VALID_FEED_ID}:config`, + JSON.stringify({}), + ); + + const oldKey = `feed:${VALID_FEED_ID}:111`; + const inlineId = "old-inline-uuid"; + const oldEmail = JSON.stringify({ + subject: "Old", + from: "a@b.com", + content: "x".repeat(200) + '', + receivedAt: 111, + headers: {}, + attachments: [ + { + id: inlineId, + filename: "logo.png", + contentType: "image/png", + size: 100, + contentId: "c", + inline: true, + }, + ], + }); + await env.EMAIL_STORAGE.put(oldKey, oldEmail); + await mockR2.put(inlineId, new ArrayBuffer(100)); + await env.EMAIL_STORAGE.put( + `feed:${VALID_FEED_ID}:metadata`, + JSON.stringify({ + emails: [ + { + key: oldKey, + subject: "Old", + receivedAt: 111, + size: oldEmail.length, + inlineAttachmentIds: [inlineId], + }, + ], + }), + ); + + const tinyEnv = { ...env, FEED_MAX_SIZE_BYTES: "50" }; + const res = await processEmail( + makeInput({ subject: "New" }), + tinyEnv as any, + ); + expect(res.ok).toBe(true); + expect(mockR2._has(inlineId)).toBe(false); + }); + it("deletes R2 
objects when a trimmed email had attachments", async () => { const env = createMockEnv({ withR2: true }); const mockR2 = (env as any).ATTACHMENT_BUCKET as unknown as MockR2; diff --git a/src/application/email-processor.ts b/src/application/email-processor.ts index ca3a045..1648b14 100644 --- a/src/application/email-processor.ts +++ b/src/application/email-processor.ts @@ -5,6 +5,8 @@ import { dispatchFeedEvents } from "../application/feed-events"; import { extractEmailDomain } from "../infrastructure/favicon-fetcher"; import { parseOneClickUnsubscribe } from "../infrastructure/unsubscribe"; import { getAttachmentBucket } from "../infrastructure/attachments"; +import { extractInlineCids } from "../infrastructure/html-processor"; +import { attachmentIdsForCleanup } from "./feed-cleanup"; import { FeedRepository } from "../infrastructure/feed-repository"; import { BackgroundScheduler } from "../infrastructure/worker"; import { Feed } from "../domain/feed.aggregate"; @@ -47,14 +49,16 @@ export type IngestResult = async function uploadAttachments( attachments: RawAttachment[], bucket: R2Bucket, + inlineCids: Set, ): Promise { return Promise.all( attachments.map(async (att) => { const id = crypto.randomUUID(); + const inline = att.contentId ? inlineCids.has(att.contentId) : false; await bucket.put(id, att.content, { httpMetadata: { contentType: att.contentType, - contentDisposition: `attachment; filename="${att.filename}"`, + contentDisposition: `${inline ? "inline" : "attachment"}; filename="${att.filename}"`, }, }); return { @@ -63,6 +67,7 @@ async function uploadAttachments( contentType: att.contentType, size: att.content.byteLength, ...(att.contentId ? { contentId: att.contentId } : {}), + ...(inline ? 
{ inline: true } : {}), }; }), ); @@ -111,9 +116,10 @@ async function storeEmail( ctx?: ExecutionContext, ): Promise { const attachmentBucket = getAttachmentBucket(env); + const inlineCids = extractInlineCids(input.content); const storedAttachments: AttachmentData[] = attachmentBucket && input.attachments?.length - ? await uploadAttachments(input.attachments, attachmentBucket) + ? await uploadAttachments(input.attachments, attachmentBucket, inlineCids) : []; const emailData = { @@ -132,14 +138,17 @@ async function storeEmail( const serialisedSize = new TextEncoder().encode( JSON.stringify(emailData), ).byteLength; + const downloadableIds = storedAttachments + .filter((a) => !a.inline) + .map((a) => a.id); + const inlineIds = storedAttachments.filter((a) => a.inline).map((a) => a.id); const newEntry: EmailMetadata = { key: emailKey, subject: emailData.subject, receivedAt: emailData.receivedAt, size: serialisedSize, - ...(storedAttachments.length > 0 - ? { attachmentIds: storedAttachments.map((a) => a.id) } - : {}), + ...(downloadableIds.length > 0 ? { attachmentIds: downloadableIds } : {}), + ...(inlineIds.length > 0 ? { inlineAttachmentIds: inlineIds } : {}), }; // Track the latest sender's domain (feed icon) and capture the RFC 8058 @@ -166,7 +175,7 @@ async function storeEmail( const r2Deletions = attachmentBucket && dropped.length > 0 ? dropped - .flatMap((e) => e.attachmentIds ?? []) + .flatMap((e) => attachmentIdsForCleanup(e)) .map((id) => attachmentBucket.delete(id)) : []; diff --git a/src/application/feed-cleanup.ts b/src/application/feed-cleanup.ts index 58f6a37..7a28b97 100644 --- a/src/application/feed-cleanup.ts +++ b/src/application/feed-cleanup.ts @@ -4,9 +4,16 @@ import { getAttachmentBucket } from "../infrastructure/attachments"; import { FeedRepository } from "../infrastructure/feed-repository"; import { FeedId } from "../domain/value-objects/feed-id"; +// All R2 object ids an email owns — both downloadable attachments and inline +// images. 
Inline images are hidden from the user-facing lists but must still be +// purged from the bucket when the email is deleted. +export function attachmentIdsForCleanup(e: EmailMetadata): string[] { + return [...(e.attachmentIds ?? []), ...(e.inlineAttachmentIds ?? [])]; +} + // Delete the R2 attachments belonging to the given email keys. Call before the // emails are removed from feed metadata, while `emails` still carries their -// attachmentIds. +// attachment ids. export async function deleteAttachmentsForEmails( env: Env, emails: readonly EmailMetadata[], @@ -15,7 +22,7 @@ export async function deleteAttachmentsForEmails( const keySet = new Set(keys); const attachmentIds = emails .filter((e) => keySet.has(e.key)) - .flatMap((e) => e.attachmentIds ?? []); + .flatMap((e) => attachmentIdsForCleanup(e)); if (attachmentIds.length === 0) return; const bucket = getAttachmentBucket(env); diff --git a/src/infrastructure/feed-generator.ts b/src/infrastructure/feed-generator.ts index 7d2bcbc..e959b3f 100644 --- a/src/infrastructure/feed-generator.ts +++ b/src/infrastructure/feed-generator.ts @@ -54,7 +54,8 @@ function buildFeed( for (const email of emails) { const entryUrl = `${baseUrl}/entries/${feedId}/${email.receivedAt}`; - const firstAttachment = email.attachments?.[0]; + // Inline images are rendered in the body, not surfaced as an enclosure. 
+ const firstAttachment = email.attachments?.find((a) => !a.inline); const bodyContent = processEmailContent( email.content, email.attachments, diff --git a/src/infrastructure/html-processor.test.ts b/src/infrastructure/html-processor.test.ts index 376e931..2311866 100644 --- a/src/infrastructure/html-processor.test.ts +++ b/src/infrastructure/html-processor.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect } from "vitest"; -import { processEmailContent } from "./html-processor"; +import { processEmailContent, extractInlineCids } from "./html-processor"; import type { AttachmentData } from "../types"; describe("processEmailContent — body extraction", () => { @@ -196,3 +196,23 @@ describe("processEmailContent — inline cid: rewriting", () => { expect(result).toContain('src="https://example.com/a.png"'); }); }); + +describe("extractInlineCids", () => { + it("collects normalized cids referenced by cid: image sources", () => { + const html = ''; + expect(extractInlineCids(html)).toEqual(new Set(["ii_abc", "ii_def"])); + }); + + it("ignores non-cid sources", () => { + const html = ''; + expect(extractInlineCids(html).size).toBe(0); + }); + + it("returns an empty set for plain text", () => { + expect(extractInlineCids("just text, no html").size).toBe(0); + }); + + it("returns an empty set for empty input", () => { + expect(extractInlineCids("").size).toBe(0); + }); +}); diff --git a/src/infrastructure/html-processor.ts b/src/infrastructure/html-processor.ts index 721081c..05b8564 100644 --- a/src/infrastructure/html-processor.ts +++ b/src/infrastructure/html-processor.ts @@ -12,6 +12,22 @@ export function normalizeCid( return trimmed || undefined; } +// Collect the normalized Content-IDs referenced by `cid:` image sources in the +// email body — exactly the set rewriteCidSrc would turn into inline URLs. +// Used at ingest to flag those attachments as inline (rendered in place, hidden +// from the downloadable attachment lists). 
+export function extractInlineCids(content: string): Set { + const cids = new Set(); + if (!content || isPlainText(content)) return cids; + const { document } = parseHTML(content); + document.querySelectorAll("[src]").forEach((el: Element) => { + const match = (el.getAttribute("src") ?? "").match(/^\s*cid:(.+)$/i); + const cid = match ? normalizeCid(match[1]) : undefined; + if (cid) cids.add(cid); + }); + return cids; +} + function cleanMsoStyles(style: string): string { return style .split(";") diff --git a/src/routes/admin.test.ts b/src/routes/admin.test.ts index 36a1fdc..cdd7f44 100644 --- a/src/routes/admin.test.ts +++ b/src/routes/admin.test.ts @@ -766,6 +766,51 @@ describe("Admin Routes", () => { expect(body).toContain("2.0 KB"); }); + it("renders inline cid images in place and hides them from the attachments list", async () => { + const authCookie = await loginAndGetCookie(); + const feedId = "detail-feed"; + const emailKey = `feed:${feedId}:3`; + await mockEnv.EMAIL_STORAGE.put( + emailKey, + JSON.stringify({ + subject: "With inline image", + from: "sender@example.com", + content: '

hello

', + receivedAt: 3, + headers: {}, + attachments: [ + { + id: "img-1", + filename: "logo.png", + contentType: "image/png", + size: 512, + contentId: "logo123", + inline: true, + }, + ], + }), + ); + + const res = await request(`/admin/emails/${emailKey}`, { + headers: { Cookie: authCookie }, + }); + expect(res.status).toBe(200); + const body = await res.text(); + + // The rendered preview is a base64 data: iframe; decode and inspect it. + const match = body.match(/data:text\/html;base64,([A-Za-z0-9+/=]+)/); + expect(match).not.toBeNull(); + const decoded = Buffer.from(match![1], "base64").toString("utf-8"); + // cid: is rewritten to an absolute /files URL so it resolves in the iframe. + expect(decoded).toContain( + "https://test.getmynews.app/files/img-1/logo.png", + ); + expect(decoded).not.toContain("cid:logo123"); + + // Inline image is not surfaced as a downloadable attachment. + expect(body).not.toContain("Attachments"); + }); + it("does not render an attachments section when the email has none", async () => { const authCookie = await loginAndGetCookie(); const feedId = "detail-feed"; diff --git a/src/routes/admin/emails.tsx b/src/routes/admin/emails.tsx index 9a0b105..9e9496f 100644 --- a/src/routes/admin/emails.tsx +++ b/src/routes/admin/emails.tsx @@ -12,7 +12,9 @@ import { feedRssUrl, feedAtomUrl, feedEmailAddress, + baseUrl, } from "../../infrastructure/urls"; +import { processEmailContent } from "../../infrastructure/html-processor"; import { formatBytes } from "../../domain/format"; import { EmailAddress } from "../../domain/value-objects/email-address"; import { emailsPageScript } from "../../scripts/generated/emails-page"; @@ -463,9 +465,18 @@ emailsRouter.get("/emails/:emailKey", async (c) => { if (!emailData) return c.text("Email not found", 404); const feedId = repo.feedIdFromEmailKey(emailKey); - const attachments = emailData.attachments ?? []; + // Inline images render in place; only downloadable attachments go in the list. 
+ const attachments = (emailData.attachments ?? []).filter((a) => !a.inline); - const htmlContent = `${emailData.content}`; + // The rendered preview lives in a `data:` iframe, which has no origin to + // resolve relative URLs against — so cid: refs must be rewritten to absolute + // /files URLs (and the content sanitized) before embedding. + const renderedBody = processEmailContent( + emailData.content, + emailData.attachments, + baseUrl(env), + ); + const htmlContent = `${renderedBody}`; const encodedHtmlContent = (() => { const encoder = new TextEncoder(); diff --git a/src/routes/api/index.ts b/src/routes/api/index.ts index 81ead7f..022162c 100644 --- a/src/routes/api/index.ts +++ b/src/routes/api/index.ts @@ -325,13 +325,15 @@ apiApp.openapi( from: data.from, receivedAt: data.receivedAt, content: data.content, - attachments: (data.attachments ?? []).map((a) => ({ - id: a.id, - filename: a.filename, - contentType: a.contentType, - size: a.size, - url: `/files/${a.id}/${encodeURIComponent(a.filename)}`, - })), + attachments: (data.attachments ?? []) + .filter((a) => !a.inline) + .map((a) => ({ + id: a.id, + filename: a.filename, + contentType: a.contentType, + size: a.size, + url: `/files/${a.id}/${encodeURIComponent(a.filename)}`, + })), }, 200, ); diff --git a/src/routes/entries.test.ts b/src/routes/entries.test.ts index c6628a7..3f6a819 100644 --- a/src/routes/entries.test.ts +++ b/src/routes/entries.test.ts @@ -20,14 +20,17 @@ async function seedFeed( filename: string; contentType: string; size: number; + contentId?: string; + inline?: boolean; }[], + content = "

Email body

", ) { await env.EMAIL_STORAGE.put( EMAIL_KEY, JSON.stringify({ subject: "Test Subject", from: "sender@example.com", - content: "

Email body

", + content, receivedAt: RECEIVED_AT, headers: {}, ...(attachments ? { attachments } : {}), @@ -126,6 +129,31 @@ describe("GET /entries/:feedId/:entryId", () => { expect(body).toContain("2.0 KB"); }); + it("renders inline images in place and omits them from the attachments list", async () => { + await seedFeed( + env, + [ + { + id: "img-1", + filename: "logo.png", + contentType: "image/png", + size: 512, + contentId: "logo123", + inline: true, + }, + ], + '

Body

', + ); + const app = makeApp(); + const res = await app.request(`/${FEED_ID}/${RECEIVED_AT}`, {}, env as any); + const body = await res.text(); + // The cid: ref is rewritten to the stored file URL (rendered in place)… + expect(body).toContain('src="/files/img-1/logo.png"'); + expect(body).not.toContain("cid:logo123"); + // …and the image is not listed as a downloadable attachment. + expect(body).not.toContain("Attachments"); + }); + it("does not render an attachments section when there are none", async () => { await seedFeed(env); const app = makeApp(); diff --git a/src/routes/entries.ts b/src/routes/entries.ts index a072ced..384e906 100644 --- a/src/routes/entries.ts +++ b/src/routes/entries.ts @@ -46,7 +46,9 @@ export async function handle(c: Context<{ Bindings: Env }>): Promise { "default-src 'none'; style-src 'unsafe-inline'; img-src *; frame-src 'none'", ); - const attachments = emailData.attachments ?? []; + // Inline images render in place (cid: refs are rewritten by processEmailContent); + // only genuine, downloadable attachments belong in the list below. + const attachments = (emailData.attachments ?? []).filter((a) => !a.inline); const attachmentsSection = attachments.length ? html`

Attachments

diff --git a/src/types/index.ts b/src/types/index.ts index 4757fb4..7fbbc5a 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -19,6 +19,10 @@ export interface AttachmentData { contentType: string; size: number; contentId?: string; // Normalized Content-ID (no <>) used to resolve inline cid: refs + // True when this attachment is an inline image referenced by a cid: URL in the + // email body. Inline attachments render in place and are hidden from the + // downloadable attachment lists, but are still stored in R2 and cleaned up. + inline?: boolean; } // Email interface for stored emails @@ -59,7 +63,8 @@ export interface EmailMetadata { subject: string; receivedAt: number; size?: number; - attachmentIds?: string[]; + attachmentIds?: string[]; // Downloadable attachments (shown to the user) + inlineAttachmentIds?: string[]; // Inline images: hidden from lists, still cleaned up } // Feed list interface