feat(attachments): render inline cid images in place, not as attachments

Inline images (referenced by src="cid:…") are now classified at ingest and
kept out of the downloadable attachment lists, RSS/Atom enclosures, and the
API — while still stored in R2 and cleaned up with the email. Fixes the admin
email preview, which injected raw HTML into the data: iframe so cid refs never
resolved; it now rewrites them to absolute /files URLs.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Julien Herr
2026-05-24 14:39:59 +02:00
parent be45e70571
commit 5137637181
14 changed files with 277 additions and 31 deletions
+2 -2
View File
@@ -127,10 +127,10 @@ src/
All data lives in the `EMAIL_STORAGE` KV namespace:
| Key | Value |
| --------------------------- | ------------------------------------------------------------------------ |
| --------------------------- | ---------------------------------------------------------------------------------------------- |
| `feeds:list` | `{ feeds: Array<{ id, title, description?, expires_at? }> }` |
| `feed:<feedId>:config` | `FeedConfig` |
| `feed:<feedId>:metadata` | `{ emails: Array<{ key, subject, receivedAt, size?, attachmentIds? }> }` |
| `feed:<feedId>:metadata` | `{ emails: Array<{ key, subject, receivedAt, size?, attachmentIds?, inlineAttachmentIds? }> }` |
| `feed:<feedId>:<timestamp>` | Full `EmailData` |
| `websub:subs:<feedId>` | `WebSubSubscription[]` (per-feed subscriber list) |
| `icon:<domain>` | Cached favicon record (base64 + content type; negative entries allowed) |
+2
View File
@@ -128,6 +128,8 @@ FEED_MAX_SIZE_BYTES = "524288" # 512 KB — adjust as needed
When an incoming email contains attachments, the Worker can store them in a Cloudflare R2 bucket and expose them as `<enclosure>` elements in the RSS feed (and `<link rel="enclosure">` in Atom). Each attachment is served at `/files/{id}/{filename}` with an immutable cache header. Attachments are also listed with download links on the admin email detail page and the public entry view.
Inline images (the ones an email references with `src="cid:…"`) are handled separately: they are still stored in R2 (and deleted with the email), but instead of appearing in the attachment list they render in place — the `cid:` reference is rewritten to the stored `/files/{id}/{filename}` URL in the feed, the admin preview, and the public entry view.
This feature is **optional**. If no R2 bucket is bound, attachments are silently ignored and nothing else changes.
**Setup (automated):** `setup.sh` now asks _"Enable email attachments stored in R2?"_. Answer yes and it creates the buckets (`<worker>-attachments` and `<worker>-attachments-preview`) and wires the binding into the generated `wrangler.toml` for you.
+98
View File
@@ -437,6 +437,104 @@ describe("processEmail — attachments", () => {
expect(typeof metadata.emails[0].attachmentIds[0]).toBe("string");
});
// Ingest-time classification: an attachment whose Content-ID is referenced by a
// `cid:` src in the email body must be stored with `inline: true` and tracked
// under `inlineAttachmentIds`, while the other attachment stays a regular
// downloadable one tracked under `attachmentIds`.
it("classifies a cid-referenced image as inline, not a downloadable attachment", async () => {
const env = createMockEnv({ withR2: true });
await env.EMAIL_STORAGE.put(
`feed:${VALID_FEED_ID}:config`,
JSON.stringify({}),
);
// Content-ID matches the cid: reference in the HTML body passed below.
const inlineImage: RawAttachment = {
filename: "logo.png",
contentType: "image/png",
content: new TextEncoder().encode("PNG").buffer as ArrayBuffer,
contentId: "logo123",
};
// NOTE(review): pdfAttachment is defined outside this test; the lookup below
// assumes its filename is "report.pdf" and that the body does not reference it.
await processEmail(
makeInput({
content: '<p>Hi</p><img src="cid:logo123"/>',
attachments: [inlineImage, pdfAttachment],
}),
env as any,
);
// Read back the feed metadata, then the stored email its first entry points at.
const metadata = await env.EMAIL_STORAGE.get(
`feed:${VALID_FEED_ID}:metadata`,
"json",
);
const emailData = await env.EMAIL_STORAGE.get(
metadata.emails[0].key,
"json",
);
const inline = emailData.attachments.find(
(a: any) => a.filename === "logo.png",
);
const pdf = emailData.attachments.find(
(a: any) => a.filename === "report.pdf",
);
// The flag is only written when true, so the pdf has no `inline` key at all.
expect(inline.inline).toBe(true);
expect(pdf.inline).toBeUndefined();
// Metadata splits ids: the pdf is downloadable, the logo is inline-only.
expect(metadata.emails[0].attachmentIds).toEqual([pdf.id]);
expect(metadata.emails[0].inlineAttachmentIds).toEqual([inline.id]);
});
// Cleanup check: an R2 object that an email owns only through
// `inlineAttachmentIds` must be removed from the bucket once that email is
// dropped from the feed.
it("deletes inline image R2 objects when a trimmed email had them", async () => {
const env = createMockEnv({ withR2: true });
const mockR2 = (env as any).ATTACHMENT_BUCKET as unknown as MockR2;
await env.EMAIL_STORAGE.put(
`feed:${VALID_FEED_ID}:config`,
JSON.stringify({}),
);
const oldKey = `feed:${VALID_FEED_ID}:111`;
const inlineId = "old-inline-uuid";
// Pre-existing stored email carrying a single inline image attachment.
const oldEmail = JSON.stringify({
subject: "Old",
from: "a@b.com",
content: "x".repeat(200) + '<img src="cid:c"/>',
receivedAt: 111,
headers: {},
attachments: [
{
id: inlineId,
filename: "logo.png",
contentType: "image/png",
size: 100,
contentId: "c",
inline: true,
},
],
});
await env.EMAIL_STORAGE.put(oldKey, oldEmail);
// Matching R2 object, keyed by the attachment id.
await mockR2.put(inlineId, new ArrayBuffer(100));
// Metadata lists the id under inlineAttachmentIds only (no attachmentIds).
await env.EMAIL_STORAGE.put(
`feed:${VALID_FEED_ID}:metadata`,
JSON.stringify({
emails: [
{
key: oldKey,
subject: "Old",
receivedAt: 111,
size: oldEmail.length,
inlineAttachmentIds: [inlineId],
},
],
}),
);
// NOTE(review): the 50-byte FEED_MAX_SIZE_BYTES presumably forces the old
// entry to be trimmed when the new email is ingested — confirm in processEmail.
const tinyEnv = { ...env, FEED_MAX_SIZE_BYTES: "50" };
const res = await processEmail(
makeInput({ subject: "New" }),
tinyEnv as any,
);
expect(res.ok).toBe(true);
expect(mockR2._has(inlineId)).toBe(false);
});
it("deletes R2 objects when a trimmed email had attachments", async () => {
const env = createMockEnv({ withR2: true });
const mockR2 = (env as any).ATTACHMENT_BUCKET as unknown as MockR2;
+15 -6
View File
@@ -5,6 +5,8 @@ import { dispatchFeedEvents } from "../application/feed-events";
import { extractEmailDomain } from "../infrastructure/favicon-fetcher";
import { parseOneClickUnsubscribe } from "../infrastructure/unsubscribe";
import { getAttachmentBucket } from "../infrastructure/attachments";
import { extractInlineCids } from "../infrastructure/html-processor";
import { attachmentIdsForCleanup } from "./feed-cleanup";
import { FeedRepository } from "../infrastructure/feed-repository";
import { BackgroundScheduler } from "../infrastructure/worker";
import { Feed } from "../domain/feed.aggregate";
@@ -47,14 +49,16 @@ export type IngestResult =
async function uploadAttachments(
attachments: RawAttachment[],
bucket: R2Bucket,
inlineCids: Set<string>,
): Promise<AttachmentData[]> {
return Promise.all(
attachments.map(async (att) => {
const id = crypto.randomUUID();
const inline = att.contentId ? inlineCids.has(att.contentId) : false;
await bucket.put(id, att.content, {
httpMetadata: {
contentType: att.contentType,
contentDisposition: `attachment; filename="${att.filename}"`,
contentDisposition: `${inline ? "inline" : "attachment"}; filename="${att.filename}"`,
},
});
return {
@@ -63,6 +67,7 @@ async function uploadAttachments(
contentType: att.contentType,
size: att.content.byteLength,
...(att.contentId ? { contentId: att.contentId } : {}),
...(inline ? { inline: true } : {}),
};
}),
);
@@ -111,9 +116,10 @@ async function storeEmail(
ctx?: ExecutionContext,
): Promise<void> {
const attachmentBucket = getAttachmentBucket(env);
const inlineCids = extractInlineCids(input.content);
const storedAttachments: AttachmentData[] =
attachmentBucket && input.attachments?.length
? await uploadAttachments(input.attachments, attachmentBucket)
? await uploadAttachments(input.attachments, attachmentBucket, inlineCids)
: [];
const emailData = {
@@ -132,14 +138,17 @@ async function storeEmail(
const serialisedSize = new TextEncoder().encode(
JSON.stringify(emailData),
).byteLength;
const downloadableIds = storedAttachments
.filter((a) => !a.inline)
.map((a) => a.id);
const inlineIds = storedAttachments.filter((a) => a.inline).map((a) => a.id);
const newEntry: EmailMetadata = {
key: emailKey,
subject: emailData.subject,
receivedAt: emailData.receivedAt,
size: serialisedSize,
...(storedAttachments.length > 0
? { attachmentIds: storedAttachments.map((a) => a.id) }
: {}),
...(downloadableIds.length > 0 ? { attachmentIds: downloadableIds } : {}),
...(inlineIds.length > 0 ? { inlineAttachmentIds: inlineIds } : {}),
};
// Track the latest sender's domain (feed icon) and capture the RFC 8058
@@ -166,7 +175,7 @@ async function storeEmail(
const r2Deletions =
attachmentBucket && dropped.length > 0
? dropped
.flatMap((e) => e.attachmentIds ?? [])
.flatMap((e) => attachmentIdsForCleanup(e))
.map((id) => attachmentBucket.delete(id))
: [];
+9 -2
View File
@@ -4,9 +4,16 @@ import { getAttachmentBucket } from "../infrastructure/attachments";
import { FeedRepository } from "../infrastructure/feed-repository";
import { FeedId } from "../domain/value-objects/feed-id";
// Every R2 object id owned by one email: the downloadable attachment ids
// followed by the inline image ids. Inline images never appear in the
// user-facing attachment lists, yet their objects still have to be purged from
// the bucket together with the email. Either list may be missing from the
// metadata entry; a missing list contributes nothing. Always returns a new array.
export function attachmentIdsForCleanup(e: EmailMetadata): string[] {
  const downloadable = e.attachmentIds ?? [];
  const inlineOnly = e.inlineAttachmentIds ?? [];
  return downloadable.concat(inlineOnly);
}
// Delete the R2 attachments belonging to the given email keys. Call before the
// emails are removed from feed metadata, while `emails` still carries their
// attachmentIds.
// attachment ids.
export async function deleteAttachmentsForEmails(
env: Env,
emails: readonly EmailMetadata[],
@@ -15,7 +22,7 @@ export async function deleteAttachmentsForEmails(
const keySet = new Set(keys);
const attachmentIds = emails
.filter((e) => keySet.has(e.key))
.flatMap((e) => e.attachmentIds ?? []);
.flatMap((e) => attachmentIdsForCleanup(e));
if (attachmentIds.length === 0) return;
const bucket = getAttachmentBucket(env);
+2 -1
View File
@@ -54,7 +54,8 @@ function buildFeed(
for (const email of emails) {
const entryUrl = `${baseUrl}/entries/${feedId}/${email.receivedAt}`;
const firstAttachment = email.attachments?.[0];
// Inline images are rendered in the body, not surfaced as an enclosure.
const firstAttachment = email.attachments?.find((a) => !a.inline);
const bodyContent = processEmailContent(
email.content,
email.attachments,
+21 -1
View File
@@ -1,5 +1,5 @@
import { describe, it, expect } from "vitest";
import { processEmailContent } from "./html-processor";
import { processEmailContent, extractInlineCids } from "./html-processor";
import type { AttachmentData } from "../types";
describe("processEmailContent — body extraction", () => {
@@ -196,3 +196,23 @@ describe("processEmailContent — inline cid: rewriting", () => {
expect(result).toContain('src="https://example.com/a.png"');
});
});
// Unit tests for extractInlineCids: which src values contribute a Content-ID
// and which inputs yield an empty set.
describe("extractInlineCids", () => {
  it("collects normalized cids referenced by cid: image sources", () => {
    // The second image uses an upper-case scheme and must still be collected.
    const found = extractInlineCids(
      '<body><img src="cid:ii_abc"/><img src="CID:ii_def"/></body>',
    );
    expect(found).toEqual(new Set(["ii_abc", "ii_def"]));
  });

  it("ignores non-cid sources", () => {
    const found = extractInlineCids(
      '<body><img src="https://example.com/a.png"/></body>',
    );
    expect(found.size).toBe(0);
  });

  it("returns an empty set for plain text", () => {
    const found = extractInlineCids("just text, no html");
    expect(found.size).toBe(0);
  });

  it("returns an empty set for empty input", () => {
    const found = extractInlineCids("");
    expect(found.size).toBe(0);
  });
});
+16
View File
@@ -12,6 +12,22 @@ export function normalizeCid(
return trimmed || undefined;
}
// Gather the Content-IDs that the email body points at through a `cid:` URL in
// a `src` attribute. Ingest uses the result to flag the matching attachments as
// inline, so they render in place and stay out of the downloadable attachment
// lists.
//
// Returns an empty set when the content is empty or isPlainText reports it as
// plain text. Each id goes through normalizeCid before being added, and the
// `cid:` scheme is matched case-insensitively.
export function extractInlineCids(content: string): Set<string> {
  const referenced = new Set<string>();
  if (!content || isPlainText(content)) return referenced;

  const cidSrc = /^\s*cid:(.+)$/i;
  const { document } = parseHTML(content);
  // Every element carrying a src attribute is inspected, not only <img>.
  document.querySelectorAll("[src]").forEach((el: Element) => {
    const src = el.getAttribute("src") ?? "";
    const hit = cidSrc.exec(src);
    if (!hit) return;
    const cid = normalizeCid(hit[1]);
    if (cid) referenced.add(cid);
  });
  return referenced;
}
function cleanMsoStyles(style: string): string {
return style
.split(";")
+45
View File
@@ -766,6 +766,51 @@ describe("Admin Routes", () => {
expect(body).toContain("2.0 KB");
});
// Admin email detail page: an attachment flagged `inline` must be rendered in
// the preview through a rewritten absolute /files URL and must not produce an
// "Attachments" section on the page.
it("renders inline cid images in place and hides them from the attachments list", async () => {
const authCookie = await loginAndGetCookie();
const feedId = "detail-feed";
const emailKey = `feed:${feedId}:3`;
// Stored email whose only attachment is the inline image referenced by the body.
await mockEnv.EMAIL_STORAGE.put(
emailKey,
JSON.stringify({
subject: "With inline image",
from: "sender@example.com",
content: '<p>hello</p><img src="cid:logo123"/>',
receivedAt: 3,
headers: {},
attachments: [
{
id: "img-1",
filename: "logo.png",
contentType: "image/png",
size: 512,
contentId: "logo123",
inline: true,
},
],
}),
);
const res = await request(`/admin/emails/${emailKey}`, {
headers: { Cookie: authCookie },
});
expect(res.status).toBe(200);
const body = await res.text();
// The rendered preview is a base64 data: iframe; decode and inspect it.
const match = body.match(/data:text\/html;base64,([A-Za-z0-9+/=]+)/);
expect(match).not.toBeNull();
const decoded = Buffer.from(match![1], "base64").toString("utf-8");
// cid: is rewritten to an absolute /files URL so it resolves in the iframe.
// NOTE(review): the host below presumably comes from the mock env's base URL — confirm.
expect(decoded).toContain(
"https://test.getmynews.app/files/img-1/logo.png",
);
expect(decoded).not.toContain("cid:logo123");
// Inline image is not surfaced as a downloadable attachment.
expect(body).not.toContain("Attachments");
});
it("does not render an attachments section when the email has none", async () => {
const authCookie = await loginAndGetCookie();
const feedId = "detail-feed";
+13 -2
View File
@@ -12,7 +12,9 @@ import {
feedRssUrl,
feedAtomUrl,
feedEmailAddress,
baseUrl,
} from "../../infrastructure/urls";
import { processEmailContent } from "../../infrastructure/html-processor";
import { formatBytes } from "../../domain/format";
import { EmailAddress } from "../../domain/value-objects/email-address";
import { emailsPageScript } from "../../scripts/generated/emails-page";
@@ -463,9 +465,18 @@ emailsRouter.get("/emails/:emailKey", async (c) => {
if (!emailData) return c.text("Email not found", 404);
const feedId = repo.feedIdFromEmailKey(emailKey);
const attachments = emailData.attachments ?? [];
// Inline images render in place; only downloadable attachments go in the list.
const attachments = (emailData.attachments ?? []).filter((a) => !a.inline);
const htmlContent = `<!DOCTYPE html><html><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><style>body{font-family:-apple-system,BlinkMacSystemFont,'SF Pro Text','SF Pro Display','Helvetica Neue',Arial,sans-serif;line-height:1.5;padding:16px;margin:0;color:#333;box-sizing:border-box}img{max-width:100%;height:auto}a{color:#0070f3}@media(prefers-color-scheme:dark){body{background-color:#1c1c1e;color:#ffffff}a{color:#0a84ff}}</style></head><body>${emailData.content}</body></html>`;
// The rendered preview lives in a `data:` iframe, which has no origin to
// resolve relative URLs against — so cid: refs must be rewritten to absolute
// /files URLs (and the content sanitized) before embedding.
const renderedBody = processEmailContent(
emailData.content,
emailData.attachments,
baseUrl(env),
);
const htmlContent = `<!DOCTYPE html><html><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><style>body{font-family:-apple-system,BlinkMacSystemFont,'SF Pro Text','SF Pro Display','Helvetica Neue',Arial,sans-serif;line-height:1.5;padding:16px;margin:0;color:#333;box-sizing:border-box}img{max-width:100%;height:auto}a{color:#0070f3}@media(prefers-color-scheme:dark){body{background-color:#1c1c1e;color:#ffffff}a{color:#0a84ff}}</style></head><body>${renderedBody}</body></html>`;
const encodedHtmlContent = (() => {
const encoder = new TextEncoder();
+3 -1
View File
@@ -325,7 +325,9 @@ apiApp.openapi(
from: data.from,
receivedAt: data.receivedAt,
content: data.content,
attachments: (data.attachments ?? []).map((a) => ({
attachments: (data.attachments ?? [])
.filter((a) => !a.inline)
.map((a) => ({
id: a.id,
filename: a.filename,
contentType: a.contentType,
+29 -1
View File
@@ -20,14 +20,17 @@ async function seedFeed(
filename: string;
contentType: string;
size: number;
contentId?: string;
inline?: boolean;
}[],
content = "<p>Email body</p>",
) {
await env.EMAIL_STORAGE.put(
EMAIL_KEY,
JSON.stringify({
subject: "Test Subject",
from: "sender@example.com",
content: "<p>Email body</p>",
content,
receivedAt: RECEIVED_AT,
headers: {},
...(attachments ? { attachments } : {}),
@@ -126,6 +129,31 @@ describe("GET /entries/:feedId/:entryId", () => {
expect(body).toContain("2.0 KB");
});
// Public entry view: an attachment flagged `inline` is shown through a
// rewritten /files src in the body and does not produce an "Attachments"
// section.
it("renders inline images in place and omits them from the attachments list", async () => {
// Seed one inline image plus a body that references it by Content-ID.
await seedFeed(
env,
[
{
id: "img-1",
filename: "logo.png",
contentType: "image/png",
size: 512,
contentId: "logo123",
inline: true,
},
],
'<p>Body</p><img src="cid:logo123"/>',
);
const app = makeApp();
const res = await app.request(`/${FEED_ID}/${RECEIVED_AT}`, {}, env as any);
const body = await res.text();
// The cid: ref is rewritten to the stored file URL (rendered in place)…
expect(body).toContain('src="/files/img-1/logo.png"');
expect(body).not.toContain("cid:logo123");
// …and the image is not listed as a downloadable attachment.
expect(body).not.toContain("Attachments");
});
it("does not render an attachments section when there are none", async () => {
await seedFeed(env);
const app = makeApp();
+3 -1
View File
@@ -46,7 +46,9 @@ export async function handle(c: Context<{ Bindings: Env }>): Promise<Response> {
"default-src 'none'; style-src 'unsafe-inline'; img-src *; frame-src 'none'",
);
const attachments = emailData.attachments ?? [];
// Inline images render in place (cid: refs are rewritten by processEmailContent);
// only genuine, downloadable attachments belong in the list below.
const attachments = (emailData.attachments ?? []).filter((a) => !a.inline);
const attachmentsSection = attachments.length
? html`<section class="attachments">
<h2>Attachments</h2>
+6 -1
View File
@@ -19,6 +19,10 @@ export interface AttachmentData {
contentType: string;
size: number;
contentId?: string; // Normalized Content-ID (no <>) used to resolve inline cid: refs
// True when this attachment is an inline image referenced by a cid: URL in the
// email body. Inline attachments render in place and are hidden from the
// downloadable attachment lists, but are still stored in R2 and cleaned up.
inline?: boolean;
}
// Email interface for stored emails
@@ -59,7 +63,8 @@ export interface EmailMetadata {
subject: string;
receivedAt: number;
size?: number;
attachmentIds?: string[];
attachmentIds?: string[]; // Downloadable attachments (shown to the user)
inlineAttachmentIds?: string[]; // Inline images: hidden from lists, still cleaned up
}
// Feed list interface