feat(attachments): render inline cid images in place, not as attachments

Inline images (referenced by src="cid:…") are now classified at ingest and
kept out of the downloadable attachment lists, RSS/Atom enclosures, and the
API, while still being stored in R2 and cleaned up with the email. This also
fixes the admin email preview, which injected raw HTML into the data: iframe so
cid: refs never resolved; the preview now rewrites them to absolute /files URLs.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Julien Herr
2026-05-24 14:39:59 +02:00
parent be45e70571
commit 5137637181
14 changed files with 277 additions and 31 deletions
+98
View File
@@ -437,6 +437,104 @@ describe("processEmail — attachments", () => {
expect(typeof metadata.emails[0].attachmentIds[0]).toBe("string");
});
// Ingest classification: an attachment whose Content-ID is referenced by a
// cid: src in the body must be stored with inline: true and tracked under
// inlineAttachmentIds, while the other attachment stays in attachmentIds.
it("classifies a cid-referenced image as inline, not a downloadable attachment", async () => {
const env = createMockEnv({ withR2: true });
// Seed an empty feed config — presumably needed for the feed to accept mail;
// confirm against processEmail.
await env.EMAIL_STORAGE.put(
`feed:${VALID_FEED_ID}:config`,
JSON.stringify({}),
);
// contentId matches the cid: reference in the body below.
const inlineImage: RawAttachment = {
filename: "logo.png",
contentType: "image/png",
content: new TextEncoder().encode("PNG").buffer as ArrayBuffer,
contentId: "logo123",
};
// The body references only cid:logo123; pdfAttachment is a fixture defined
// elsewhere in this test file.
await processEmail(
makeInput({
content: '<p>Hi</p><img src="cid:logo123"/>',
attachments: [inlineImage, pdfAttachment],
}),
env as any,
);
// Read back the feed metadata, then the stored email it points at.
const metadata = await env.EMAIL_STORAGE.get(
`feed:${VALID_FEED_ID}:metadata`,
"json",
);
const emailData = await env.EMAIL_STORAGE.get(
metadata.emails[0].key,
"json",
);
const inline = emailData.attachments.find(
(a: any) => a.filename === "logo.png",
);
const pdf = emailData.attachments.find(
(a: any) => a.filename === "report.pdf",
);
// The flag is only written for inline attachments; it is absent otherwise.
expect(inline.inline).toBe(true);
expect(pdf.inline).toBeUndefined();
// Metadata splits ids: the pdf is downloadable, the logo is inline-only.
expect(metadata.emails[0].attachmentIds).toEqual([pdf.id]);
expect(metadata.emails[0].inlineAttachmentIds).toEqual([inline.id]);
});
// Cleanup on trim: when ingesting a new email drops an older one from the
// feed, R2 objects listed only under inlineAttachmentIds must be deleted too.
it("deletes inline image R2 objects when a trimmed email had them", async () => {
const env = createMockEnv({ withR2: true });
const mockR2 = (env as any).ATTACHMENT_BUCKET as unknown as MockR2;
await env.EMAIL_STORAGE.put(
`feed:${VALID_FEED_ID}:config`,
JSON.stringify({}),
);
// Pre-existing email that owns a single inline image and no downloadable
// attachments.
const oldKey = `feed:${VALID_FEED_ID}:111`;
const inlineId = "old-inline-uuid";
const oldEmail = JSON.stringify({
subject: "Old",
from: "a@b.com",
content: "x".repeat(200) + '<img src="cid:c"/>',
receivedAt: 111,
headers: {},
attachments: [
{
id: inlineId,
filename: "logo.png",
contentType: "image/png",
size: 100,
contentId: "c",
inline: true,
},
],
});
await env.EMAIL_STORAGE.put(oldKey, oldEmail);
// Store the backing R2 object under the attachment id.
await mockR2.put(inlineId, new ArrayBuffer(100));
// The metadata entry carries only inlineAttachmentIds (no attachmentIds), so
// the assertion below can only pass if cleanup reads the inline list.
await env.EMAIL_STORAGE.put(
`feed:${VALID_FEED_ID}:metadata`,
JSON.stringify({
emails: [
{
key: oldKey,
subject: "Old",
receivedAt: 111,
size: oldEmail.length,
inlineAttachmentIds: [inlineId],
},
],
}),
);
// A 50-byte cap is smaller than the 200+ character old email; presumably this
// forces the old entry to be trimmed on ingest — confirm against the feed
// size logic.
const tinyEnv = { ...env, FEED_MAX_SIZE_BYTES: "50" };
const res = await processEmail(
makeInput({ subject: "New" }),
tinyEnv as any,
);
expect(res.ok).toBe(true);
expect(mockR2._has(inlineId)).toBe(false);
});
it("deletes R2 objects when a trimmed email had attachments", async () => {
const env = createMockEnv({ withR2: true });
const mockR2 = (env as any).ATTACHMENT_BUCKET as unknown as MockR2;
+15 -6
View File
@@ -5,6 +5,8 @@ import { dispatchFeedEvents } from "../application/feed-events";
import { extractEmailDomain } from "../infrastructure/favicon-fetcher";
import { parseOneClickUnsubscribe } from "../infrastructure/unsubscribe";
import { getAttachmentBucket } from "../infrastructure/attachments";
import { extractInlineCids } from "../infrastructure/html-processor";
import { attachmentIdsForCleanup } from "./feed-cleanup";
import { FeedRepository } from "../infrastructure/feed-repository";
import { BackgroundScheduler } from "../infrastructure/worker";
import { Feed } from "../domain/feed.aggregate";
@@ -47,14 +49,16 @@ export type IngestResult =
async function uploadAttachments(
attachments: RawAttachment[],
bucket: R2Bucket,
inlineCids: Set<string>,
): Promise<AttachmentData[]> {
return Promise.all(
attachments.map(async (att) => {
const id = crypto.randomUUID();
const inline = att.contentId ? inlineCids.has(att.contentId) : false;
await bucket.put(id, att.content, {
httpMetadata: {
contentType: att.contentType,
contentDisposition: `attachment; filename="${att.filename}"`,
contentDisposition: `${inline ? "inline" : "attachment"}; filename="${att.filename}"`,
},
});
return {
@@ -63,6 +67,7 @@ async function uploadAttachments(
contentType: att.contentType,
size: att.content.byteLength,
...(att.contentId ? { contentId: att.contentId } : {}),
...(inline ? { inline: true } : {}),
};
}),
);
@@ -111,9 +116,10 @@ async function storeEmail(
ctx?: ExecutionContext,
): Promise<void> {
const attachmentBucket = getAttachmentBucket(env);
const inlineCids = extractInlineCids(input.content);
const storedAttachments: AttachmentData[] =
attachmentBucket && input.attachments?.length
? await uploadAttachments(input.attachments, attachmentBucket)
? await uploadAttachments(input.attachments, attachmentBucket, inlineCids)
: [];
const emailData = {
@@ -132,14 +138,17 @@ async function storeEmail(
const serialisedSize = new TextEncoder().encode(
JSON.stringify(emailData),
).byteLength;
const downloadableIds = storedAttachments
.filter((a) => !a.inline)
.map((a) => a.id);
const inlineIds = storedAttachments.filter((a) => a.inline).map((a) => a.id);
const newEntry: EmailMetadata = {
key: emailKey,
subject: emailData.subject,
receivedAt: emailData.receivedAt,
size: serialisedSize,
...(storedAttachments.length > 0
? { attachmentIds: storedAttachments.map((a) => a.id) }
: {}),
...(downloadableIds.length > 0 ? { attachmentIds: downloadableIds } : {}),
...(inlineIds.length > 0 ? { inlineAttachmentIds: inlineIds } : {}),
};
// Track the latest sender's domain (feed icon) and capture the RFC 8058
@@ -166,7 +175,7 @@ async function storeEmail(
const r2Deletions =
attachmentBucket && dropped.length > 0
? dropped
.flatMap((e) => e.attachmentIds ?? [])
.flatMap((e) => attachmentIdsForCleanup(e))
.map((id) => attachmentBucket.delete(id))
: [];
+9 -2
View File
@@ -4,9 +4,16 @@ import { getAttachmentBucket } from "../infrastructure/attachments";
import { FeedRepository } from "../infrastructure/feed-repository";
import { FeedId } from "../domain/value-objects/feed-id";
// Returns every R2 object id owned by an email: the downloadable attachment
// ids followed by the inline image ids. Inline images never appear in the
// user-facing lists, yet they still have to be removed from the bucket when the
// email goes away. A missing list is treated as empty.
export function attachmentIdsForCleanup(e: EmailMetadata): string[] {
  const downloadable = e.attachmentIds ?? [];
  const inlineOnly = e.inlineAttachmentIds ?? [];
  // concat returns a fresh array, so the metadata's own lists are not mutated.
  return downloadable.concat(inlineOnly);
}
// Delete the R2 attachments belonging to the given email keys. Call before the
// emails are removed from feed metadata, while `emails` still carries their
// attachmentIds.
// attachment ids.
export async function deleteAttachmentsForEmails(
env: Env,
emails: readonly EmailMetadata[],
@@ -15,7 +22,7 @@ export async function deleteAttachmentsForEmails(
const keySet = new Set(keys);
const attachmentIds = emails
.filter((e) => keySet.has(e.key))
.flatMap((e) => e.attachmentIds ?? []);
.flatMap((e) => attachmentIdsForCleanup(e));
if (attachmentIds.length === 0) return;
const bucket = getAttachmentBucket(env);
+2 -1
View File
@@ -54,7 +54,8 @@ function buildFeed(
for (const email of emails) {
const entryUrl = `${baseUrl}/entries/${feedId}/${email.receivedAt}`;
const firstAttachment = email.attachments?.[0];
// Inline images are rendered in the body, not surfaced as an enclosure.
const firstAttachment = email.attachments?.find((a) => !a.inline);
const bodyContent = processEmailContent(
email.content,
email.attachments,
+21 -1
View File
@@ -1,5 +1,5 @@
import { describe, it, expect } from "vitest";
import { processEmailContent } from "./html-processor";
import { processEmailContent, extractInlineCids } from "./html-processor";
import type { AttachmentData } from "../types";
describe("processEmailContent — body extraction", () => {
@@ -196,3 +196,23 @@ describe("processEmailContent — inline cid: rewriting", () => {
expect(result).toContain('src="https://example.com/a.png"');
});
});
// Unit tests for extractInlineCids: which cid values are collected from src
// attributes, and which inputs yield an empty set.
describe("extractInlineCids", () => {
  it("collects normalized cids referenced by cid: image sources", () => {
    // The scheme is matched case-insensitively, so both images count.
    const markup = '<body><img src="cid:ii_abc"/><img src="CID:ii_def"/></body>';
    const expected = new Set(["ii_abc", "ii_def"]);
    expect(extractInlineCids(markup)).toEqual(expected);
  });

  it("ignores non-cid sources", () => {
    const collected = extractInlineCids(
      '<body><img src="https://example.com/a.png"/></body>',
    );
    expect(collected.size).toBe(0);
  });

  it("returns an empty set for plain text", () => {
    const collected = extractInlineCids("just text, no html");
    expect(collected.size).toBe(0);
  });

  it("returns an empty set for empty input", () => {
    const collected = extractInlineCids("");
    expect(collected.size).toBe(0);
  });
});
+16
View File
@@ -12,6 +12,22 @@ export function normalizeCid(
return trimmed || undefined;
}
// Gathers the normalized Content-IDs that the email body references through
// `cid:` src attributes. Ingest uses the result to flag the matching
// attachments as inline, so they render in place and stay out of the
// downloadable attachment lists. Empty or plain-text content yields an empty
// set.
export function extractInlineCids(content: string): Set<string> {
  const referenced = new Set<string>();
  if (content && !isPlainText(content)) {
    const { document } = parseHTML(content);
    // Every element carrying a src attribute is inspected, not only <img>.
    document.querySelectorAll("[src]").forEach((node: Element) => {
      const src = node.getAttribute("src") ?? "";
      // Case-insensitive scheme match; leading whitespace is tolerated.
      const hit = /^\s*cid:(.+)$/i.exec(src);
      if (!hit) return;
      const normalized = normalizeCid(hit[1]);
      if (normalized) referenced.add(normalized);
    });
  }
  return referenced;
}
function cleanMsoStyles(style: string): string {
return style
.split(";")
+45
View File
@@ -766,6 +766,51 @@ describe("Admin Routes", () => {
expect(body).toContain("2.0 KB");
});
// Admin email detail page: an inline image must be resolvable inside the
// preview iframe and must not produce an attachments section.
it("renders inline cid images in place and hides them from the attachments list", async () => {
const authCookie = await loginAndGetCookie();
const feedId = "detail-feed";
const emailKey = `feed:${feedId}:3`;
// Stored email whose only attachment is an inline image referenced by the
// cid: src in its body.
await mockEnv.EMAIL_STORAGE.put(
emailKey,
JSON.stringify({
subject: "With inline image",
from: "sender@example.com",
content: '<p>hello</p><img src="cid:logo123"/>',
receivedAt: 3,
headers: {},
attachments: [
{
id: "img-1",
filename: "logo.png",
contentType: "image/png",
size: 512,
contentId: "logo123",
inline: true,
},
],
}),
);
const res = await request(`/admin/emails/${emailKey}`, {
headers: { Cookie: authCookie },
});
expect(res.status).toBe(200);
const body = await res.text();
// The rendered preview is a base64 data: iframe; decode and inspect it.
const match = body.match(/data:text\/html;base64,([A-Za-z0-9+/=]+)/);
expect(match).not.toBeNull();
const decoded = Buffer.from(match![1], "base64").toString("utf-8");
// cid: is rewritten to an absolute /files URL so it resolves in the iframe.
expect(decoded).toContain(
"https://test.getmynews.app/files/img-1/logo.png",
);
expect(decoded).not.toContain("cid:logo123");
// Inline image is not surfaced as a downloadable attachment.
expect(body).not.toContain("Attachments");
});
it("does not render an attachments section when the email has none", async () => {
const authCookie = await loginAndGetCookie();
const feedId = "detail-feed";
+13 -2
View File
@@ -12,7 +12,9 @@ import {
feedRssUrl,
feedAtomUrl,
feedEmailAddress,
baseUrl,
} from "../../infrastructure/urls";
import { processEmailContent } from "../../infrastructure/html-processor";
import { formatBytes } from "../../domain/format";
import { EmailAddress } from "../../domain/value-objects/email-address";
import { emailsPageScript } from "../../scripts/generated/emails-page";
@@ -463,9 +465,18 @@ emailsRouter.get("/emails/:emailKey", async (c) => {
if (!emailData) return c.text("Email not found", 404);
const feedId = repo.feedIdFromEmailKey(emailKey);
const attachments = emailData.attachments ?? [];
// Inline images render in place; only downloadable attachments go in the list.
const attachments = (emailData.attachments ?? []).filter((a) => !a.inline);
const htmlContent = `<!DOCTYPE html><html><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><style>body{font-family:-apple-system,BlinkMacSystemFont,'SF Pro Text','SF Pro Display','Helvetica Neue',Arial,sans-serif;line-height:1.5;padding:16px;margin:0;color:#333;box-sizing:border-box}img{max-width:100%;height:auto}a{color:#0070f3}@media(prefers-color-scheme:dark){body{background-color:#1c1c1e;color:#ffffff}a{color:#0a84ff}}</style></head><body>${emailData.content}</body></html>`;
// The rendered preview lives in a `data:` iframe, which has no origin to
// resolve relative URLs against — so cid: refs must be rewritten to absolute
// /files URLs (and the content sanitized) before embedding.
const renderedBody = processEmailContent(
emailData.content,
emailData.attachments,
baseUrl(env),
);
const htmlContent = `<!DOCTYPE html><html><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><style>body{font-family:-apple-system,BlinkMacSystemFont,'SF Pro Text','SF Pro Display','Helvetica Neue',Arial,sans-serif;line-height:1.5;padding:16px;margin:0;color:#333;box-sizing:border-box}img{max-width:100%;height:auto}a{color:#0070f3}@media(prefers-color-scheme:dark){body{background-color:#1c1c1e;color:#ffffff}a{color:#0a84ff}}</style></head><body>${renderedBody}</body></html>`;
const encodedHtmlContent = (() => {
const encoder = new TextEncoder();
+9 -7
View File
@@ -325,13 +325,15 @@ apiApp.openapi(
from: data.from,
receivedAt: data.receivedAt,
content: data.content,
attachments: (data.attachments ?? []).map((a) => ({
id: a.id,
filename: a.filename,
contentType: a.contentType,
size: a.size,
url: `/files/${a.id}/${encodeURIComponent(a.filename)}`,
})),
attachments: (data.attachments ?? [])
.filter((a) => !a.inline)
.map((a) => ({
id: a.id,
filename: a.filename,
contentType: a.contentType,
size: a.size,
url: `/files/${a.id}/${encodeURIComponent(a.filename)}`,
})),
},
200,
);
+29 -1
View File
@@ -20,14 +20,17 @@ async function seedFeed(
filename: string;
contentType: string;
size: number;
contentId?: string;
inline?: boolean;
}[],
content = "<p>Email body</p>",
) {
await env.EMAIL_STORAGE.put(
EMAIL_KEY,
JSON.stringify({
subject: "Test Subject",
from: "sender@example.com",
content: "<p>Email body</p>",
content,
receivedAt: RECEIVED_AT,
headers: {},
...(attachments ? { attachments } : {}),
@@ -126,6 +129,31 @@ describe("GET /entries/:feedId/:entryId", () => {
expect(body).toContain("2.0 KB");
});
// Public entry page: an inline image is shown through its stored file URL and
// does not trigger the attachments section.
it("renders inline images in place and omits them from the attachments list", async () => {
  // The only attachment is flagged inline and matches the cid in the body.
  const inlineLogo = {
    id: "img-1",
    filename: "logo.png",
    contentType: "image/png",
    size: 512,
    contentId: "logo123",
    inline: true,
  };
  await seedFeed(env, [inlineLogo], '<p>Body</p><img src="cid:logo123"/>');

  const response = await makeApp().request(
    `/${FEED_ID}/${RECEIVED_AT}`,
    {},
    env as any,
  );
  const page = await response.text();

  // The cid: reference has been swapped for the stored file URL…
  expect(page).toContain('src="/files/img-1/logo.png"');
  expect(page).not.toContain("cid:logo123");
  // …and no downloadable-attachments heading is rendered.
  expect(page).not.toContain("Attachments");
});
it("does not render an attachments section when there are none", async () => {
await seedFeed(env);
const app = makeApp();
+3 -1
View File
@@ -46,7 +46,9 @@ export async function handle(c: Context<{ Bindings: Env }>): Promise<Response> {
"default-src 'none'; style-src 'unsafe-inline'; img-src *; frame-src 'none'",
);
const attachments = emailData.attachments ?? [];
// Inline images render in place (cid: refs are rewritten by processEmailContent);
// only genuine, downloadable attachments belong in the list below.
const attachments = (emailData.attachments ?? []).filter((a) => !a.inline);
const attachmentsSection = attachments.length
? html`<section class="attachments">
<h2>Attachments</h2>
+6 -1
View File
@@ -19,6 +19,10 @@ export interface AttachmentData {
contentType: string;
size: number;
contentId?: string; // Normalized Content-ID (no <>) used to resolve inline cid: refs
// True when this attachment is an inline image referenced by a cid: URL in the
// email body. Inline attachments render in place and are hidden from the
// downloadable attachment lists, but are still stored in R2 and cleaned up.
inline?: boolean;
}
// Email interface for stored emails
@@ -59,7 +63,8 @@ export interface EmailMetadata {
subject: string;
receivedAt: number;
size?: number;
attachmentIds?: string[];
attachmentIds?: string[]; // Downloadable attachments (shown to the user)
inlineAttachmentIds?: string[]; // Inline images: hidden from lists, still cleaned up
}
// Feed list interface