fix(attachments): render inline cid: images in emails and feeds

Capture each attachment's Content-ID at ingestion (postal-mime and
mailparser paths) and rewrite cid: image refs to the stored /files URL
in processEmailContent, shared by the entry view and RSS/Atom feeds.
Bodyless HTML fragments are now serialized so sanitization and the cid
rewrite apply to them too.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Julien Herr
2026-05-23 18:42:04 +02:00
parent 6cd2d425a2
commit debbfc623e
9 changed files with 187 additions and 7 deletions
+2
View File
@@ -1,6 +1,7 @@
import PostalMime from "postal-mime";
import { Env } from "../types";
import { processEmail, RawAttachment } from "./email-processor";
import { normalizeCid } from "../utils/html-processor";
export async function handleCloudflareEmail(
message: ForwardableEmailMessage,
@@ -27,6 +28,7 @@ export async function handleCloudflareEmail(
filename: a.filename || "attachment",
contentType: a.mimeType || "application/octet-stream",
content: a.content as ArrayBuffer,
contentId: normalizeCid(a.contentId),
}));
await processEmail(
+2
View File
@@ -21,6 +21,7 @@ export interface RawAttachment {
filename: string;
contentType: string;
content: ArrayBuffer;
contentId?: string;
}
export interface ProcessEmailInput {
@@ -88,6 +89,7 @@ async function uploadAttachments(
filename: att.filename,
contentType: att.contentType,
size: att.content.byteLength,
...(att.contentId ? { contentId: att.contentId } : {}),
};
}),
);
+5 -1
View File
@@ -1,11 +1,14 @@
import { EmailParser } from "../utils/email-parser";
import { Env } from "../types";
import { processEmail, RawAttachment } from "./email-processor";
import { normalizeCid } from "../utils/html-processor";
/**
 * One entry of `payload.attachments` as received by handleForwardEmail.
 * Every field is optional; the handler substitutes defaults for the name and
 * type, and drops entries whose content cannot be converted to an ArrayBuffer.
 */
export interface ForwardEmailAttachment {
// Replaced by "attachment" when missing or empty.
filename?: string;
// Replaced by "application/octet-stream" when missing or empty.
contentType?: string;
size?: number;
// Content-ID of the part; takes precedence over `contentId` when both are set.
cid?: string;
// Alternative Content-ID field, consulted only when `cid` is null/undefined.
contentId?: string;
// Attachment bytes: either a `{ type: "Buffer", data }` byte-array object or a
// binary ArrayBuffer / view. Converted with toArrayBuffer before upload.
content?: { type: "Buffer"; data: number[] } | ArrayBuffer | ArrayBufferView;
}
@@ -73,13 +76,14 @@ export async function handleForwardEmail(
const emailData = EmailParser.parseForwardEmailPayload(payload);
const rawAttachments: RawAttachment[] = (payload.attachments ?? [])
.map((a) => {
.map((a): RawAttachment | null => {
const buffer = toArrayBuffer(a.content);
if (!buffer) return null;
return {
filename: a.filename || "attachment",
contentType: a.contentType || "application/octet-stream",
content: buffer,
contentId: normalizeCid(a.cid ?? a.contentId),
};
})
.filter((a): a is RawAttachment => a !== null);
+3 -1
View File
@@ -140,7 +140,9 @@ export async function handle(c: Context<{ Bindings: Env }>): Promise<Response> {
<dd>${new Date(emailData.receivedAt).toUTCString()}</dd>
</dl>
<div class="content">
${raw(processEmailContent(emailData.content))}
${raw(
processEmailContent(emailData.content, emailData.attachments),
)}
</div>
${attachmentsSection}
</body>
+41
View File
@@ -256,6 +256,47 @@ describe("POST /api/inbound — attachment upload", () => {
expect(mockR2._has(attachmentId)).toBe(true);
});
// End-to-end check: the Content-ID sent with an attachment is stored on the
// email record, and the entry page serves the inline image from /files
// instead of leaving the cid: reference in place.
it("persists the attachment Content-ID and rewrites inline cid: images on the entry page", async () => {
// Store an empty config object for the feed under test.
await env.EMAIL_STORAGE.put(
`feed:${VALID_FEED_ID}:config`,
JSON.stringify({}),
);
// The HTML body references the image by cid:, and the attachment carries the
// same identifier in its `cid` field.
const payload = makePayload({
html: '<p>hi</p><img src="cid:ii_mpi85rqy0" alt="pic"/>',
attachments: [
{
filename: "pic.png",
contentType: "image/png",
cid: "ii_mpi85rqy0",
content: { type: "Buffer", data: [137, 80, 78] },
},
],
});
const res = await worker.fetch(makeRequest(payload), env);
expect(res.status).toBe(200);
// Locate the stored email through the feed metadata and read it back.
const metadata = (await env.EMAIL_STORAGE.get(
`feed:${VALID_FEED_ID}:metadata`,
{ type: "json" },
)) as any;
const emailData = (await env.EMAIL_STORAGE.get(metadata.emails[0].key, {
type: "json",
})) as any;
const attachmentId = emailData.attachments[0].id;
expect(emailData.attachments[0].contentId).toBe("ii_mpi85rqy0");
// Render the entry page for that email and inspect the resulting HTML.
const entryRes = await worker.fetch(
new Request(
`https://${DOMAIN}/entries/${VALID_FEED_ID}/${metadata.emails[0].receivedAt}`,
),
env,
);
expect(entryRes.status).toBe(200);
const html = await entryRes.text();
// The image must now point at the stored file, with no cid: reference left.
expect(html).toContain(`/files/${attachmentId}/pic.png`);
expect(html).not.toContain("cid:ii_mpi85rqy0");
});
it("skips R2 when attachment content is null", async () => {
await env.EMAIL_STORAGE.put(
`feed:${VALID_FEED_ID}:config`,
+1
View File
@@ -18,6 +18,7 @@ export interface AttachmentData {
filename: string;
contentType: string;
size: number;
contentId?: string; // Normalized Content-ID (no <>) used to resolve inline cid: refs
}
// Email interface for stored emails
+5 -1
View File
@@ -55,7 +55,11 @@ function buildFeed(
for (const email of emails) {
const entryUrl = `${baseUrl}/entries/${feedId}/${email.receivedAt}`;
const firstAttachment = email.attachments?.[0];
const bodyContent = processEmailContent(email.content);
const bodyContent = processEmailContent(
email.content,
email.attachments,
baseUrl,
);
feed.addItem({
title: email.subject,
id: entryUrl,
+73
View File
@@ -1,5 +1,6 @@
import { describe, it, expect } from "vitest";
import { processEmailContent } from "./html-processor";
import type { AttachmentData } from "../types";
describe("processEmailContent — body extraction", () => {
it("extracts content inside <body> tags", () => {
@@ -123,3 +124,75 @@ describe("processEmailContent — mso style cleanup", () => {
expect(result).not.toContain("mso-font-size");
});
});
// Unit tests for the cid: -> /files/<id>/<filename> rewrite that
// processEmailContent performs when it is given the email's attachments.
describe("processEmailContent — inline cid: rewriting", () => {
// Fixture factory. The default filename contains a space so the tests can
// verify that it is percent-encoded in the generated URL.
const attachment = (
overrides: Partial<AttachmentData> = {},
): AttachmentData => ({
id: "att-123",
filename: "chicken big.png",
contentType: "image/png",
size: 100,
contentId: "ii_mpi85rqy0",
...overrides,
});
it("rewrites cid: src to a relative /files URL when no baseUrl", () => {
const html = '<body><img src="cid:ii_mpi85rqy0" alt="x"/></body>';
const result = processEmailContent(html, [attachment()]);
expect(result).toContain('src="/files/att-123/chicken%20big.png"');
expect(result).not.toContain("cid:");
});
it("rewrites cid: src to an absolute URL when baseUrl is given", () => {
const html = '<body><img src="cid:ii_mpi85rqy0"/></body>';
const result = processEmailContent(
html,
[attachment()],
"https://feed.example",
);
expect(result).toContain(
'src="https://feed.example/files/att-123/chicken%20big.png"',
);
});
it("matches a stored Content-ID that has angle brackets", () => {
const html = '<body><img src="cid:ii_mpi85rqy0"/></body>';
const result = processEmailContent(html, [
attachment({ contentId: "<ii_mpi85rqy0>" }),
]);
expect(result).toContain('src="/files/att-123/chicken%20big.png"');
});
it("is case-insensitive on the cid: scheme", () => {
const html = '<body><img src="CID:ii_mpi85rqy0"/></body>';
const result = processEmailContent(html, [attachment()]);
expect(result).toContain('src="/files/att-123/chicken%20big.png"');
});
// The remaining cases assert that the src attribute is left as-is.
it("leaves unknown cid references unchanged", () => {
const html = '<body><img src="cid:unknown"/></body>';
const result = processEmailContent(html, [attachment()]);
expect(result).toContain('src="cid:unknown"');
});
it("leaves cid references unchanged when no attachments are provided", () => {
const html = '<body><img src="cid:ii_mpi85rqy0"/></body>';
const result = processEmailContent(html);
expect(result).toContain('src="cid:ii_mpi85rqy0"');
});
it("ignores attachments without a contentId", () => {
const html = '<body><img src="cid:ii_mpi85rqy0"/></body>';
const result = processEmailContent(html, [
attachment({ contentId: undefined }),
]);
expect(result).toContain('src="cid:ii_mpi85rqy0"');
});
it("does not touch normal http image sources", () => {
const html = '<body><img src="https://example.com/a.png"/></body>';
const result = processEmailContent(html, [attachment()]);
expect(result).toContain('src="https://example.com/a.png"');
});
});
+55 -4
View File
@@ -1,5 +1,16 @@
import { parseHTML } from "linkedom";
import escapeHtml from "escape-html";
import { AttachmentData } from "../types";
/**
 * Normalize a Content-ID so it can be compared with an HTML `cid:` reference.
 *
 * Surrounding whitespace is trimmed and one leading "<" and one trailing ">"
 * are removed, so a stored value such as "<ii_mpi85rqy0>" lines up with the
 * reference "cid:ii_mpi85rqy0".
 *
 * @param cid - Raw Content-ID; may be null, undefined, or empty.
 * @returns The bare identifier, or `undefined` when nothing remains.
 */
export function normalizeCid(
  cid: string | null | undefined,
): string | undefined {
  if (cid === null || cid === undefined || cid === "") {
    return undefined;
  }
  const unwrapped = cid.trim().replace(/^<|>$/g, "");
  // Trim again: whitespace may have been sitting inside the angle brackets.
  const bare = unwrapped.trim();
  return bare === "" ? undefined : bare;
}
function cleanMsoStyles(style: string): string {
return style
@@ -13,7 +24,27 @@ function isPlainText(content: string): boolean {
return !/<[a-z][\s\S]*>/i.test(content);
}
function sanitizeElement(el: Element): void {
/**
 * Point an element's `cid:` src at the stored attachment it refers to.
 *
 * Elements whose `src` is missing, is not a `cid:` URL, or names a Content-ID
 * that is not in `cidMap` are left untouched.
 *
 * @param el - Element to inspect and, on a match, mutate in place.
 * @param cidMap - Attachments keyed by normalized Content-ID.
 * @param baseUrl - Prefix for the generated URL; "" yields a relative URL.
 */
function rewriteCidSrc(
  el: Element,
  cidMap: Map<string, AttachmentData>,
  baseUrl: string,
): void {
  const src = el.getAttribute("src") ?? "";
  const match = src.match(/^\s*cid:(.+)$/i);
  if (!match) return;

  const reference = match[1] ?? "";
  // Try the reference exactly as written first; this is the common case.
  let attachment = cidMap.get(normalizeCid(reference) ?? "");
  if (!attachment && reference.includes("%")) {
    // A cid: URL carries the Content-ID in URL-encoded form (RFC 2392), so
    // "cid:img%40host" refers to the part whose Content-ID is "<img@host>".
    try {
      attachment = cidMap.get(
        normalizeCid(decodeURIComponent(reference)) ?? "",
      );
    } catch {
      // Malformed escape sequence: treat it as an unknown reference.
    }
  }
  if (!attachment) return;

  el.setAttribute(
    "src",
    `${baseUrl}/files/${attachment.id}/${encodeURIComponent(attachment.filename)}`,
  );
}
function sanitizeElement(
el: Element,
cidMap: Map<string, AttachmentData>,
baseUrl: string,
): void {
// Snapshot attribute names before mutating (linkedom attributes is array-like)
const attrs = Array.from(
el.attributes as unknown as ArrayLike<{ name: string }>,
@@ -33,6 +64,9 @@ function sanitizeElement(el: Element): void {
}
}
}
if (cidMap.size > 0) {
rewriteCidSrc(el, cidMap, baseUrl);
}
// Strip mso-* inline style properties (Office HTML noise)
const style = el.getAttribute("style");
if (style !== null) {
@@ -52,22 +86,39 @@ function sanitizeElement(el: Element): void {
* - Removes dangerous elements: <script>, <iframe>, <object>, <embed>
* - Removes event handler attributes and javascript: URLs
* - Strips mso-* inline style properties (Office HTML)
* - Rewrites inline cid: image refs to the stored attachment URL. baseUrl=""
* yields relative URLs (entry page, same origin); a baseUrl yields absolute
* URLs (feeds, for external RSS readers).
*/
export function processEmailContent(content: string): string {
export function processEmailContent(
content: string,
attachments?: AttachmentData[],
baseUrl = "",
): string {
if (!content) return "";
// Plain text is HTML-escaped and wrapped in <pre>, then returned immediately;
// it never reaches the sanitization or cid rewriting below.
if (isPlainText(content)) {
return `<pre style="white-space: pre-wrap; word-break: break-word;">${escapeHtml(content)}</pre>`;
}
// Index attachments by normalized Content-ID. Attachments without one are
// skipped; if two share an ID, the later one replaces the earlier.
const cidMap = new Map<string, AttachmentData>();
for (const att of attachments ?? []) {
const cid = normalizeCid(att.contentId);
if (cid) cidMap.set(cid, att);
}
const { document } = parseHTML(content);
// Remove the disallowed elements first, so the per-element pass below only
// visits elements that stay in the document.
document
.querySelectorAll("script, object, embed, iframe, frame, frameset")
.forEach((el: Element) => el.remove());
document.querySelectorAll("*").forEach((el: Element) => sanitizeElement(el));
document
.querySelectorAll("*")
.forEach((el: Element) => sanitizeElement(el, cidMap, baseUrl));
// Full documents expose a <body>; bodyless fragments are serialized directly
// so that sanitization and cid rewriting still apply to their nodes.
const body = document.querySelector("body");
return body ? body.innerHTML : content;
return body ? body.innerHTML : document.toString();
}