mirror of
https://github.com/juherr/kill-the-news.git
synced 2026-06-20 22:03:48 +00:00
fix(attachments): render inline cid: images in emails and feeds
Capture each attachment's Content-ID at ingestion (postal-mime and mailparser paths) and rewrite cid: image refs to the stored /files URL in processEmailContent, shared by the entry view and RSS/Atom feeds. Bodyless HTML fragments are now serialized so sanitization and the cid rewrite apply to them too. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
import PostalMime from "postal-mime";
|
import PostalMime from "postal-mime";
|
||||||
import { Env } from "../types";
|
import { Env } from "../types";
|
||||||
import { processEmail, RawAttachment } from "./email-processor";
|
import { processEmail, RawAttachment } from "./email-processor";
|
||||||
|
import { normalizeCid } from "../utils/html-processor";
|
||||||
|
|
||||||
export async function handleCloudflareEmail(
|
export async function handleCloudflareEmail(
|
||||||
message: ForwardableEmailMessage,
|
message: ForwardableEmailMessage,
|
||||||
@@ -27,6 +28,7 @@ export async function handleCloudflareEmail(
|
|||||||
filename: a.filename || "attachment",
|
filename: a.filename || "attachment",
|
||||||
contentType: a.mimeType || "application/octet-stream",
|
contentType: a.mimeType || "application/octet-stream",
|
||||||
content: a.content as ArrayBuffer,
|
content: a.content as ArrayBuffer,
|
||||||
|
contentId: normalizeCid(a.contentId),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
await processEmail(
|
await processEmail(
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ export interface RawAttachment {
|
|||||||
filename: string;
|
filename: string;
|
||||||
contentType: string;
|
contentType: string;
|
||||||
content: ArrayBuffer;
|
content: ArrayBuffer;
|
||||||
|
contentId?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ProcessEmailInput {
|
export interface ProcessEmailInput {
|
||||||
@@ -88,6 +89,7 @@ async function uploadAttachments(
|
|||||||
filename: att.filename,
|
filename: att.filename,
|
||||||
contentType: att.contentType,
|
contentType: att.contentType,
|
||||||
size: att.content.byteLength,
|
size: att.content.byteLength,
|
||||||
|
...(att.contentId ? { contentId: att.contentId } : {}),
|
||||||
};
|
};
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -1,11 +1,14 @@
|
|||||||
import { EmailParser } from "../utils/email-parser";
|
import { EmailParser } from "../utils/email-parser";
|
||||||
import { Env } from "../types";
|
import { Env } from "../types";
|
||||||
import { processEmail, RawAttachment } from "./email-processor";
|
import { processEmail, RawAttachment } from "./email-processor";
|
||||||
|
import { normalizeCid } from "../utils/html-processor";
|
||||||
|
|
||||||
export interface ForwardEmailAttachment {
|
export interface ForwardEmailAttachment {
|
||||||
filename?: string;
|
filename?: string;
|
||||||
contentType?: string;
|
contentType?: string;
|
||||||
size?: number;
|
size?: number;
|
||||||
|
cid?: string;
|
||||||
|
contentId?: string;
|
||||||
content?: { type: "Buffer"; data: number[] } | ArrayBuffer | ArrayBufferView;
|
content?: { type: "Buffer"; data: number[] } | ArrayBuffer | ArrayBufferView;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -73,13 +76,14 @@ export async function handleForwardEmail(
|
|||||||
const emailData = EmailParser.parseForwardEmailPayload(payload);
|
const emailData = EmailParser.parseForwardEmailPayload(payload);
|
||||||
|
|
||||||
const rawAttachments: RawAttachment[] = (payload.attachments ?? [])
|
const rawAttachments: RawAttachment[] = (payload.attachments ?? [])
|
||||||
.map((a) => {
|
.map((a): RawAttachment | null => {
|
||||||
const buffer = toArrayBuffer(a.content);
|
const buffer = toArrayBuffer(a.content);
|
||||||
if (!buffer) return null;
|
if (!buffer) return null;
|
||||||
return {
|
return {
|
||||||
filename: a.filename || "attachment",
|
filename: a.filename || "attachment",
|
||||||
contentType: a.contentType || "application/octet-stream",
|
contentType: a.contentType || "application/octet-stream",
|
||||||
content: buffer,
|
content: buffer,
|
||||||
|
contentId: normalizeCid(a.cid ?? a.contentId),
|
||||||
};
|
};
|
||||||
})
|
})
|
||||||
.filter((a): a is RawAttachment => a !== null);
|
.filter((a): a is RawAttachment => a !== null);
|
||||||
|
|||||||
@@ -140,7 +140,9 @@ export async function handle(c: Context<{ Bindings: Env }>): Promise<Response> {
|
|||||||
<dd>${new Date(emailData.receivedAt).toUTCString()}</dd>
|
<dd>${new Date(emailData.receivedAt).toUTCString()}</dd>
|
||||||
</dl>
|
</dl>
|
||||||
<div class="content">
|
<div class="content">
|
||||||
${raw(processEmailContent(emailData.content))}
|
${raw(
|
||||||
|
processEmailContent(emailData.content, emailData.attachments),
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
${attachmentsSection}
|
${attachmentsSection}
|
||||||
</body>
|
</body>
|
||||||
|
|||||||
@@ -256,6 +256,47 @@ describe("POST /api/inbound — attachment upload", () => {
|
|||||||
expect(mockR2._has(attachmentId)).toBe(true);
|
expect(mockR2._has(attachmentId)).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("persists the attachment Content-ID and rewrites inline cid: images on the entry page", async () => {
|
||||||
|
await env.EMAIL_STORAGE.put(
|
||||||
|
`feed:${VALID_FEED_ID}:config`,
|
||||||
|
JSON.stringify({}),
|
||||||
|
);
|
||||||
|
const payload = makePayload({
|
||||||
|
html: '<p>hi</p><img src="cid:ii_mpi85rqy0" alt="pic"/>',
|
||||||
|
attachments: [
|
||||||
|
{
|
||||||
|
filename: "pic.png",
|
||||||
|
contentType: "image/png",
|
||||||
|
cid: "ii_mpi85rqy0",
|
||||||
|
content: { type: "Buffer", data: [137, 80, 78] },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
const res = await worker.fetch(makeRequest(payload), env);
|
||||||
|
expect(res.status).toBe(200);
|
||||||
|
|
||||||
|
const metadata = (await env.EMAIL_STORAGE.get(
|
||||||
|
`feed:${VALID_FEED_ID}:metadata`,
|
||||||
|
{ type: "json" },
|
||||||
|
)) as any;
|
||||||
|
const emailData = (await env.EMAIL_STORAGE.get(metadata.emails[0].key, {
|
||||||
|
type: "json",
|
||||||
|
})) as any;
|
||||||
|
const attachmentId = emailData.attachments[0].id;
|
||||||
|
expect(emailData.attachments[0].contentId).toBe("ii_mpi85rqy0");
|
||||||
|
|
||||||
|
const entryRes = await worker.fetch(
|
||||||
|
new Request(
|
||||||
|
`https://${DOMAIN}/entries/${VALID_FEED_ID}/${metadata.emails[0].receivedAt}`,
|
||||||
|
),
|
||||||
|
env,
|
||||||
|
);
|
||||||
|
expect(entryRes.status).toBe(200);
|
||||||
|
const html = await entryRes.text();
|
||||||
|
expect(html).toContain(`/files/${attachmentId}/pic.png`);
|
||||||
|
expect(html).not.toContain("cid:ii_mpi85rqy0");
|
||||||
|
});
|
||||||
|
|
||||||
it("skips R2 when attachment content is null", async () => {
|
it("skips R2 when attachment content is null", async () => {
|
||||||
await env.EMAIL_STORAGE.put(
|
await env.EMAIL_STORAGE.put(
|
||||||
`feed:${VALID_FEED_ID}:config`,
|
`feed:${VALID_FEED_ID}:config`,
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ export interface AttachmentData {
|
|||||||
filename: string;
|
filename: string;
|
||||||
contentType: string;
|
contentType: string;
|
||||||
size: number;
|
size: number;
|
||||||
|
contentId?: string; // Normalized Content-ID (no <>) used to resolve inline cid: refs
|
||||||
}
|
}
|
||||||
|
|
||||||
// Email interface for stored emails
|
// Email interface for stored emails
|
||||||
|
|||||||
@@ -55,7 +55,11 @@ function buildFeed(
|
|||||||
for (const email of emails) {
|
for (const email of emails) {
|
||||||
const entryUrl = `${baseUrl}/entries/${feedId}/${email.receivedAt}`;
|
const entryUrl = `${baseUrl}/entries/${feedId}/${email.receivedAt}`;
|
||||||
const firstAttachment = email.attachments?.[0];
|
const firstAttachment = email.attachments?.[0];
|
||||||
const bodyContent = processEmailContent(email.content);
|
const bodyContent = processEmailContent(
|
||||||
|
email.content,
|
||||||
|
email.attachments,
|
||||||
|
baseUrl,
|
||||||
|
);
|
||||||
feed.addItem({
|
feed.addItem({
|
||||||
title: email.subject,
|
title: email.subject,
|
||||||
id: entryUrl,
|
id: entryUrl,
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import { describe, it, expect } from "vitest";
|
import { describe, it, expect } from "vitest";
|
||||||
import { processEmailContent } from "./html-processor";
|
import { processEmailContent } from "./html-processor";
|
||||||
|
import type { AttachmentData } from "../types";
|
||||||
|
|
||||||
describe("processEmailContent — body extraction", () => {
|
describe("processEmailContent — body extraction", () => {
|
||||||
it("extracts content inside <body> tags", () => {
|
it("extracts content inside <body> tags", () => {
|
||||||
@@ -123,3 +124,75 @@ describe("processEmailContent — mso style cleanup", () => {
|
|||||||
expect(result).not.toContain("mso-font-size");
|
expect(result).not.toContain("mso-font-size");
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("processEmailContent — inline cid: rewriting", () => {
|
||||||
|
const attachment = (
|
||||||
|
overrides: Partial<AttachmentData> = {},
|
||||||
|
): AttachmentData => ({
|
||||||
|
id: "att-123",
|
||||||
|
filename: "chicken big.png",
|
||||||
|
contentType: "image/png",
|
||||||
|
size: 100,
|
||||||
|
contentId: "ii_mpi85rqy0",
|
||||||
|
...overrides,
|
||||||
|
});
|
||||||
|
|
||||||
|
it("rewrites cid: src to a relative /files URL when no baseUrl", () => {
|
||||||
|
const html = '<body><img src="cid:ii_mpi85rqy0" alt="x"/></body>';
|
||||||
|
const result = processEmailContent(html, [attachment()]);
|
||||||
|
expect(result).toContain('src="/files/att-123/chicken%20big.png"');
|
||||||
|
expect(result).not.toContain("cid:");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("rewrites cid: src to an absolute URL when baseUrl is given", () => {
|
||||||
|
const html = '<body><img src="cid:ii_mpi85rqy0"/></body>';
|
||||||
|
const result = processEmailContent(
|
||||||
|
html,
|
||||||
|
[attachment()],
|
||||||
|
"https://feed.example",
|
||||||
|
);
|
||||||
|
expect(result).toContain(
|
||||||
|
'src="https://feed.example/files/att-123/chicken%20big.png"',
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("matches a stored Content-ID that has angle brackets", () => {
|
||||||
|
const html = '<body><img src="cid:ii_mpi85rqy0"/></body>';
|
||||||
|
const result = processEmailContent(html, [
|
||||||
|
attachment({ contentId: "<ii_mpi85rqy0>" }),
|
||||||
|
]);
|
||||||
|
expect(result).toContain('src="/files/att-123/chicken%20big.png"');
|
||||||
|
});
|
||||||
|
|
||||||
|
it("is case-insensitive on the cid: scheme", () => {
|
||||||
|
const html = '<body><img src="CID:ii_mpi85rqy0"/></body>';
|
||||||
|
const result = processEmailContent(html, [attachment()]);
|
||||||
|
expect(result).toContain('src="/files/att-123/chicken%20big.png"');
|
||||||
|
});
|
||||||
|
|
||||||
|
it("leaves unknown cid references unchanged", () => {
|
||||||
|
const html = '<body><img src="cid:unknown"/></body>';
|
||||||
|
const result = processEmailContent(html, [attachment()]);
|
||||||
|
expect(result).toContain('src="cid:unknown"');
|
||||||
|
});
|
||||||
|
|
||||||
|
it("leaves cid references unchanged when no attachments are provided", () => {
|
||||||
|
const html = '<body><img src="cid:ii_mpi85rqy0"/></body>';
|
||||||
|
const result = processEmailContent(html);
|
||||||
|
expect(result).toContain('src="cid:ii_mpi85rqy0"');
|
||||||
|
});
|
||||||
|
|
||||||
|
it("ignores attachments without a contentId", () => {
|
||||||
|
const html = '<body><img src="cid:ii_mpi85rqy0"/></body>';
|
||||||
|
const result = processEmailContent(html, [
|
||||||
|
attachment({ contentId: undefined }),
|
||||||
|
]);
|
||||||
|
expect(result).toContain('src="cid:ii_mpi85rqy0"');
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not touch normal http image sources", () => {
|
||||||
|
const html = '<body><img src="https://example.com/a.png"/></body>';
|
||||||
|
const result = processEmailContent(html, [attachment()]);
|
||||||
|
expect(result).toContain('src="https://example.com/a.png"');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
@@ -1,5 +1,16 @@
|
|||||||
import { parseHTML } from "linkedom";
|
import { parseHTML } from "linkedom";
|
||||||
import escapeHtml from "escape-html";
|
import escapeHtml from "escape-html";
|
||||||
|
import { AttachmentData } from "../types";
|
||||||
|
|
||||||
|
/**
 * Normalizes a Content-ID so that a stored value like "<ii_mpi85rqy0>"
 * matches an HTML reference "cid:ii_mpi85rqy0".
 *
 * Surrounding whitespace and one leading "<" / trailing ">" are removed.
 *
 * @param cid - Raw Content-ID, with or without angle brackets.
 * @returns The bare identifier, or `undefined` when the input is missing,
 *   not a string, or empty after normalization.
 */
export function normalizeCid(
  cid: string | null | undefined,
): string | undefined {
  // Callers pass fields read from externally supplied email payloads, so the
  // static type is not a runtime guarantee: a non-string value must not throw
  // on `.trim()` and abort ingestion over an optional field.
  if (typeof cid !== "string") return undefined;
  const trimmed = cid.trim().replace(/^<|>$/g, "").trim();
  return trimmed || undefined;
}
|
||||||
|
|
||||||
function cleanMsoStyles(style: string): string {
|
function cleanMsoStyles(style: string): string {
|
||||||
return style
|
return style
|
||||||
@@ -13,7 +24,27 @@ function isPlainText(content: string): boolean {
|
|||||||
return !/<[a-z][\s\S]*>/i.test(content);
|
return !/<[a-z][\s\S]*>/i.test(content);
|
||||||
}
|
}
|
||||||
|
|
||||||
function sanitizeElement(el: Element): void {
|
/**
 * Rewrites an element's inline `cid:` image reference to the URL of the
 * matching stored attachment.
 *
 * Elements without a `src` attribute, with a non-`cid:` source, or whose
 * Content-ID has no entry in `cidMap` are left untouched.
 *
 * @param el - Element whose `src` attribute is inspected and possibly replaced.
 * @param cidMap - Attachments keyed by normalized Content-ID.
 * @param baseUrl - Prefix for the generated URL; "" yields a relative
 *   `/files/...` URL. Trailing slashes are ignored.
 */
function rewriteCidSrc(
  el: Element,
  cidMap: Map<string, AttachmentData>,
  baseUrl: string,
): void {
  const src = el.getAttribute("src") ?? "";
  const match = src.match(/^\s*cid:(.+)$/i);
  if (!match) return;

  const rawCid = match[1] ?? "";

  // cid: URLs carry the Content-ID percent-encoded (e.g. "foo%40bar" for
  // "foo@bar"), so a literal comparison alone misses those references.
  let decodedCid = rawCid;
  try {
    decodedCid = decodeURIComponent(rawCid);
  } catch {
    // Malformed percent-escape: keep the literal value only.
  }

  // Literal match first, so ids that matched before still resolve the same way.
  const attachment =
    cidMap.get(normalizeCid(rawCid) ?? "") ??
    cidMap.get(normalizeCid(decodedCid) ?? "");
  if (!attachment) return;

  // Avoid "//files/..." when the caller's base URL ends with a slash.
  const prefix = baseUrl.replace(/\/+$/, "");
  el.setAttribute(
    "src",
    `${prefix}/files/${attachment.id}/${encodeURIComponent(attachment.filename)}`,
  );
}
|
||||||
|
|
||||||
|
function sanitizeElement(
|
||||||
|
el: Element,
|
||||||
|
cidMap: Map<string, AttachmentData>,
|
||||||
|
baseUrl: string,
|
||||||
|
): void {
|
||||||
// Snapshot attribute names before mutating (linkedom attributes is array-like)
|
// Snapshot attribute names before mutating (linkedom attributes is array-like)
|
||||||
const attrs = Array.from(
|
const attrs = Array.from(
|
||||||
el.attributes as unknown as ArrayLike<{ name: string }>,
|
el.attributes as unknown as ArrayLike<{ name: string }>,
|
||||||
@@ -33,6 +64,9 @@ function sanitizeElement(el: Element): void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (cidMap.size > 0) {
|
||||||
|
rewriteCidSrc(el, cidMap, baseUrl);
|
||||||
|
}
|
||||||
// Strip mso-* inline style properties (Office HTML noise)
|
// Strip mso-* inline style properties (Office HTML noise)
|
||||||
const style = el.getAttribute("style");
|
const style = el.getAttribute("style");
|
||||||
if (style !== null) {
|
if (style !== null) {
|
||||||
@@ -52,22 +86,39 @@ function sanitizeElement(el: Element): void {
|
|||||||
* - Removes dangerous elements: <script>, <iframe>, <object>, <embed>
|
* - Removes dangerous elements: <script>, <iframe>, <object>, <embed>
|
||||||
* - Removes event handler attributes and javascript: URLs
|
* - Removes event handler attributes and javascript: URLs
|
||||||
* - Strips mso-* inline style properties (Office HTML)
|
* - Strips mso-* inline style properties (Office HTML)
|
||||||
|
* - Rewrites inline cid: image refs to the stored attachment URL. baseUrl=""
|
||||||
|
* yields relative URLs (entry page, same origin); a baseUrl yields absolute
|
||||||
|
* URLs (feeds, for external RSS readers).
|
||||||
*/
|
*/
|
||||||
export function processEmailContent(content: string): string {
|
export function processEmailContent(
|
||||||
|
content: string,
|
||||||
|
attachments?: AttachmentData[],
|
||||||
|
baseUrl = "",
|
||||||
|
): string {
|
||||||
if (!content) return "";
|
if (!content) return "";
|
||||||
|
|
||||||
if (isPlainText(content)) {
|
if (isPlainText(content)) {
|
||||||
return `<pre style="white-space: pre-wrap; word-break: break-word;">${escapeHtml(content)}</pre>`;
|
return `<pre style="white-space: pre-wrap; word-break: break-word;">${escapeHtml(content)}</pre>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const cidMap = new Map<string, AttachmentData>();
|
||||||
|
for (const att of attachments ?? []) {
|
||||||
|
const cid = normalizeCid(att.contentId);
|
||||||
|
if (cid) cidMap.set(cid, att);
|
||||||
|
}
|
||||||
|
|
||||||
const { document } = parseHTML(content);
|
const { document } = parseHTML(content);
|
||||||
|
|
||||||
document
|
document
|
||||||
.querySelectorAll("script, object, embed, iframe, frame, frameset")
|
.querySelectorAll("script, object, embed, iframe, frame, frameset")
|
||||||
.forEach((el: Element) => el.remove());
|
.forEach((el: Element) => el.remove());
|
||||||
|
|
||||||
document.querySelectorAll("*").forEach((el: Element) => sanitizeElement(el));
|
document
|
||||||
|
.querySelectorAll("*")
|
||||||
|
.forEach((el: Element) => sanitizeElement(el, cidMap, baseUrl));
|
||||||
|
|
||||||
|
// Full documents expose a <body>; bodyless fragments are serialized directly
|
||||||
|
// so that sanitization and cid rewriting still apply to their nodes.
|
||||||
const body = document.querySelector("body");
|
const body = document.querySelector("body");
|
||||||
return body ? body.innerHTML : content;
|
return body ? body.innerHTML : document.toString();
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user