mirror of
https://github.com/juherr/kill-the-news.git
synced 2026-06-20 22:03:48 +00:00
feat(attachments): render inline cid images in place, not as attachments
Inline images (referenced by src="cid:…") are now classified at ingest and kept out of the downloadable attachment lists, RSS/Atom enclosures, and the API — while still stored in R2 and cleaned up with the email. Fixes the admin email preview, which injected raw HTML into the data: iframe so cid refs never resolved; it now rewrites them to absolute /files URLs. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -54,7 +54,8 @@ function buildFeed(
|
||||
|
||||
for (const email of emails) {
|
||||
const entryUrl = `${baseUrl}/entries/${feedId}/${email.receivedAt}`;
|
||||
const firstAttachment = email.attachments?.[0];
|
||||
// Inline images are rendered in the body, not surfaced as an enclosure.
|
||||
const firstAttachment = email.attachments?.find((a) => !a.inline);
|
||||
const bodyContent = processEmailContent(
|
||||
email.content,
|
||||
email.attachments,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { processEmailContent } from "./html-processor";
|
||||
import { processEmailContent, extractInlineCids } from "./html-processor";
|
||||
import type { AttachmentData } from "../types";
|
||||
|
||||
describe("processEmailContent — body extraction", () => {
|
||||
@@ -196,3 +196,23 @@ describe("processEmailContent — inline cid: rewriting", () => {
|
||||
expect(result).toContain('src="https://example.com/a.png"');
|
||||
});
|
||||
});
|
||||
|
||||
// Unit tests for extractInlineCids: which Content-IDs are collected from an
// email body, and which inputs produce an empty set.
describe("extractInlineCids", () => {
  it("collects normalized cids referenced by cid: image sources", () => {
    // Both a lowercase and an uppercase scheme prefix must be recognized.
    const collected = extractInlineCids(
      '<body><img src="cid:ii_abc"/><img src="CID:ii_def"/></body>',
    );
    const expected = new Set(["ii_abc", "ii_def"]);
    expect(collected).toEqual(expected);
  });

  it("ignores non-cid sources", () => {
    // An ordinary remote image URL is not a Content-ID reference.
    const collected = extractInlineCids(
      '<body><img src="https://example.com/a.png"/></body>',
    );
    expect(collected.size).toBe(0);
  });

  it("returns an empty set for plain text", () => {
    const collected = extractInlineCids("just text, no html");
    expect(collected.size).toBe(0);
  });

  it("returns an empty set for empty input", () => {
    const collected = extractInlineCids("");
    expect(collected.size).toBe(0);
  });
});
|
||||
|
||||
@@ -12,6 +12,22 @@ export function normalizeCid(
|
||||
return trimmed || undefined;
|
||||
}
|
||||
|
||||
// Gather every normalized Content-ID that the email body references through a
// `cid:` value in a `src` attribute — the same set rewriteCidSrc turns into
// inline <img> URLs. Ingest uses the result to flag those attachments as inline
// (rendered in place, hidden from the downloadable attachment lists).
// Empty or plain-text content yields an empty set.
export function extractInlineCids(content: string): Set<string> {
  const found = new Set<string>();

  // Nothing to parse: no content at all, or content that is not HTML.
  if (!content || isPlainText(content)) {
    return found;
  }

  const { document } = parseHTML(content);
  document.querySelectorAll("[src]").forEach((el: Element) => {
    const source = el.getAttribute("src") ?? "";
    // Leading whitespace is tolerated and the scheme is matched case-insensitively.
    const captured = /^\s*cid:(.+)$/i.exec(source);
    if (!captured) return;

    // normalizeCid may return undefined; such references are skipped.
    const cid = normalizeCid(captured[1]);
    if (cid) found.add(cid);
  });

  return found;
}
|
||||
|
||||
function cleanMsoStyles(style: string): string {
|
||||
return style
|
||||
.split(";")
|
||||
|
||||
Reference in New Issue
Block a user