refactor(html-processor): isolate cid rewrite from sanitization

Keep sanitizeElement single-purpose: it no longer takes cidMap or baseUrl.
The cid: rewrite now runs as a separate pass over [src] elements, guarded
by cidMap.size > 0. Use a type-only import for AttachmentData.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Julien Herr
2026-05-23 18:47:20 +02:00
parent debbfc623e
commit 5fc91a0be4
+9 -12
View File
@@ -1,6 +1,6 @@
import { parseHTML } from "linkedom"; import { parseHTML } from "linkedom";
import escapeHtml from "escape-html"; import escapeHtml from "escape-html";
import { AttachmentData } from "../types"; import type { AttachmentData } from "../types";
// Strip surrounding angle brackets and whitespace from a Content-ID so that a // Strip surrounding angle brackets and whitespace from a Content-ID so that a
// stored value like "<ii_mpi85rqy0>" matches an HTML reference "cid:ii_mpi85rqy0". // stored value like "<ii_mpi85rqy0>" matches an HTML reference "cid:ii_mpi85rqy0".
@@ -40,11 +40,7 @@ function rewriteCidSrc(
); );
} }
function sanitizeElement( function sanitizeElement(el: Element): void {
el: Element,
cidMap: Map<string, AttachmentData>,
baseUrl: string,
): void {
// Snapshot attribute names before mutating (linkedom attributes is array-like) // Snapshot attribute names before mutating (linkedom attributes is array-like)
const attrs = Array.from( const attrs = Array.from(
el.attributes as unknown as ArrayLike<{ name: string }>, el.attributes as unknown as ArrayLike<{ name: string }>,
@@ -64,9 +60,6 @@ function sanitizeElement(
} }
} }
} }
if (cidMap.size > 0) {
rewriteCidSrc(el, cidMap, baseUrl);
}
// Strip mso-* inline style properties (Office HTML noise) // Strip mso-* inline style properties (Office HTML noise)
const style = el.getAttribute("style"); const style = el.getAttribute("style");
if (style !== null) { if (style !== null) {
@@ -113,9 +106,13 @@ export function processEmailContent(
.querySelectorAll("script, object, embed, iframe, frame, frameset") .querySelectorAll("script, object, embed, iframe, frame, frameset")
.forEach((el: Element) => el.remove()); .forEach((el: Element) => el.remove());
document document.querySelectorAll("*").forEach((el: Element) => sanitizeElement(el));
.querySelectorAll("*")
.forEach((el: Element) => sanitizeElement(el, cidMap, baseUrl)); if (cidMap.size > 0) {
document
.querySelectorAll("[src]")
.forEach((el: Element) => rewriteCidSrc(el, cidMap, baseUrl));
}
// Full documents expose a <body>; bodyless fragments are serialized directly // Full documents expose a <body>; bodyless fragments are serialized directly
// so that sanitization and cid rewriting still apply to their nodes. // so that sanitization and cid rewriting still apply to their nodes.