From 5fc91a0be4bd25dd16e9398d337541bcaf4850a4 Mon Sep 17 00:00:00 2001 From: Julien Herr Date: Sat, 23 May 2026 18:47:20 +0200 Subject: [PATCH] refactor(html-processor): isolate cid rewrite from sanitization Keep sanitizeElement single-purpose and run the cid: rewrite as a separate guarded pass over [src] elements. Use a type-only import for AttachmentData. Co-Authored-By: Claude Opus 4.7 --- src/utils/html-processor.ts | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/utils/html-processor.ts b/src/utils/html-processor.ts index 22c1032..721081c 100644 --- a/src/utils/html-processor.ts +++ b/src/utils/html-processor.ts @@ -1,6 +1,6 @@ import { parseHTML } from "linkedom"; import escapeHtml from "escape-html"; -import { AttachmentData } from "../types"; +import type { AttachmentData } from "../types"; // Strip surrounding angle brackets and whitespace from a Content-ID so that a // stored value like "<ii_mpi85rqy0>" matches an HTML reference "cid:ii_mpi85rqy0". 
@@ -40,11 +40,7 @@ function rewriteCidSrc( ); } -function sanitizeElement( - el: Element, - cidMap: Map, - baseUrl: string, -): void { +function sanitizeElement(el: Element): void { // Snapshot attribute names before mutating (linkedom attributes is array-like) const attrs = Array.from( el.attributes as unknown as ArrayLike<{ name: string }>, @@ -64,9 +60,6 @@ function sanitizeElement( } } } - if (cidMap.size > 0) { - rewriteCidSrc(el, cidMap, baseUrl); - } // Strip mso-* inline style properties (Office HTML noise) const style = el.getAttribute("style"); if (style !== null) { @@ -113,9 +106,13 @@ export function processEmailContent( .querySelectorAll("script, object, embed, iframe, frame, frameset") .forEach((el: Element) => el.remove()); - document - .querySelectorAll("*") - .forEach((el: Element) => sanitizeElement(el, cidMap, baseUrl)); + document.querySelectorAll("*").forEach((el: Element) => sanitizeElement(el)); + + if (cidMap.size > 0) { + document + .querySelectorAll("[src]") + .forEach((el: Element) => rewriteCidSrc(el, cidMap, baseUrl)); + } // Full documents expose a <body>; bodyless fragments are serialized directly // so that sanitization and cid rewriting still apply to their nodes.