mirror of
https://github.com/juherr/kill-the-news.git
synced 2026-06-20 22:03:48 +00:00
refactor(html-processor): isolate cid rewrite from sanitization
Keep sanitizeElement single-purpose and run the cid: rewrite as a separate guarded pass over [src] elements. Use a type-only import for AttachmentData.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
import { parseHTML } from "linkedom";
|
import { parseHTML } from "linkedom";
|
||||||
import escapeHtml from "escape-html";
|
import escapeHtml from "escape-html";
|
||||||
import { AttachmentData } from "../types";
|
import type { AttachmentData } from "../types";
|
||||||
|
|
||||||
// Strip surrounding angle brackets and whitespace from a Content-ID so that a
|
// Strip surrounding angle brackets and whitespace from a Content-ID so that a
|
||||||
// stored value like "<ii_mpi85rqy0>" matches an HTML reference "cid:ii_mpi85rqy0".
|
// stored value like "<ii_mpi85rqy0>" matches an HTML reference "cid:ii_mpi85rqy0".
|
||||||
@@ -40,11 +40,7 @@ function rewriteCidSrc(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function sanitizeElement(
|
function sanitizeElement(el: Element): void {
|
||||||
el: Element,
|
|
||||||
cidMap: Map<string, AttachmentData>,
|
|
||||||
baseUrl: string,
|
|
||||||
): void {
|
|
||||||
// Snapshot attribute names before mutating (linkedom attributes is array-like)
|
// Snapshot attribute names before mutating (linkedom attributes is array-like)
|
||||||
const attrs = Array.from(
|
const attrs = Array.from(
|
||||||
el.attributes as unknown as ArrayLike<{ name: string }>,
|
el.attributes as unknown as ArrayLike<{ name: string }>,
|
||||||
@@ -64,9 +60,6 @@ function sanitizeElement(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (cidMap.size > 0) {
|
|
||||||
rewriteCidSrc(el, cidMap, baseUrl);
|
|
||||||
}
|
|
||||||
// Strip mso-* inline style properties (Office HTML noise)
|
// Strip mso-* inline style properties (Office HTML noise)
|
||||||
const style = el.getAttribute("style");
|
const style = el.getAttribute("style");
|
||||||
if (style !== null) {
|
if (style !== null) {
|
||||||
@@ -113,9 +106,13 @@ export function processEmailContent(
|
|||||||
.querySelectorAll("script, object, embed, iframe, frame, frameset")
|
.querySelectorAll("script, object, embed, iframe, frame, frameset")
|
||||||
.forEach((el: Element) => el.remove());
|
.forEach((el: Element) => el.remove());
|
||||||
|
|
||||||
document
|
document.querySelectorAll("*").forEach((el: Element) => sanitizeElement(el));
|
||||||
.querySelectorAll("*")
|
|
||||||
.forEach((el: Element) => sanitizeElement(el, cidMap, baseUrl));
|
if (cidMap.size > 0) {
|
||||||
|
document
|
||||||
|
.querySelectorAll("[src]")
|
||||||
|
.forEach((el: Element) => rewriteCidSrc(el, cidMap, baseUrl));
|
||||||
|
}
|
||||||
|
|
||||||
// Full documents expose a <body>; bodyless fragments are serialized directly
|
// Full documents expose a <body>; bodyless fragments are serialized directly
|
||||||
// so that sanitization and cid rewriting still apply to their nodes.
|
// so that sanitization and cid rewriting still apply to their nodes.
|
||||||
|
|||||||
Reference in New Issue
Block a user