import { describe, it, expect } from "vitest"; import { processEmailContent, extractInlineCids, htmlToText, extractLinks, extractFeedLinks, } from "./html-processor"; import type { AttachmentData } from "../types"; describe("processEmailContent — body extraction", () => { it("extracts content inside tags", () => { const html = "

Hello

"; expect(processEmailContent(html)).toBe("

Hello

"); }); it("handles body tag with attributes", () => { const html = '

Hi

'; expect(processEmailContent(html)).toBe("

Hi

"); }); it("returns fragment unchanged when no body tags present", () => { const fragment = "

Already a fragment

"; expect(processEmailContent(fragment)).toBe("

Already a fragment

"); }); it("is case-insensitive for body tag matching", () => { const html = "

content

"; expect(processEmailContent(html)).toBe("

content

"); }); }); describe("processEmailContent — plain text", () => { it("wraps plain text in
", () => {
    const text = "Hello world\nSecond line";
    const result = processEmailContent(text);
    expect(result).toMatch(/^
 in plain text", () => {
    // The input carries all three characters that must be escaped before
    // plain text can be embedded in HTML output.
    const text = "Price < 10 & size > 5";
    const result = processEmailContent(text);
    // Assert the escaped forms rather than the bare characters: a bare "&" is
    // present whether or not escaping happened, and any wrapper markup around
    // the text would satisfy bare "<" / ">" checks, so those could never fail.
    // NOTE(review): assumes named entities (&lt; &gt; &amp;) rather than
    // numeric character references — confirm against html-processor.
    expect(result).toContain("&lt;");
    expect(result).toContain("&gt;");
    expect(result).toContain("&amp;");
    // The unescaped source fragment (including its space) must not survive;
    // a "<10" probe can never match this input and would pass vacuously.
    expect(result).not.toContain("< 10");
  });

  // Edge case: an empty body is expected to come back as an empty string.
  it("returns empty string for empty input", () => {
    const result = processEmailContent("");
    expect(result).toBe("");
  });
});

describe("processEmailContent — dangerous element removal", () => {
  it("removes ";
    const result = processEmailContent(html);
    expect(result).not.toContain("Hello

"); }); it("removes

ok

"; const result = processEmailContent(html); expect(result).not.toContain("ok

"); }); it("removes and tags", () => { const html = "

ok

"; const result = processEmailContent(html); expect(result).not.toContain(" { it("removes event handler attributes", () => { const html = "link"; const result = processEmailContent(html); expect(result).not.toContain("onclick"); expect(result).toContain('href="https://x.com"'); }); it("removes onerror on images", () => { const html = ""; const result = processEmailContent(html); expect(result).not.toContain("onerror"); }); it("removes javascript: hrefs", () => { const html = "click"; const result = processEmailContent(html); expect(result).not.toContain("javascript:"); }); it("preserves legitimate href and src attributes", () => { const html = "link"; const result = processEmailContent(html); expect(result).toContain("https://example.com"); }); }); describe("processEmailContent — mso style cleanup", () => { it("strips mso-* properties from inline styles", () => { const html = '

text

'; const result = processEmailContent(html); expect(result).not.toContain("mso-margin-top"); expect(result).toContain("color: red"); }); it("removes style attribute entirely when only mso properties remain", () => { const html = '

text

'; const result = processEmailContent(html); expect(result).not.toContain("style="); }); it("preserves style attribute when non-mso properties remain", () => { const html = '

text

'; const result = processEmailContent(html); expect(result).toContain("font-weight"); expect(result).not.toContain("mso-font-size"); }); }); describe("processEmailContent — inline cid: rewriting", () => { const attachment = ( overrides: Partial = {}, ): AttachmentData => ({ id: "att-123", filename: "chicken big.png", contentType: "image/png", size: 100, contentId: "ii_mpi85rqy0", ...overrides, }); it("rewrites cid: src to a relative /files URL when no baseUrl", () => { const html = 'x'; const result = processEmailContent(html, [attachment()]); expect(result).toContain('src="/files/att-123/chicken%20big.png"'); expect(result).not.toContain("cid:"); }); it("rewrites cid: src to an absolute URL when baseUrl is given", () => { const html = ''; const result = processEmailContent( html, [attachment()], "https://feed.example", ); expect(result).toContain( 'src="https://feed.example/files/att-123/chicken%20big.png"', ); }); it("matches a stored Content-ID that has angle brackets", () => { const html = ''; const result = processEmailContent(html, [ attachment({ contentId: "" }), ]); expect(result).toContain('src="/files/att-123/chicken%20big.png"'); }); it("is case-insensitive on the cid: scheme", () => { const html = ''; const result = processEmailContent(html, [attachment()]); expect(result).toContain('src="/files/att-123/chicken%20big.png"'); }); it("leaves unknown cid references unchanged", () => { const html = ''; const result = processEmailContent(html, [attachment()]); expect(result).toContain('src="cid:unknown"'); }); it("leaves cid references unchanged when no attachments are provided", () => { const html = ''; const result = processEmailContent(html); expect(result).toContain('src="cid:ii_mpi85rqy0"'); }); it("ignores attachments without a contentId", () => { const html = ''; const result = processEmailContent(html, [ attachment({ contentId: undefined }), ]); expect(result).toContain('src="cid:ii_mpi85rqy0"'); }); it("does not touch normal http image sources", () 
=> { const html = ''; const result = processEmailContent(html, [attachment()]); expect(result).toContain('src="https://example.com/a.png"'); }); }); describe("processEmailContent — lazy image promotion", () => { it("promotes data-src to src when src is missing", () => { const html = ''; const result = processEmailContent(html); expect(result).toContain('src="https://x.com/a.png"'); }); it("promotes data-src over a data: placeholder src", () => { const html = ''; const result = processEmailContent(html); expect(result).toContain('src="https://x.com/a.png"'); expect(result).not.toContain("data:image/gif"); }); it("does not clobber a real src with data-src", () => { const html = ''; const result = processEmailContent(html); expect(result).toContain('src="https://real.com/a.png"'); }); it("promotes data-srcset when srcset is absent", () => { const html = ''; const result = processEmailContent(html); expect(result).toContain('srcset="https://x.com/a.png 2x"'); }); it("strips loading=lazy", () => { const html = ''; const result = processEmailContent(html); expect(result).not.toContain("loading"); }); }); describe("processEmailContent — relative URL absolutization", () => { const base = "https://news.example.com/"; it("absolutizes a root-relative href against the sender base", () => { const html = 'link'; const result = processEmailContent(html, undefined, "", base); expect(result).toContain('href="https://news.example.com/path"'); }); it("absolutizes a relative img src against the sender base", () => { const html = ''; const result = processEmailContent(html, undefined, "", base); expect(result).toContain('src="https://news.example.com/img/a.png"'); }); it("resolves protocol-relative URLs using https", () => { const html = ''; const result = processEmailContent(html, undefined, "", base); expect(result).toContain('src="https://cdn.example.com/a.png"'); }); it("leaves absolute URLs unchanged", () => { const html = 'l'; const result = processEmailContent(html, undefined, 
"", base); expect(result).toContain('href="https://other.com/x"'); }); it("does not touch relative URLs when no sender base is given", () => { const html = 'link'; const result = processEmailContent(html); expect(result).toContain('href="/path"'); }); it("does not absolutize mailto: or anchors", () => { const html = 'mt'; const result = processEmailContent(html, undefined, "", base); expect(result).toContain('href="mailto:x@y.com"'); expect(result).toContain('href="#top"'); }); }); describe("htmlToText", () => { it("strips HTML tags", () => { expect(htmlToText("Bold text")).toBe("Bold text"); }); it("decodes HTML entities", () => { expect(htmlToText("Tom & Jerry <3")).toBe("Tom & Jerry <3"); }); it("collapses whitespace and trims", () => { expect(htmlToText(" a\n\n b ")).toBe("a b"); }); it("returns empty string for empty input", () => { expect(htmlToText("")).toBe(""); }); it("leaves plain text untouched", () => { expect(htmlToText("Just a subject")).toBe("Just a subject"); }); }); describe("extractInlineCids", () => { it("collects normalized cids referenced by cid: image sources", () => { const html = ''; expect(extractInlineCids(html)).toEqual(new Set(["ii_abc", "ii_def"])); }); it("ignores non-cid sources", () => { const html = ''; expect(extractInlineCids(html).size).toBe(0); }); it("returns an empty set for plain text", () => { expect(extractInlineCids("just text, no html").size).toBe(0); }); it("returns an empty set for empty input", () => { expect(extractInlineCids("").size).toBe(0); }); }); describe("extractLinks", () => { it("collects anchor href + text from HTML", () => { const links = extractLinks( '

hi Confirm and Home

', ); expect(links).toEqual([ { href: "https://x.example/confirm?t=1", text: "Confirm" }, { href: "https://x.example/home", text: "Home" }, ]); }); it("falls back to regex URL extraction for plain text", () => { const links = extractLinks( "Confirm here: https://x.example/verify/abc thanks", ); expect(links).toEqual([ { href: "https://x.example/verify/abc", text: "https://x.example/verify/abc", }, ]); }); it("returns an empty array for empty content", () => { expect(extractLinks("")).toEqual([]); }); }); describe("extractFeedLinks", () => { it("extracts rel=alternate links that carry a type", () => { const html = ` hi`; expect(extractFeedLinks(html)).toEqual([ { href: "https://blog.example.com/feed.xml", type: "application/rss+xml", }, { href: "https://blog.example.com/atom.xml", type: "application/atom+xml", }, ]); }); it("ignores non-alternate rels and links without a type", () => { const html = ` `; expect(extractFeedLinks(html)).toEqual([]); }); it("absolutizes a relative href against the base", () => { const html = ``; expect(extractFeedLinks(html, "https://blog.example.com")).toEqual([ { href: "https://blog.example.com/feed.xml", type: "application/rss+xml", }, ]); }); it("drops a relative href when no base is given", () => { const html = ``; expect(extractFeedLinks(html)).toEqual([]); }); it("returns [] for plain-text bodies", () => { expect(extractFeedLinks("just text https://x.com/feed")).toEqual([]); }); });