Files
kill-the-news/src/domain/email-parser.test.ts
T
Julien Herr 1a4a479190 feat: decouple read FeedId from inbound MailboxId
Separate the two feed identities so the public read URL never reveals the
inbound address and vice-versa:

- FeedId becomes an opaque high-entropy token (read id + KV key); MailboxId
  (noun.noun.NN) owns the inbound address and the untrusted-input boundary
  via MailboxId.parse. They map only through the inbound:<mailbox> secondary
  index, resolved solely at reception.
- inbound index lifecycle is owned by FeedRepository: written by save/saveConfig,
  dropped by removeFromList(Bulk) — symmetric, never mirrored by hand (removes the
  manual delete in feed-service + the cron loop, and a silent empty-catch).
- Feed.mailboxId exposes a MailboxId VO (symmetry with Feed.id); the
  mailbox@domain shape lives on MailboxId.emailAddress(domain).
- Distinguish mailbox_unknown (no feed claims the address) from feed_not_found
  (dangling index) for observability; both forwardable, both 404.
- Drop the redundant EmailParser.extractMailbox pass-through so MailboxId.parse
  is the single parse boundary.

Docs (README/INSTALL/CLAUDE.md/landing) and tests updated; 439 tests green,
tsc clean, build dry-run OK.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-24 22:46:37 +02:00

157 lines
4.9 KiB
TypeScript

import { describe, it, expect } from "vitest";
import { EmailParser } from "./email-parser";
// Inbound mailbox parsing lives on the MailboxId VO (see mailbox-id.test.ts);
// EmailParser no longer wraps it.
// Exercises EmailParser.decodeEncodedWords with plain text, Base64 ("B") and
// quoted-printable ("Q") encoded words, and an unknown encoding marker.
describe("EmailParser.decodeEncodedWords", () => {
  // Local shorthand so every case reads as "input → expected output".
  const decode = (raw: string) => EmailParser.decodeEncodedWords(raw);

  it("returns plain text unchanged", () => {
    const decoded = decode("Hello World");
    expect(decoded).toBe("Hello World");
  });

  it("returns empty string for empty input", () => {
    const decoded = decode("");
    expect(decoded).toBe("");
  });

  it("decodes a Base64-encoded word (UTF-8 subject)", () => {
    // "SGVsbG8=" is the Base64 form of "Hello".
    const decoded = decode("=?UTF-8?B?SGVsbG8=?=");
    expect(decoded).toBe("Hello");
  });

  it("decodes a quoted-printable encoded word", () => {
    // An ASCII-only sequence is used on purpose so the assertion does not
    // depend on how multi-byte charsets are handled; "=20" is an encoded space.
    const decoded = decode("=?US-ASCII?Q?Hello=20World?=");
    expect(decoded).toBe("Hello World");
  });

  it("decodes underscores as spaces in QP encoding", () => {
    const decoded = decode("=?US-ASCII?Q?Hello_World?=");
    expect(decoded).toBe("Hello World");
  });

  it("leaves unrecognised encoded-word syntax unchanged", () => {
    // "X" is neither the "B" nor the "Q" marker used by the cases above.
    const decoded = decode("=?UTF-8?X?something?=");
    expect(decoded).toBe("=?UTF-8?X?something?=");
  });
});
// Exercises EmailParser.parseForwardEmailPayload: rejection of missing
// payloads, field extraction, fallbacks/defaults, and header parsing.
describe("EmailParser.parseForwardEmailPayload", () => {
  it("throws on null payload", () => {
    const parseNull = () => EmailParser.parseForwardEmailPayload(null);
    expect(parseNull).toThrow("Missing or invalid webhook payload");
  });

  it("throws on undefined payload", () => {
    // Only asserts that something is thrown; the message is not pinned here.
    const parseUndefined = () =>
      EmailParser.parseForwardEmailPayload(undefined);
    expect(parseUndefined).toThrow();
  });

  it("parses subject, from, and HTML content", () => {
    const webhookBody = {
      subject: "Test Subject",
      from: { text: "sender@example.com" },
      html: "<p>Hello</p>",
      date: "2024-01-15T10:00:00.000Z",
    };

    const parsed = EmailParser.parseForwardEmailPayload(webhookBody);
    // receivedAt is compared against the epoch-millisecond value of the date.
    const expectedTimestamp = new Date("2024-01-15T10:00:00.000Z").getTime();

    expect(parsed.subject).toBe("Test Subject");
    expect(parsed.from).toBe("sender@example.com");
    expect(parsed.content).toBe("<p>Hello</p>");
    expect(parsed.receivedAt).toBe(expectedTimestamp);
  });

  it("prefers HTML content over plain text", () => {
    const webhookBody = {
      from: { text: "a@b.com" },
      html: "<b>HTML</b>",
      text: "Plain",
    };

    const parsed = EmailParser.parseForwardEmailPayload(webhookBody);

    expect(parsed.content).toBe("<b>HTML</b>");
  });

  it("falls back to plain text when HTML is absent", () => {
    const webhookBody = {
      from: { text: "a@b.com" },
      text: "Plain text",
    };

    const parsed = EmailParser.parseForwardEmailPayload(webhookBody);

    expect(parsed.content).toBe("Plain text");
  });

  it("uses structured from.value when from.text is absent", () => {
    const webhookBody = {
      from: {
        value: [{ name: "Alice", address: "alice@example.com" }],
      },
      html: "",
    };

    const parsed = EmailParser.parseForwardEmailPayload(webhookBody);

    // Name and address are expected in "Name <address>" form.
    expect(parsed.from).toBe("Alice <alice@example.com>");
  });

  it("falls back to Unknown Sender when from is absent", () => {
    const parsed = EmailParser.parseForwardEmailPayload({ html: "" });

    expect(parsed.from).toBe("Unknown Sender");
  });

  it("uses Date.now() when date field is absent", () => {
    // Bracket the call with two clock reads; receivedAt must land between them.
    const lowerBound = Date.now();
    const parsed = EmailParser.parseForwardEmailPayload({
      from: { text: "x@y.com" },
    });
    const upperBound = Date.now();

    expect(parsed.receivedAt).toBeGreaterThanOrEqual(lowerBound);
    expect(parsed.receivedAt).toBeLessThanOrEqual(upperBound);
  });

  it("defaults subject to 'No Subject' when absent", () => {
    const parsed = EmailParser.parseForwardEmailPayload({
      from: { text: "x@y.com" },
    });

    expect(parsed.subject).toBe("No Subject");
  });

  it("extracts headers from headerLines array", () => {
    const webhookBody = {
      from: { text: "x@y.com" },
      headerLines: [
        { key: "X-Custom", line: "X-Custom: my-value" },
        { key: "List-ID", line: "List-ID: <list.example.com>" },
      ],
    };

    const parsed = EmailParser.parseForwardEmailPayload(webhookBody);

    // Lookups use lower-cased header names even though the input is mixed case.
    expect(parsed.headers["x-custom"]).toBe("my-value");
    expect(parsed.headers["list-id"]).toBe("<list.example.com>");
  });

  it("extracts headers from raw headers string", () => {
    const webhookBody = {
      from: { text: "x@y.com" },
      headers: "X-Foo: bar\r\nX-Baz: qux",
    };

    const parsed = EmailParser.parseForwardEmailPayload(webhookBody);

    expect(parsed.headers["x-foo"]).toBe("bar");
    expect(parsed.headers["x-baz"]).toBe("qux");
  });

  it("decodes RFC 2047 encoded-word subjects", () => {
    // Same Base64 encoded word ("Hello") as in the decodeEncodedWords suite.
    const webhookBody = {
      from: { text: "x@y.com" },
      subject: "=?UTF-8?B?SGVsbG8=?=",
    };

    const parsed = EmailParser.parseForwardEmailPayload(webhookBody);

    expect(parsed.subject).toBe("Hello");
  });
});