mirror of
https://github.com/juherr/kill-the-news.git
synced 2026-06-20 22:03:48 +00:00
feat(domain): confirmation-email detection service
This commit is contained in:
@@ -0,0 +1,100 @@
|
|||||||
|
import { describe, it, expect } from "vitest";
|
||||||
|
import { detectConfirmation } from "./confirmation";
|
||||||
|
|
||||||
|
// Behavioral spec for detectConfirmation: positive detection (EN/FR), negative
// cases (plain newsletter, no candidate link, non-http link), unsubscribe
// exclusion, and ranking/capping of the returned links.
describe("detectConfirmation", () => {
  it("detects an English confirmation email and returns the confirm link", () => {
    const result = detectConfirmation({
      subject: "Please confirm your subscription",
      text: "Click the button below to verify your email address.",
      links: [
        {
          href: "https://news.example.com/confirm?token=abc123",
          text: "Confirm subscription",
        },
        { href: "https://news.example.com/home", text: "Home" },
      ],
    });
    // The null check fails first if detection missed; optional chaining then
    // avoids a non-null assertion on the follow-up expectation.
    expect(result).not.toBeNull();
    expect(result?.links[0]).toBe(
      "https://news.example.com/confirm?token=abc123",
    );
  });

  it("detects a French confirmation email (accent-insensitive)", () => {
    const result = detectConfirmation({
      subject: "Confirmez votre inscription",
      text: "Cliquez pour activer votre abonnement.",
      links: [
        {
          href: "https://lettre.example.fr/valider/xyz",
          text: "Valider mon inscription",
        },
      ],
    });
    expect(result).not.toBeNull();
    expect(result?.links[0]).toBe("https://lettre.example.fr/valider/xyz");
  });

  it("returns null for a normal newsletter with only an unsubscribe link", () => {
    const result = detectConfirmation({
      subject: "This week in tech",
      text: "Here are the top stories. To stop receiving these, unsubscribe here.",
      links: [
        { href: "https://news.example.com/article/42", text: "Read more" },
        {
          href: "https://news.example.com/unsubscribe?u=9",
          text: "Unsubscribe",
        },
      ],
    });
    expect(result).toBeNull();
  });

  it("returns null when no candidate link is present even if the subject matches", () => {
    const result = detectConfirmation({
      subject: "Confirm your subscription",
      text: "Reply to this email to confirm.",
      links: [],
    });
    expect(result).toBeNull();
  });

  it("never treats an unsubscribe link as a confirmation candidate", () => {
    const result = detectConfirmation({
      subject: "Confirm your email",
      text: "Verify your address.",
      links: [
        { href: "https://x.example/verify/abc", text: "Verify email" },
        { href: "https://x.example/unsubscribe", text: "unsubscribe" },
      ],
    });
    expect(result).not.toBeNull();
    expect(result?.links).not.toContain("https://x.example/unsubscribe");
  });

  it("ranks the strongest candidate first and caps at three links", () => {
    const result = detectConfirmation({
      subject: "Confirm your subscription",
      text: "verify activate",
      links: [
        { href: "https://x.example/help", text: "help" },
        { href: "https://x.example/a?token=1", text: "click" },
        { href: "https://x.example/confirm?token=2", text: "Confirm" },
        { href: "https://x.example/activate", text: "Activate account" },
        { href: "https://x.example/verify", text: "Verify" },
      ],
    });
    expect(result).not.toBeNull();
    expect(result?.links.length).toBeLessThanOrEqual(3);
    expect(result?.links[0]).toBe("https://x.example/confirm?token=2");
  });

  it("ignores non-http(s) links", () => {
    const result = detectConfirmation({
      subject: "Confirm your subscription",
      text: "verify",
      links: [{ href: "mailto:confirm@x.example", text: "confirm" }],
    });
    expect(result).toBeNull();
  });
});
|
||||||
@@ -0,0 +1,124 @@
|
|||||||
|
/**
 * Pure detection of "confirm your subscription" emails. No DOM, no I/O — it
 * receives already-extracted subject/body text and link tuples (infra parses the
 * HTML). This module owns the business knowledge: the multilingual keyword vocab,
 * the link-signal patterns, the scoring weights and the threshold.
 *
 * Returns the ranked candidate confirmation links (top 3) when the combined score
 * clears the threshold AND at least one candidate link exists; otherwise null.
 * Only http(s) links are ever considered or returned.
 */
|
||||||
|
|
||||||
|
/**
 * Pre-extracted email content handed to the detector.
 */
export interface DetectConfirmationInput {
  /** Email subject line, as raw (un-normalized) text. */
  subject: string;
  /** Email body as plain text. */
  text: string;
  /** Links found in the email: target URL (`href`) and anchor label (`text`). */
  links: { href: string; text: string }[];
}
|
||||||
|
|
||||||
|
/**
 * Outcome of a positive detection.
 */
export interface ConfirmationResult {
  /** Combined subject + body + best-link score. */
  score: number;
  /** Candidate confirmation URLs, strongest first. */
  links: string[];
}
|
||||||
|
|
||||||
|
// Confirmation-positive stems, already normalized (lowercased, diacritics stripped).
// EN / FR / DE / ES — extend here to add a language.
// Entries are matched as plain substrings, so a stem such as "verif" covers
// "verify", "verification", "verifier", "verificar", ...
const KEYWORDS = [
  "confirm",
  "verif",
  "activ",
  "valid",
  "bestatig",
  "aktivier",
  "opt-in",
  "opt in",
  "optin",
];
|
||||||
|
|
||||||
|
// Link URL signals (normalized substrings). A link whose URL contains any of
// these earns link-score points; anchor text is scored separately against the
// keyword stems.
const LINK_SIGNALS = [
  "confirm",
  "verif",
  "activ",
  "valid",
  "bestatig",
  "aktivier",
  "optin",
  "opt-in",
  "double-optin",
  "subscription",
  "subscribe",
  "token=",
  "confirm=",
  "activation",
];
|
||||||
|
|
||||||
|
// Negative patterns: a link matching any of these is NEVER a candidate, and these
// tokens are stripped from text before keyword scanning (kills the unsubscribe
// false positive — "unsubscribe" contains "subscribe").
const NEGATIVE = [
  "unsubscribe",
  "desabonn",
  "desinscri",
  "abbestell",
  "opt-out",
  "optout",
  "list-unsubscribe",
];
|
||||||
|
|
||||||
|
// Minimum combined score (subject + body + best link) an email must reach to be
// reported as a confirmation email; anything below yields null.
const THRESHOLD = 3;
|
||||||
|
|
||||||
|
/**
 * Canonicalizes text for accent- and case-insensitive substring matching.
 *
 * The string is decomposed to NFD, every combining diacritical mark
 * (U+0300–U+036F) is dropped, and the result is lowercased, so "Vérifiez" and
 * "verifiez" compare equal.
 *
 * @param s Raw text (subject, body, URL or anchor label).
 * @returns The lowercased, diacritic-free form of `s`.
 */
function normalize(s: string): string {
  // The range is spelled with \u escapes instead of literal combining
  // characters so it stays intact through editors and tooling that mangle
  // bare combining marks.
  return s
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "")
    .toLowerCase();
}
|
||||||
|
|
||||||
|
/**
 * Tells whether a link target uses the http or https scheme.
 *
 * Surrounding whitespace is ignored and the scheme is matched
 * case-insensitively; anything else (mailto:, tel:, relative paths, ...) is
 * rejected.
 *
 * @param href Link target as found in the email.
 * @returns `true` when the trimmed href starts with `http://` or `https://`.
 */
function isHttp(href: string): boolean {
  const trimmed = href.trim();
  return /^https?:\/\//i.test(trimmed);
}
|
||||||
|
|
||||||
|
/**
 * Checks whether `haystack` contains at least one of `needles` as a substring.
 *
 * Matching is exact (case-sensitive), so both sides are expected to be in the
 * same normalized form.
 *
 * @param haystack Text to search in.
 * @param needles Substrings to look for; never mutated, so read-only arrays are accepted.
 * @returns `true` on the first needle found, `false` if none match or the list is empty.
 */
function matchesAny(haystack: string, needles: readonly string[]): boolean {
  return needles.some((needle) => haystack.includes(needle));
}
|
||||||
|
|
||||||
|
/**
 * Counts how many distinct keyword stems occur in `haystack`.
 *
 * Each stem counts at most once regardless of how often it appears. Matching is
 * a plain substring test, so `haystack` should already be normalized.
 *
 * @param haystack Normalized text to scan.
 * @returns Number of entries of `KEYWORDS` found in `haystack`.
 */
function keywordHits(haystack: string): number {
  return KEYWORDS.filter((stem) => haystack.includes(stem)).length;
}
|
||||||
|
|
||||||
|
/**
 * Scores a single link as a confirmation candidate.
 *
 * - Any negative pattern in the URL or the anchor text vetoes the link (score 0).
 * - +2 when the URL contains a link signal.
 * - +2 when the anchor text contains a keyword stem.
 *
 * @param href Link target.
 * @param text Anchor label of the link.
 * @returns 0, 2 or 4; 0 means "not a candidate".
 */
function linkScore(href: string, text: string): number {
  const url = normalize(href);
  const anchor = normalize(text);

  // The veto runs first so that e.g. "unsubscribe" can never score through the
  // "subscribe" link signal it contains.
  if (matchesAny(url, NEGATIVE) || matchesAny(anchor, NEGATIVE)) return 0;

  const urlScore = matchesAny(url, LINK_SIGNALS) ? 2 : 0;
  const anchorScore = matchesAny(anchor, KEYWORDS) ? 2 : 0;
  return urlScore + anchorScore;
}
|
||||||
|
|
||||||
|
/**
 * Replaces every occurrence of each negative pattern in `text` with a single
 * space so the pattern cannot contribute to keyword matching afterwards.
 *
 * Patterns are applied in list order as exact, case-sensitive substrings, so
 * callers should pass already-normalized text.
 *
 * @param text Normalized text to clean.
 * @returns `text` with all negative patterns blanked out.
 */
function stripNegatives(text: string): string {
  // split/join replaces all occurrences without building a regex from the
  // pattern (no escaping concerns).
  return NEGATIVE.reduce(
    (cleaned, pattern) => cleaned.split(pattern).join(" "),
    text,
  );
}
|
||||||
|
|
||||||
|
/**
 * Decides whether an email is a "confirm your subscription" message and, if so,
 * returns its most likely confirmation links.
 *
 * Scoring:
 * - each http(s) link is scored individually; links scoring 0 are discarded;
 * - +2 if the subject contains any keyword stem, +1 if the body does
 *   (negative patterns are blanked out of both before scanning);
 * - the best link's score is added on top.
 *
 * @param input Pre-extracted subject, body text and links of the email.
 * @returns The combined score and up to three candidate URLs (trimmed,
 *   strongest first), or `null` when there is no candidate link or the
 *   combined score is below the threshold.
 */
export function detectConfirmation(
  input: DetectConfirmationInput,
): ConfirmationResult | null {
  // filter() yields a fresh array, so the in-place sort never touches
  // input.links. The sort is stable: equally scored links keep email order.
  const candidates = input.links
    .filter((link) => isHttp(link.href))
    .map((link) => ({
      href: link.href.trim(),
      score: linkScore(link.href, link.text),
    }))
    .filter((candidate) => candidate.score > 0)
    .sort((a, b) => b.score - a.score);

  // Without at least one candidate link there is nothing to return, whatever
  // the subject/body say. Binding the head once also keeps the access sound
  // under `noUncheckedIndexedAccess`.
  const best = candidates[0];
  if (best === undefined) return null;

  const subject = stripNegatives(normalize(input.subject));
  const body = stripNegatives(normalize(input.text));

  const subjectScore = keywordHits(subject) > 0 ? 2 : 0;
  const bodyScore = keywordHits(body) > 0 ? 1 : 0;

  const score = subjectScore + bodyScore + best.score;
  if (score < THRESHOLD) return null;

  return {
    score,
    links: candidates.slice(0, 3).map((candidate) => candidate.href),
  };
}
|
||||||
Reference in New Issue
Block a user