fix(domain): cut confirmation false positives via weak subscribe signal

A "manage subscription" / "subscribe" footer link is now a weak (+1) URL
signal instead of strong (+2), so an ordinary newsletter with a stray body
keyword (active/valid) no longer crosses the detection threshold. A genuine
"confirm your subscription" subject + a bare /subscribe link still passes.
Also dedupe surfaced links. Adds false-positive + recall + dedupe tests.
This commit is contained in:
Julien Herr
2026-05-25 10:58:20 +02:00
parent 421430632e
commit 4e3d378850
2 changed files with 89 additions and 6 deletions
+75
View File
@@ -95,4 +95,79 @@ describe("detectConfirmation", () => {
});
expect(result).toBeNull();
});
// ── False-positive guards: ordinary newsletters must NOT be flagged ──────────
// A "manage subscription" footer link is only a weak signal (+1), so a stray
// body keyword (active/valid) cannot push it over the threshold.
// An everyday newsletter: neutral subject, the word "active" buried in the body,
// an article link, and a footer link to the account's subscription page.
// Nothing here asks the reader to confirm anything, so it must stay undetected.
it("does not flag a newsletter with a manage-subscription footer + 'active' in body", () => {
  const articleLink = { href: "https://news.example.com/article/42", text: "Read more" };
  const footerLink = {
    href: "https://news.example.com/account/subscription",
    text: "Manage your subscription",
  };

  const outcome = detectConfirmation({
    subject: "This week in tech",
    text: "Thanks to our most active community members for the great discussion.",
    links: [articleLink, footerLink],
  });

  expect(outcome).toBeNull();
});
// A promotional mail whose body happens to say "valid" and whose only link
// points at a subscription-preferences page. Expected outcome: no detection.
it("does not flag a newsletter with a subscription-preferences link + 'valid' in body", () => {
  const preferencesLink = {
    href: "https://shop.example.com/subscription/preferences",
    text: "Subscription preferences",
  };

  expect(
    detectConfirmation({
      subject: "Weekend deals are here",
      text: "These offers are valid until Friday — don't miss out.",
      links: [preferencesLink],
    }),
  ).toBeNull();
});
// A marketing call-to-action: the body says "Activate" and the single link is a
// bare /subscribe URL labelled "Subscribe & save". Expected outcome: no detection.
it("does not flag a marketing 'Subscribe & save' CTA + 'activate' in body", () => {
  const ctaLinks = [
    {
      href: "https://shop.example.com/subscribe",
      text: "Subscribe & save",
    },
  ];

  const verdict = detectConfirmation({
    subject: "Your weekly digest",
    text: "Activate your free trial and start saving today.",
    links: ctaLinks,
  });

  expect(verdict).toBeNull();
});
// ── Recall: a genuine confirmation still passes via the weak signal ──────────
// Recall guard: the subject explicitly asks to confirm a subscription and the
// only link is a bare /subscribe URL. The mail must be detected, and that link
// must be the first one surfaced.
it("detects a genuine confirm-subscription email whose only link is a bare /subscribe", () => {
  const subscribeUrl = "https://news.example.com/subscribe/abc123";

  const result = detectConfirmation({
    subject: "Please confirm your subscription",
    text: "Tap the button to finish signing up.",
    links: [
      {
        href: subscribeUrl,
        text: "Subscribe",
      },
    ],
  });

  // Kept as its own assertion so a missed detection fails with a clear
  // "expected not null" message rather than a value mismatch.
  expect(result).not.toBeNull();
  // Optional chaining instead of a non-null assertion (`result![0]`): the same
  // check without overriding the type checker.
  expect(result?.[0]).toBe(subscribeUrl);
});
// The same confirmation URL appears twice under different anchor texts; the
// returned list must contain that URL exactly once.
it("dedupes a confirmation link repeated in the body", () => {
  const confirmUrl = "https://x.example/confirm?token=1";
  const repeatedLinks = ["Confirm", "Confirm here"].map((text) => ({
    href: confirmUrl,
    text,
  }));

  const surfaced = detectConfirmation({
    subject: "Confirm your subscription",
    text: "verify your address",
    links: repeatedLinks,
  });

  expect(surfaced).toEqual([confirmUrl]);
});
});
+14 -6
View File
@@ -29,8 +29,9 @@ const KEYWORDS = [
"optin",
];
// Link URL/anchor signals (normalized). A link matching any → candidate.
const LINK_SIGNALS = [
// Strong URL signals: an unambiguous confirm/verify/activate action or a token.
// A link URL matching any scores +2.
const STRONG_LINK_SIGNALS = [
"confirm",
"verif",
"activ",
@@ -40,13 +41,17 @@ const LINK_SIGNALS = [
"optin",
"opt-in",
"double-optin",
"subscription",
"subscribe",
"token=",
"confirm=",
"activation",
];
// Weak URL signals: ambiguous subscribe/subscription words that also appear in
// ordinary "manage subscription" footers. Worth only +1 so they cannot, on their
// own (with a stray body keyword), cross the threshold and cry wolf — but still
// let a genuine "confirm your subscription" subject + a bare /subscribe link pass.
// linkScore consults this list only when the URL matched no strong signal, so a
// single link never collects both the strong and the weak bonus.
const WEAK_LINK_SIGNALS = ["subscription", "subscribe"];
// Negative patterns: a link matching any of these is NEVER a candidate, and these
// tokens are stripped from text before keyword scanning (kills the unsubscribe
// false positive — "unsubscribe" contains "subscribe").
@@ -79,7 +84,8 @@ function linkScore(href: string, text: string): number {
const t = normalize(text);
if (matchesAny(h, NEGATIVE) || matchesAny(t, NEGATIVE)) return 0;
let score = 0;
if (matchesAny(h, LINK_SIGNALS)) score += 2;
if (matchesAny(h, STRONG_LINK_SIGNALS)) score += 2;
else if (matchesAny(h, WEAK_LINK_SIGNALS)) score += 1;
if (matchesAny(t, KEYWORDS)) score += 2;
return score;
}
@@ -110,5 +116,7 @@ export function detectConfirmation(
if (subjectScore + bodyScore + bestLinkScore < THRESHOLD) return null;
return candidates.slice(0, 3).map((c) => c.href);
// Dedupe by href before capping, so a link repeated in the body never wastes
// one of the three surfaced slots.
return [...new Set(candidates.map((c) => c.href))].slice(0, 3);
}