mirror of
https://github.com/juherr/kill-the-news.git
synced 2026-06-20 22:03:48 +00:00
fix(domain): cut confirmation false positives via weak subscribe signal
A "manage subscription" / "subscribe" footer link is now a weak (+1) URL signal instead of a strong (+2) one, so an ordinary newsletter with a stray body keyword (active/valid) no longer crosses the detection threshold. A genuine "confirm your subscription" subject plus a bare /subscribe link still passes. Also dedupes surfaced links by href before capping them at three, so a repeated link never wastes a slot. Adds false-positive, recall, and dedupe tests.
This commit is contained in:
@@ -95,4 +95,79 @@ describe("detectConfirmation", () => {
|
||||
});
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
// ── False-positive guards: ordinary newsletters must NOT be flagged ──────────
|
||||
// A "manage subscription" footer link is only a weak signal (+1), so a stray
|
||||
// body keyword (active/valid) cannot push it over the threshold.
|
||||
|
||||
it("does not flag a newsletter with a manage-subscription footer + 'active' in body", () => {
|
||||
const result = detectConfirmation({
|
||||
subject: "This week in tech",
|
||||
text: "Thanks to our most active community members for the great discussion.",
|
||||
links: [
|
||||
{ href: "https://news.example.com/article/42", text: "Read more" },
|
||||
{
|
||||
href: "https://news.example.com/account/subscription",
|
||||
text: "Manage your subscription",
|
||||
},
|
||||
],
|
||||
});
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it("does not flag a newsletter with a subscription-preferences link + 'valid' in body", () => {
|
||||
const result = detectConfirmation({
|
||||
subject: "Weekend deals are here",
|
||||
text: "These offers are valid until Friday — don't miss out.",
|
||||
links: [
|
||||
{
|
||||
href: "https://shop.example.com/subscription/preferences",
|
||||
text: "Subscription preferences",
|
||||
},
|
||||
],
|
||||
});
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it("does not flag a marketing 'Subscribe & save' CTA + 'activate' in body", () => {
|
||||
const result = detectConfirmation({
|
||||
subject: "Your weekly digest",
|
||||
text: "Activate your free trial and start saving today.",
|
||||
links: [
|
||||
{
|
||||
href: "https://shop.example.com/subscribe",
|
||||
text: "Subscribe & save",
|
||||
},
|
||||
],
|
||||
});
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
// ── Recall: a genuine confirmation still passes via the weak signal ──────────
|
||||
it("detects a genuine confirm-subscription email whose only link is a bare /subscribe", () => {
|
||||
const result = detectConfirmation({
|
||||
subject: "Please confirm your subscription",
|
||||
text: "Tap the button to finish signing up.",
|
||||
links: [
|
||||
{
|
||||
href: "https://news.example.com/subscribe/abc123",
|
||||
text: "Subscribe",
|
||||
},
|
||||
],
|
||||
});
|
||||
expect(result).not.toBeNull();
|
||||
expect(result![0]).toBe("https://news.example.com/subscribe/abc123");
|
||||
});
|
||||
|
||||
it("dedupes a confirmation link repeated in the body", () => {
|
||||
const result = detectConfirmation({
|
||||
subject: "Confirm your subscription",
|
||||
text: "verify your address",
|
||||
links: [
|
||||
{ href: "https://x.example/confirm?token=1", text: "Confirm" },
|
||||
{ href: "https://x.example/confirm?token=1", text: "Confirm here" },
|
||||
],
|
||||
});
|
||||
expect(result).toEqual(["https://x.example/confirm?token=1"]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -29,8 +29,9 @@ const KEYWORDS = [
|
||||
"optin",
|
||||
];
|
||||
|
||||
// Link URL/anchor signals (normalized). A link matching any → candidate.
|
||||
const LINK_SIGNALS = [
|
||||
// Strong URL signals: an unambiguous confirm/verify/activate action or a token.
|
||||
// A link URL matching any scores +2.
|
||||
const STRONG_LINK_SIGNALS = [
|
||||
"confirm",
|
||||
"verif",
|
||||
"activ",
|
||||
@@ -40,13 +41,17 @@ const LINK_SIGNALS = [
|
||||
"optin",
|
||||
"opt-in",
|
||||
"double-optin",
|
||||
"subscription",
|
||||
"subscribe",
|
||||
"token=",
|
||||
"confirm=",
|
||||
"activation",
|
||||
];
|
||||
|
||||
// Weak URL signals: ambiguous subscribe/subscription words that also appear in
|
||||
// ordinary "manage subscription" footers. Worth only +1 so they cannot, on their
|
||||
// own (with a stray body keyword), cross the threshold and cry wolf — but still
|
||||
// let a genuine "confirm your subscription" subject + a bare /subscribe link pass.
|
||||
const WEAK_LINK_SIGNALS = ["subscription", "subscribe"];
|
||||
|
||||
// Negative patterns: a link matching any of these is NEVER a candidate, and these
|
||||
// tokens are stripped from text before keyword scanning (kills the unsubscribe
|
||||
// false positive — "unsubscribe" contains "subscribe").
|
||||
@@ -79,7 +84,8 @@ function linkScore(href: string, text: string): number {
|
||||
const t = normalize(text);
|
||||
if (matchesAny(h, NEGATIVE) || matchesAny(t, NEGATIVE)) return 0;
|
||||
let score = 0;
|
||||
if (matchesAny(h, LINK_SIGNALS)) score += 2;
|
||||
if (matchesAny(h, STRONG_LINK_SIGNALS)) score += 2;
|
||||
else if (matchesAny(h, WEAK_LINK_SIGNALS)) score += 1;
|
||||
if (matchesAny(t, KEYWORDS)) score += 2;
|
||||
return score;
|
||||
}
|
||||
@@ -110,5 +116,7 @@ export function detectConfirmation(
|
||||
|
||||
if (subjectScore + bodyScore + bestLinkScore < THRESHOLD) return null;
|
||||
|
||||
return candidates.slice(0, 3).map((c) => c.href);
|
||||
// Dedupe by href before capping, so a link repeated in the body never wastes
|
||||
// one of the three surfaced slots.
|
||||
return [...new Set(candidates.map((c) => c.href))].slice(0, 3);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user