diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c8a591..61ffd7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,12 @@ verbatim as the GitHub Release notes — so what you write here is what ships. ### Fixed +- Subscription-confirmation detection now flags code-based signup verifications + (OTP) that have no link to click — e.g. "Your verification code is 371404", + whose only link is a `mailto:` support address. These cleared the keyword + threshold but were dropped because the detector required an http(s) candidate + link. A code path now raises the flag/badge/banner when a verification keyword + sits next to an OTP-style code; the code itself is never extracted or surfaced. - Subscription-confirmation detection now recognizes localized "subscribe" CTAs. The weak link-signal vocabulary was English-only (`subscrib`), so a genuine double opt-in whose confirm button reads "Je m'inscris…" over an diff --git a/src/application/email-processor.ts b/src/application/email-processor.ts index 039e3f8..5d943bd 100644 --- a/src/application/email-processor.ts +++ b/src/application/email-processor.ts @@ -236,7 +236,9 @@ async function storeEmail( ...(inlineIds.length > 0 ? { inlineAttachmentIds: inlineIds } : {}), ...(messageId ? { messageId } : {}), dedupHash, - ...(confirmationLinks + // null = not a confirmation; [] = a code-based confirmation (flag it, no + // link to surface). Both an empty and a populated array mean "detected". + ...(confirmationLinks !== null ? { confirmation: { links: confirmationLinks } } : {}), }; diff --git a/src/domain/confirmation.test.ts b/src/domain/confirmation.test.ts index eb700b3..c19b603 100644 --- a/src/domain/confirmation.test.ts +++ b/src/domain/confirmation.test.ts @@ -199,6 +199,54 @@ describe("detectConfirmation", () => { expect(result![0]).toContain("proc.php"); }); + // ── Code-based signup confirmations (OTP) with no clickable link ───────────── + // Some signups send a verification *code* to enter manually — there is nothing + // to click. We still flag these (empty links: detected but no actionable link), + // but never extract or surface the code itself. + + it("flags an OTP signup email whose only link is a mailto", () => { + const result = detectConfirmation({ + subject: "❄️ Ton code de vérification est 371404", + text: "Salut ! Entre le code de vérification ci-dessous lorsqu'il te sera demandé : 371404. Tu n'as rien demandé ?", + links: [ + { + href: "mailto:hey@example.com?subject=Acc%C3%A8s+frauduleux", + text: "contacter le support", + }, + ], + }); + expect(result).toEqual([]); + }); + + it("flags a code email via a body keyword + code pattern when there are no links", () => { + const result = detectConfirmation({ + subject: "Welcome to Acme", + text: "Your verification code is 246810. Enter it to finish signing up.", + links: [], + }); + expect(result).toEqual([]); + }); + + it("does not flag a transactional email with a big number but no code-near-code-word", () => { + const result = detectConfirmation({ + subject: "Order confirmed", + text: "Your order 12345678 ships Monday.", + links: [ + { href: "https://shop.example.com/track/12345678", text: "Track" }, + ], + }); + expect(result).toBeNull(); + }); + + it("does not flag a newsletter with numbers but no verification keyword", () => { + const result = detectConfirmation({ + subject: "Your 2026 wrapped: 4567 minutes listened", + text: "Here is your year in review with code 9999 highlights.", + links: [{ href: "https://music.example.com/wrapped", text: "See more" }], + }); + expect(result).toBeNull(); + }); + it("dedupes a confirmation link repeated in the body", () => { const result = detectConfirmation({ subject: "Confirm your subscription", diff --git a/src/domain/confirmation.ts b/src/domain/confirmation.ts index e2e77ee..36bfa58 100644 --- a/src/domain/confirmation.ts +++ b/src/domain/confirmation.ts @@ -5,8 +5,11 @@ * the link-signal patterns, the scoring weights and the threshold. * * Returns the ranked candidate confirmation links (top 3) when the combined score - * clears the threshold AND at least one candidate link exists; otherwise null. - * Only http(s) links are ever considered or returned. + * clears the threshold AND at least one candidate link exists. When the email is a + * code-based signup verification (a verification keyword next to an OTP-style code, + * with no clickable link — e.g. "your verification code is 371404") it returns an + * empty array: detected, but nothing to click. Returns null when not a confirmation. + * Only http(s) links are ever considered or returned; the code is never extracted. */ export interface DetectConfirmationInput { @@ -76,6 +79,21 @@ const NEGATIVE = [ const THRESHOLD = 3; +// A verification code (OTP) sitting next to a code-ish word, in either order and +// within a short window — "your verification code is 371404" / "371404 is your +// code". This is the signup-by-code case that has no link to click. Run on the +// already-normalized (lowercased, diacritics-stripped) subject/body. We only test +// for presence to raise the flag; the code value is never captured or surfaced. +const CODE_WORDS = "code|codigo|otp|verif"; +const CODE_PROXIMITY = 48; +const CODE_PATTERN = new RegExp( + `(?:${CODE_WORDS})[\\s\\S]{0,${CODE_PROXIMITY}}?\\b\\d{4,8}\\b|\\b\\d{4,8}\\b[\\s\\S]{0,${CODE_PROXIMITY}}?(?:${CODE_WORDS})`, +); + +function hasVerificationCode(text: string): boolean { + return CODE_PATTERN.test(text); +} + function normalize(s: string): string { return s.normalize("NFD").replace(/[̀-ͯ]/g, "").toLowerCase(); } @@ -115,18 +133,32 @@ export function detectConfirmation( .filter((l) => l.score > 0) .sort((a, b) => b.score - a.score); - if (candidates.length === 0) return null; - const subject = stripNegatives(normalize(input.subject)); const text = stripNegatives(normalize(input.text)); const subjectScore = matchesAny(subject, KEYWORDS) ? 2 : 0; const bodyScore = matchesAny(text, KEYWORDS) ? 1 : 0; - const bestLinkScore = candidates[0].score; - if (subjectScore + bodyScore + bestLinkScore < THRESHOLD) return null; + // Link path: a clickable confirm/verify/subscribe link clears the threshold. + if (candidates.length > 0) { + const bestLinkScore = candidates[0].score; + if (subjectScore + bodyScore + bestLinkScore >= THRESHOLD) { + // Dedupe by href before capping, so a link repeated in the body never + // wastes one of the three surfaced slots. + return [...new Set(candidates.map((c) => c.href))].slice(0, 3); + } + } - // Dedupe by href before capping, so a link repeated in the body never wastes - // one of the three surfaced slots. - return [...new Set(candidates.map((c) => c.href))].slice(0, 3); + // Code path: an OTP-style signup verification with no link to click. Requires + // both a verification keyword (subject or body) and a code-near-code-word + // pattern, so a stray number or a lone keyword cannot cry wolf. Flag it with + // an empty link list — detected, but nothing actionable to surface. + if ( + (subjectScore > 0 || bodyScore > 0) && + (hasVerificationCode(subject) || hasVerificationCode(text)) + ) { + return []; + } + + return null; }