fix(confirmation): flag code-based OTP signups with no clickable link

Detect verification-code signups (e.g. "your verification code is
371404") whose only link is a mailto. These cleared the keyword
threshold but were dropped because the detector required an http(s)
candidate link. A code path now raises the flag/badge/banner when a
verification keyword sits next to an OTP-style code; the code is never
extracted or surfaced.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Julien Herr
2026-05-25 23:46:14 +02:00
parent 3f35435610
commit 4d3a94d1ec
4 changed files with 98 additions and 10 deletions
+6
View File
@@ -21,6 +21,12 @@ verbatim as the GitHub Release notes — so what you write here is what ships.
### Fixed
- Subscription-confirmation detection now flags code-based signup verifications
(OTP) that have no link to click — e.g. "Your verification code is 371404",
whose only link is a `mailto:` support address. These cleared the keyword
threshold but were dropped because the detector required an http(s) candidate
link. A separate OTP path now raises the flag/badge/banner when the email has a
verification keyword and an OTP-style code close to a code word; the code itself
is never extracted or surfaced.
- Subscription-confirmation detection now recognizes localized "subscribe" CTAs.
The weak link-signal vocabulary was English-only (`subscrib`),
so a genuine double opt-in whose confirm button reads "Je m'inscris…" over an
+3 -1
View File
@@ -236,7 +236,9 @@ async function storeEmail(
...(inlineIds.length > 0 ? { inlineAttachmentIds: inlineIds } : {}),
...(messageId ? { messageId } : {}),
dedupHash,
...(confirmationLinks
// null = not a confirmation; [] = a code-based confirmation (flag it, no
// link to surface). Both an empty and a populated array mean "detected".
...(confirmationLinks !== null
? { confirmation: { links: confirmationLinks } }
: {}),
};
+48
View File
@@ -199,6 +199,54 @@ describe("detectConfirmation", () => {
expect(result![0]).toContain("proc.php");
});
// ── Code-based signup confirmations (OTP) with no clickable link ─────────────
// Some signups send a verification *code* to enter manually — there is nothing
// to click. We still flag these (empty links: detected but no actionable link),
// but never extract or surface the code itself.
it("flags an OTP signup email whose only link is a mailto", () => {
  // The sole link is a mailto: support address, so nothing is clickable.
  const supportLink = {
    href: "mailto:hey@example.com?subject=Acc%C3%A8s+frauduleux",
    text: "contacter le support",
  };
  const email = {
    subject: "❄️ Ton code de vérification est 371404",
    text: "Salut ! Entre le code de vérification ci-dessous lorsqu'il te sera demandé : 371404. Tu n'as rien demandé ?",
    links: [supportLink],
  };

  // Detected as a confirmation, but with no link to surface.
  const detected = detectConfirmation(email);
  expect(detected).toEqual([]);
});
it("flags a code email via a body keyword + code pattern when there are no links", () => {
  // The subject carries no code; the body supplies the code-near-code-word match.
  const email = {
    subject: "Welcome to Acme",
    text: "Your verification code is 246810. Enter it to finish signing up.",
    links: [],
  };

  const detected = detectConfirmation(email);
  expect(detected).toEqual([]);
});
it("does not flag a transactional email with a big number but no code-near-code-word", () => {
  // An order number is an 8-digit run, but no code word sits near it.
  const trackingLink = {
    href: "https://shop.example.com/track/12345678",
    text: "Track",
  };
  const email = {
    subject: "Order confirmed",
    text: "Your order 12345678 ships Monday.",
    links: [trackingLink],
  };

  const detected = detectConfirmation(email);
  expect(detected).toBeNull();
});
it("does not flag a newsletter with numbers but no verification keyword", () => {
  // "code 9999" looks like a code next to a code word; the expected result is
  // still null for this input.
  const email = {
    subject: "Your 2026 wrapped: 4567 minutes listened",
    text: "Here is your year in review with code 9999 highlights.",
    links: [{ href: "https://music.example.com/wrapped", text: "See more" }],
  };

  const detected = detectConfirmation(email);
  expect(detected).toBeNull();
});
it("dedupes a confirmation link repeated in the body", () => {
const result = detectConfirmation({
subject: "Confirm your subscription",
+41 -9
View File
@@ -5,8 +5,11 @@
* the link-signal patterns, the scoring weights and the threshold.
*
* Returns the ranked candidate confirmation links (top 3) when the combined score
* clears the threshold AND at least one candidate link exists; otherwise null.
* Only http(s) links are ever considered or returned.
* clears the threshold AND at least one candidate link exists. When no link
* qualifies but the email is a code-based signup verification (a verification
* keyword in the subject or body, plus an OTP-style code close to a code word —
* e.g. "your verification code is 371404") it returns an empty array: detected,
* but nothing to click. Returns null when not a confirmation.
* Only http(s) links are ever considered or returned; the code is never extracted.
*/
export interface DetectConfirmationInput {
@@ -76,6 +79,21 @@ const NEGATIVE = [
const THRESHOLD = 3;
// Signup-by-code detection: a run of 4–8 digits within CODE_PROXIMITY characters
// of a code-ish word, in either order ("your verification code is 371404" /
// "371404 is your code"). Meant to run on the already-normalized (lowercased,
// diacritics-stripped) subject/body. This is a presence test only: there is no
// capture group, so the code value is never extracted or surfaced.
const CODE_WORDS = "code|codigo|otp|verif";
const CODE_PROXIMITY = 48;
const CODE_PATTERN = (() => {
  const codeWord = `(?:${CODE_WORDS})`;
  const digitRun = "\\b\\d{4,8}\\b";
  // Lazy, bounded filler that may span line breaks.
  const filler = `[\\s\\S]{0,${CODE_PROXIMITY}}?`;
  const wordThenDigits = codeWord + filler + digitRun;
  const digitsThenWord = digitRun + filler + codeWord;
  return new RegExp(`${wordThenDigits}|${digitsThenWord}`);
})();

/**
 * Reports whether the text contains an OTP-style code close to a code word.
 *
 * @param text - Text to scan.
 * @returns True when CODE_PATTERN matches anywhere in the text.
 */
function hasVerificationCode(text: string): boolean {
  return text.search(CODE_PATTERN) !== -1;
}
/**
 * Folds text for accent- and case-insensitive matching.
 *
 * Decomposes the string to NFD so an accented letter becomes a base letter
 * followed by combining marks, removes the marks in U+0300–U+036F (the
 * Combining Diacritical Marks block), then lowercases the result — e.g.
 * "Vérification" becomes "verification".
 *
 * @param s - Raw text to fold.
 * @returns The lowercased text with those combining marks removed.
 */
function normalize(s: string): string {
  // The range is written with \u escapes instead of raw combining characters:
  // the raw form is invisible in most editors and diffs and is easy to corrupt.
  return s.normalize("NFD").replace(/[\u0300-\u036f]/g, "").toLowerCase();
}
@@ -115,18 +133,32 @@ export function detectConfirmation(
.filter((l) => l.score > 0)
.sort((a, b) => b.score - a.score);
if (candidates.length === 0) return null;
const subject = stripNegatives(normalize(input.subject));
const text = stripNegatives(normalize(input.text));
const subjectScore = matchesAny(subject, KEYWORDS) ? 2 : 0;
const bodyScore = matchesAny(text, KEYWORDS) ? 1 : 0;
const bestLinkScore = candidates[0].score;
if (subjectScore + bodyScore + bestLinkScore < THRESHOLD) return null;
// Link path: a clickable confirm/verify/subscribe link clears the threshold.
if (candidates.length > 0) {
const bestLinkScore = candidates[0].score;
if (subjectScore + bodyScore + bestLinkScore >= THRESHOLD) {
// Dedupe by href before capping, so a link repeated in the body never
// wastes one of the three surfaced slots.
return [...new Set(candidates.map((c) => c.href))].slice(0, 3);
}
}
// Dedupe by href before capping, so a link repeated in the body never wastes
// one of the three surfaced slots.
return [...new Set(candidates.map((c) => c.href))].slice(0, 3);
// Code path: an OTP-style signup verification with no link to click. Requires
// both a verification keyword (subject or body) and a code-near-code-word
// pattern, so a stray number or a lone keyword cannot cry wolf. Flag it with
// an empty link list — detected, but nothing actionable to surface.
if (
(subjectScore > 0 || bodyScore > 0) &&
(hasVerificationCode(subject) || hasVerificationCode(text))
) {
return [];
}
return null;
}