From c4d591b96254abb0c6deadde8f1b00851a4455d8 Mon Sep 17 00:00:00 2001
From: Julien Herr
Date: Mon, 25 May 2026 09:04:36 +0200
Subject: [PATCH] feat(ingest): detect and mark confirmation emails

---
Review notes (text between "---" and the diffstat is ignored by git am):
- Angle-bracket spans were missing from this copy of the patch. Restored
  from context: ReturnType<typeof createMockEnv> and the HTML in the two
  "content" fixtures. The exact fixture markup is an assumption; confirm
  against commit c4d591b. The address on the From: header is missing too
  and cannot be recovered from the patch itself.
- Context-line indentation assumes 2-space Prettier formatting; use
  "git apply --ignore-whitespace" if a hunk fails on whitespace only.
- The new test asserts metadata.pendingConfirmation, but no hunk below
  writes that field; presumably it is derived elsewhere. TODO confirm.

 src/application/email-processor.test.ts | 54 +++++++++++++++++++++++++
 src/application/email-processor.ts      | 14 ++++++-
 2 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/src/application/email-processor.test.ts b/src/application/email-processor.test.ts
index 1462953..66dffc1 100644
--- a/src/application/email-processor.test.ts
+++ b/src/application/email-processor.test.ts
@@ -825,6 +825,60 @@ describe("processEmail — feed icon", () => {
   });
 });
 
+describe("processEmail — confirmation detection", () => {
+  let env: ReturnType<typeof createMockEnv>;
+
+  beforeEach(async () => {
+    env = createMockEnv();
+    await env.EMAIL_STORAGE.put(
+      `feed:${VALID_FEED_ID}:config`,
+      JSON.stringify({}),
+    );
+    await seedInboundIndex(env, VALID_FEED_ID);
+  });
+
+  it("marks a confirmation email and raises pendingConfirmation", async () => {
+    const result = await processEmail(
+      makeInput({
+        subject: "Please confirm your subscription",
+        content:
+          '<p><a href="https://example.com/confirm?token=abc">Click Confirm</a></p>',
+      }),
+      env as any,
+    );
+
+    expect(result.ok).toBe(true);
+
+    const metadata = await env.EMAIL_STORAGE.get(
+      `feed:${VALID_FEED_ID}:metadata`,
+      "json",
+    );
+    expect(metadata.pendingConfirmation).toBe(true);
+    expect(metadata.emails[0].confirmation?.links[0]).toBe(
+      "https://example.com/confirm?token=abc",
+    );
+  });
+
+  it("does not mark a regular newsletter as a confirmation", async () => {
+    const result = await processEmail(
+      makeInput({
+        subject: "Weekly Newsletter",
+        content: "<p>Here is your weekly digest of news.</p>",
+      }),
+      env as any,
+    );
+
+    expect(result.ok).toBe(true);
+
+    const metadata = await env.EMAIL_STORAGE.get(
+      `feed:${VALID_FEED_ID}:metadata`,
+      "json",
+    );
+    expect(metadata.pendingConfirmation).toBeFalsy();
+    expect(metadata.emails[0].confirmation).toBeUndefined();
+  });
+});
+
 describe("processEmail — unsubscribe capture", () => {
   let env: ReturnType<typeof createMockEnv>;
 
diff --git a/src/application/email-processor.ts b/src/application/email-processor.ts
index 2afee5a..78fa867 100644
--- a/src/application/email-processor.ts
+++ b/src/application/email-processor.ts
@@ -5,7 +5,12 @@ import { dispatchFeedEvents } from "../application/feed-events";
 import { extractEmailDomain } from "../infrastructure/favicon-fetcher";
 import { parseOneClickUnsubscribe } from "../infrastructure/unsubscribe";
 import { getAttachmentBucket } from "../infrastructure/attachments";
-import { extractInlineCids } from "../infrastructure/html-processor";
+import {
+  extractInlineCids,
+  extractLinks,
+  htmlToText,
+} from "../infrastructure/html-processor";
+import { detectConfirmation } from "../domain/confirmation";
 import { attachmentIdsForCleanup } from "./feed-cleanup";
 import { FeedRepository } from "../infrastructure/feed-repository";
 import { BackgroundScheduler } from "../infrastructure/worker";
@@ -182,6 +187,12 @@ async function storeEmail(
     return false; // signal: skipped (not stored)
   }
 
+  const confirmation = detectConfirmation({
+    subject: input.subject,
+    text: htmlToText(input.content),
+    links: extractLinks(input.content),
+  });
+
   const attachmentBucket = getAttachmentBucket(env);
   const inlineCids = extractInlineCids(input.content);
   const storedAttachments: AttachmentData[] =
@@ -218,6 +229,7 @@ async function storeEmail(
     ...(inlineIds.length > 0 ? { inlineAttachmentIds: inlineIds } : {}),
     ...(messageId ? { messageId } : {}),
     dedupHash,
+    ...(confirmation ? { confirmation: { links: confirmation.links } } : {}),
   };
 
   // Track the latest sender's domain (feed icon) and capture the RFC 8058