feat(ingest): detect and mark confirmation emails

This commit is contained in:
Julien Herr
2026-05-25 09:04:36 +02:00
parent 36d58ade48
commit c4d591b962
2 changed files with 67 additions and 1 deletions
+54
View File
@@ -825,6 +825,60 @@ describe("processEmail — feed icon", () => {
}); });
}); });
/**
 * Confirmation-detection behaviour of `processEmail`.
 *
 * Each case feeds one email through `processEmail` and then inspects the
 * JSON stored under the feed's `metadata` key: an email that asks the
 * reader to confirm a subscription is expected to set `pendingConfirmation`
 * and carry the confirmation link on its entry, while an ordinary
 * newsletter is expected to leave both untouched.
 */
describe("processEmail — confirmation detection", () => {
  let env: ReturnType<typeof createMockEnv>;

  // Reads back the feed metadata entry from the mock storage as parsed JSON.
  const readFeedMetadata = () =>
    env.EMAIL_STORAGE.get(`feed:${VALID_FEED_ID}:metadata`, "json");

  beforeEach(async () => {
    // Fresh mock environment per test, seeded with an empty feed config
    // and the inbound index entry for the feed under test.
    env = createMockEnv();
    const emptyConfig = JSON.stringify({});
    await env.EMAIL_STORAGE.put(`feed:${VALID_FEED_ID}:config`, emptyConfig);
    await seedInboundIndex(env, VALID_FEED_ID);
  });

  it("marks a confirmation email and raises pendingConfirmation", async () => {
    const confirmationEmail = makeInput({
      subject: "Please confirm your subscription",
      content:
        '<p>Click <a href="https://example.com/confirm?token=abc">Confirm</a></p>',
    });

    const outcome = await processEmail(confirmationEmail, env as any);
    expect(outcome.ok).toBe(true);

    const stored = await readFeedMetadata();
    expect(stored.pendingConfirmation).toBe(true);
    // The first stored email should expose the link found in the body.
    const firstLink = stored.emails[0].confirmation?.links[0];
    expect(firstLink).toBe("https://example.com/confirm?token=abc");
  });

  it("does not mark a regular newsletter as a confirmation", async () => {
    const newsletterEmail = makeInput({
      subject: "Weekly Newsletter",
      content: "<p>Here is your weekly digest of news.</p>",
    });

    const outcome = await processEmail(newsletterEmail, env as any);
    expect(outcome.ok).toBe(true);

    const stored = await readFeedMetadata();
    expect(stored.pendingConfirmation).toBeFalsy();
    expect(stored.emails[0].confirmation).toBeUndefined();
  });
});
describe("processEmail — unsubscribe capture", () => { describe("processEmail — unsubscribe capture", () => {
let env: ReturnType<typeof createMockEnv>; let env: ReturnType<typeof createMockEnv>;
+13 -1
View File
@@ -5,7 +5,12 @@ import { dispatchFeedEvents } from "../application/feed-events";
import { extractEmailDomain } from "../infrastructure/favicon-fetcher"; import { extractEmailDomain } from "../infrastructure/favicon-fetcher";
import { parseOneClickUnsubscribe } from "../infrastructure/unsubscribe"; import { parseOneClickUnsubscribe } from "../infrastructure/unsubscribe";
import { getAttachmentBucket } from "../infrastructure/attachments"; import { getAttachmentBucket } from "../infrastructure/attachments";
import { extractInlineCids } from "../infrastructure/html-processor"; import {
extractInlineCids,
extractLinks,
htmlToText,
} from "../infrastructure/html-processor";
import { detectConfirmation } from "../domain/confirmation";
import { attachmentIdsForCleanup } from "./feed-cleanup"; import { attachmentIdsForCleanup } from "./feed-cleanup";
import { FeedRepository } from "../infrastructure/feed-repository"; import { FeedRepository } from "../infrastructure/feed-repository";
import { BackgroundScheduler } from "../infrastructure/worker"; import { BackgroundScheduler } from "../infrastructure/worker";
@@ -182,6 +187,12 @@ async function storeEmail(
return false; // signal: skipped (not stored) return false; // signal: skipped (not stored)
} }
const confirmation = detectConfirmation({
subject: input.subject,
text: htmlToText(input.content),
links: extractLinks(input.content),
});
const attachmentBucket = getAttachmentBucket(env); const attachmentBucket = getAttachmentBucket(env);
const inlineCids = extractInlineCids(input.content); const inlineCids = extractInlineCids(input.content);
const storedAttachments: AttachmentData[] = const storedAttachments: AttachmentData[] =
@@ -218,6 +229,7 @@ async function storeEmail(
...(inlineIds.length > 0 ? { inlineAttachmentIds: inlineIds } : {}), ...(inlineIds.length > 0 ? { inlineAttachmentIds: inlineIds } : {}),
...(messageId ? { messageId } : {}), ...(messageId ? { messageId } : {}),
dedupHash, dedupHash,
...(confirmation ? { confirmation: { links: confirmation.links } } : {}),
}; };
// Track the latest sender's domain (feed icon) and capture the RFC 8058 // Track the latest sender's domain (feed icon) and capture the RFC 8058