mirror of
https://github.com/juherr/kill-the-news.git
synced 2026-06-20 22:03:48 +00:00
feat(app): detect native feeds during email ingestion
Wire extractFeedLinks + detectNativeFeeds into storeEmail so that RSS/Atom/JSON feed <link> tags in the newsletter HTML are detected and stored per-sender on the feed metadata. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -994,3 +994,57 @@ describe("processEmail — unsubscribe capture", () => {
|
|||||||
expect(metadata.unsubscribe).toBeUndefined();
|
expect(metadata.unsubscribe).toBeUndefined();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("native feed detection on ingest", () => {
  // Metadata shape these tests read back; only `nativeFeeds` is inspected.
  type NativeFeed = { url: string; type: string };
  type StoredMetadata = { nativeFeeds?: Record<string, NativeFeed[]> };

  let env: ReturnType<typeof createMockEnv>;

  // Reads the test feed's metadata entry from the mock storage as JSON.
  const readMetadata = () =>
    env.EMAIL_STORAGE.get(`feed:${VALID_FEED_ID}:metadata`, "json");

  beforeEach(async () => {
    // Each test starts from a fresh mock env holding an empty feed config,
    // followed by the shared seedInboundIndex setup for the same feed id.
    env = createMockEnv();
    const emptyConfig = JSON.stringify({});
    await env.EMAIL_STORAGE.put(`feed:${VALID_FEED_ID}:config`, emptyConfig);
    await seedInboundIndex(env, VALID_FEED_ID);
  });

  it("stores detected native feeds on the feed metadata (TEST A)", async () => {
    // Newsletter HTML advertising a single RSS feed through a <link> tag.
    const sender = "news@blog.example.com";
    const htmlWithFeedLink =
      '<html><head><link rel="alternate" type="application/rss+xml" href="https://blog.example.com/feed.xml"></head><body>hello</body></html>';
    const input = makeInput({
      from: sender,
      senders: [sender],
      content: htmlWithFeedLink,
    });

    const result = await processEmail(input, env as any);
    expect(result.ok).toBe(true);

    // Flatten across sender keys so the assertion does not depend on the key.
    const metadata = (await readMetadata()) as StoredMetadata;
    const storedFeeds = Object.values(metadata.nativeFeeds!).flat();
    expect(storedFeeds).toEqual([
      { url: "https://blog.example.com/feed.xml", type: "rss" },
    ]);
  });

  it("does not store nativeFeeds when no feed links are found (TEST B)", async () => {
    const input = makeInput({ content: "<p>no feed here</p>" });

    const result = await processEmail(input, env as any);
    expect(result.ok).toBe(true);

    // With no feed <link> in the content, the property must be absent.
    const metadata = (await readMetadata()) as StoredMetadata;
    expect(metadata.nativeFeeds).toBeUndefined();
  });
});
|
||||||
|
|||||||
@@ -6,11 +6,13 @@ import { extractEmailDomain } from "../infrastructure/favicon-fetcher";
|
|||||||
import { parseOneClickUnsubscribe } from "../infrastructure/unsubscribe";
|
import { parseOneClickUnsubscribe } from "../infrastructure/unsubscribe";
|
||||||
import { getAttachmentBucket } from "../infrastructure/attachments";
|
import { getAttachmentBucket } from "../infrastructure/attachments";
|
||||||
import {
|
import {
|
||||||
|
extractFeedLinks,
|
||||||
extractInlineCids,
|
extractInlineCids,
|
||||||
extractLinks,
|
extractLinks,
|
||||||
htmlToText,
|
htmlToText,
|
||||||
} from "../infrastructure/html-processor";
|
} from "../infrastructure/html-processor";
|
||||||
import { detectConfirmation } from "../domain/confirmation";
|
import { detectConfirmation } from "../domain/confirmation";
|
||||||
|
import { detectNativeFeeds } from "../domain/native-feed";
|
||||||
import { attachmentIdsForCleanup } from "./feed-cleanup";
|
import { attachmentIdsForCleanup } from "./feed-cleanup";
|
||||||
import { FeedRepository } from "../infrastructure/feed-repository";
|
import { FeedRepository } from "../infrastructure/feed-repository";
|
||||||
import { BackgroundScheduler } from "../infrastructure/worker";
|
import { BackgroundScheduler } from "../infrastructure/worker";
|
||||||
@@ -18,6 +20,13 @@ import { Feed } from "../domain/feed.aggregate";
|
|||||||
import { logger } from "../infrastructure/logger";
|
import { logger } from "../infrastructure/logger";
|
||||||
import { FEED_MAX_BYTES } from "../config/constants";
|
import { FEED_MAX_BYTES } from "../config/constants";
|
||||||
|
|
||||||
|
/**
 * Best-effort site base for absolutizing a sender's relative feed link.
 *
 * Uses the text after the last "@" of the sender address, keeping only the
 * leading run of host characters. A name-addr such as
 * `News <news@blog.example.com>` or an address followed by a comment
 * therefore yields `https://blog.example.com` instead of a base polluted by
 * `>` or trailing text.
 *
 * @param from - Sender address, either bare (`a@b.c`) or name-addr (`N <a@b.c>`).
 * @returns `https://<domain>`, or an empty string when no domain can be found.
 */
function iconBase(from: string): string {
  const at = from.lastIndexOf("@");
  if (at < 0) return "";

  // Stop at the first character that cannot belong to the host part:
  // whitespace, angle brackets, comment parentheses, quotes or list separators.
  const afterAt = from.slice(at + 1).trim();
  const domain = /^[^\s<>()"',;]+/.exec(afterAt)?.[0] ?? "";

  return domain ? `https://${domain}` : "";
}
|
||||||
|
|
||||||
export interface RawAttachment {
|
export interface RawAttachment {
|
||||||
filename: string;
|
filename: string;
|
||||||
contentType: string;
|
contentType: string;
|
||||||
@@ -193,6 +202,10 @@ async function storeEmail(
|
|||||||
links: extractLinks(input.content),
|
links: extractLinks(input.content),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const nativeFeedList = detectNativeFeeds(
|
||||||
|
extractFeedLinks(input.content, iconBase(input.from)),
|
||||||
|
);
|
||||||
|
|
||||||
const attachmentBucket = getAttachmentBucket(env);
|
const attachmentBucket = getAttachmentBucket(env);
|
||||||
const inlineCids = extractInlineCids(input.content);
|
const inlineCids = extractInlineCids(input.content);
|
||||||
const storedAttachments: AttachmentData[] =
|
const storedAttachments: AttachmentData[] =
|
||||||
@@ -238,13 +251,9 @@ async function storeEmail(
|
|||||||
// one-click unsubscribe link, keyed by sender so each newsletter keeps its
|
// one-click unsubscribe link, keyed by sender so each newsletter keeps its
|
||||||
// own latest URL (fired when the feed is deleted).
|
// own latest URL (fired when the feed is deleted).
|
||||||
const iconDomain = extractEmailDomain(input.from);
|
const iconDomain = extractEmailDomain(input.from);
|
||||||
|
const senderKey = input.senders[0] || iconDomain || input.from;
|
||||||
const unsubUrl = parseOneClickUnsubscribe(input.headers ?? {});
|
const unsubUrl = parseOneClickUnsubscribe(input.headers ?? {});
|
||||||
const unsub = unsubUrl
|
const unsub = unsubUrl ? { senderKey, url: unsubUrl } : undefined;
|
||||||
? {
|
|
||||||
senderKey: input.senders[0] || iconDomain || input.from,
|
|
||||||
url: unsubUrl,
|
|
||||||
}
|
|
||||||
: undefined;
|
|
||||||
|
|
||||||
const maxBytes =
|
const maxBytes =
|
||||||
parseInt(env.FEED_MAX_SIZE_BYTES ?? "", 10) || FEED_MAX_BYTES;
|
parseInt(env.FEED_MAX_SIZE_BYTES ?? "", 10) || FEED_MAX_BYTES;
|
||||||
@@ -253,6 +262,9 @@ async function storeEmail(
|
|||||||
maxBytes,
|
maxBytes,
|
||||||
iconDomain: iconDomain ?? undefined,
|
iconDomain: iconDomain ?? undefined,
|
||||||
unsub,
|
unsub,
|
||||||
|
...(nativeFeedList.length > 0
|
||||||
|
? { nativeFeeds: { senderKey, feeds: nativeFeedList } }
|
||||||
|
: {}),
|
||||||
});
|
});
|
||||||
|
|
||||||
const r2Deletions =
|
const r2Deletions =
|
||||||
|
|||||||
Reference in New Issue
Block a user