Files
kill-the-news/src/infrastructure/favicon-fetcher.ts
T
Julien Herr f823a5f222 refactor: move KV repositories to infrastructure (Track P — points 2, 6c)
Make the domain stop depending on infrastructure ("imports point inward").

- Point 2: relocate the four KV adapters (FeedRepository, IconRepository,
  WebSubSubscriptionRepository, CountersRepository) from domain/ to
  infrastructure/, where the logger import is legitimate. The domain now keeps
  only the pure key schema (feed-keys.ts), the Feed aggregate and value objects;
  it imports nothing outward. Deliberately no hand-rolled 24-method port
  interface (YAGNI without DI) — relocation alone fixes the direction.
- Point 6c: EmailParser.extractFeedId now returns a validated FeedId value
  object instead of a raw string, so the most untrusted input (an inbound
  recipient address) is guarded at the parse boundary and no longer round-trips
  through FeedId.fromTrusted in the ingest path.

All import paths updated; CLAUDE.md source layout/KV-schema notes updated.
351 tests pass; tsc --noEmit clean.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-24 10:02:23 +02:00

125 lines
3.7 KiB
TypeScript

import { Env } from "../types";
import {
ICON_FETCH_TIMEOUT_MS,
ICON_TTL_SECONDS,
MAX_ICON_BYTES,
} from "../config/constants";
import { IconRepository } from "./icon-repository";
import { EmailAddress } from "../domain/value-objects/email-address";
import { logger } from "./logger";
/**
 * Shape of the JSON value stored per sender domain in the icon cache.
 */
interface IconRecord {
  /**
   * Base64-encoded icon bytes, or `null` when the entry is a negative cache
   * marker (no icon could be resolved for the domain).
   */
  data: string | null;
  /** Media type of the icon; written as an empty string for negative entries. */
  contentType: string;
}
/**
 * Pull the domain part out of an email `from` value.
 *
 * Parsing is delegated entirely to `EmailAddress.parse`; this helper only
 * unwraps the parsed address's `domain.value`. The intended inputs are a bare
 * address (`a@b.com`) or a display form (`Name <a@b.com>`), and the result is
 * expected to be lowercased — NOTE(review): both properties come from
 * `EmailAddress`, confirm them there.
 *
 * @param from Raw sender value.
 * @returns The domain string, or null when no address could be parsed.
 */
export function extractEmailDomain(from: string): string | null {
  const parsed = EmailAddress.parse(from);
  if (parsed == null) {
    return null;
  }
  return parsed.domain.value ?? null;
}
/**
 * Encode raw bytes as a base64 string.
 *
 * The bytes are first turned into a binary string (one character per byte)
 * and then passed to `btoa`. The conversion is done in slices of 0x8000 bytes
 * because each slice is spread into `String.fromCharCode` as individual
 * arguments, and a single huge spread could exceed the engine's argument limit.
 *
 * @param buffer Bytes to encode.
 * @returns Base64 text; an empty string for an empty buffer.
 */
function arrayBufferToBase64(buffer: ArrayBuffer): string {
  const view = new Uint8Array(buffer);
  const sliceLength = 0x8000;
  const pieces: string[] = [];

  for (let offset = 0; offset < view.length; offset += sliceLength) {
    const slice = view.subarray(offset, offset + sliceLength);
    pieces.push(String.fromCharCode(...slice));
  }

  return btoa(pieces.join(""));
}
/**
 * Decode a base64 string back into raw bytes.
 *
 * `atob` yields a binary string in which every character has a code in the
 * range 0–255, so mapping each character to its char code reproduces the
 * original byte sequence.
 *
 * @param base64 Base64 text to decode.
 * @returns The underlying buffer of the decoded bytes. Invalid base64 makes
 *   `atob` throw; that error is not caught here.
 */
function base64ToArrayBuffer(base64: string): ArrayBuffer {
  const decoded = atob(base64);
  const octets = Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
  return octets.buffer;
}
/**
 * Best-effort release of a response body that will not be read, so the
 * underlying connection is not held open by an unconsumed stream. Failures to
 * cancel are ignored: the response is being thrown away either way.
 */
async function discardResponseBody(res: Response): Promise<void> {
  await res.body?.cancel().catch(() => undefined);
}

/**
 * Fetch one candidate URL and return its bytes when it looks like a usable
 * icon.
 *
 * A candidate is rejected (null) when:
 * - the response status is not OK,
 * - the Content-Type is not an `image/*` media type (compared
 *   case-insensitively, since media types are case-insensitive),
 * - the declared Content-Length already exceeds `MAX_ICON_BYTES` (checked
 *   before downloading so an oversized body is never buffered when the server
 *   announces its size),
 * - the downloaded body is empty or larger than `MAX_ICON_BYTES`.
 *
 * Network and timeout errors are NOT caught here; they reject the returned
 * promise and are expected to be handled by the caller.
 *
 * @param url Absolute URL of the icon candidate.
 * @returns The icon bytes and the bare, lowercased media type (parameters
 *   such as `charset` removed), or null when the candidate is unusable.
 */
async function fetchIconFrom(
  url: string,
): Promise<{ buffer: ArrayBuffer; contentType: string } | null> {
  const res = await fetch(url, {
    redirect: "follow",
    signal: AbortSignal.timeout(ICON_FETCH_TIMEOUT_MS),
    headers: { "User-Agent": "kill-the-news/1.0" },
  });

  if (!res.ok) {
    await discardResponseBody(res);
    return null;
  }

  // Keep only the `type/subtype` part and normalise its case before matching.
  const rawContentType = res.headers.get("content-type") ?? "";
  const mediaType = (rawContentType.split(";", 1)[0] ?? "")
    .trim()
    .toLowerCase();
  if (!mediaType.startsWith("image/")) {
    await discardResponseBody(res);
    return null;
  }

  // A missing or unparsable Content-Length (NaN) never trips this check; the
  // post-download size check below remains the authoritative limit.
  const declaredLength = res.headers.get("content-length");
  if (declaredLength !== null && Number(declaredLength) > MAX_ICON_BYTES) {
    await discardResponseBody(res);
    return null;
  }

  const buffer = await res.arrayBuffer();
  if (buffer.byteLength === 0 || buffer.byteLength > MAX_ICON_BYTES) {
    return null;
  }
  return { buffer, contentType: mediaType };
}
/**
 * Look up an icon for `domain`, trying the known sources one after another.
 *
 * The domain's own `/favicon.ico` is attempted first; only if that yields
 * nothing is the DuckDuckGo icon endpoint queried. Any error raised while
 * fetching a source (network failure, timeout, ...) is treated the same as
 * "no icon from this source", so this function does not reject because of a
 * failed fetch.
 *
 * @param domain Sender domain to resolve an icon for.
 * @returns The first usable icon, or null when every source came up empty.
 */
async function resolveIcon(
  domain: string,
): Promise<{ buffer: ArrayBuffer; contentType: string } | null> {
  // Wraps a single fetch so that a thrown error degrades to "not found".
  const attempt = async (
    url: string,
  ): Promise<{ buffer: ArrayBuffer; contentType: string } | null> => {
    try {
      return await fetchIconFrom(url);
    } catch {
      return null;
    }
  };

  const fromSite = await attempt(`https://${domain}/favicon.ico`);
  if (fromSite) {
    return fromSite;
  }

  const fromFallback = await attempt(
    `https://icons.duckduckgo.com/ip3/${domain}.ico`,
  );
  return fromFallback ? fromFallback : null;
}
/**
 * Ensure the icon cache holds an entry for a sender domain.
 *
 * If any entry is already stored for the domain — a real icon or a negative
 * marker — the function returns without fetching, which makes it cheap to
 * call for every incoming email. Otherwise it resolves the icon and stores
 * either the base64-encoded bytes or a negative marker (`data: null`) with a
 * TTL of `ICON_TTL_SECONDS`; nothing in this function expires entries other
 * than that TTL.
 *
 * Every failure inside is caught and reported through `logger.warn`, so the
 * returned promise is not rejected by lookup, fetch or store errors.
 *
 * @param domain Sender domain used as the cache key.
 * @param env Environment handed to `IconRepository.from`.
 */
export async function cacheFaviconForDomain(
  domain: string,
  env: Env,
): Promise<void> {
  try {
    const icons = IconRepository.from(env);

    const stored = await icons.getText(domain);
    if (stored !== null) {
      // Something (possibly a negative marker) is cached already.
      return;
    }

    const resolved = await resolveIcon(domain);
    let entry: IconRecord;
    if (resolved) {
      entry = {
        data: arrayBufferToBase64(resolved.buffer),
        contentType: resolved.contentType,
      };
    } else {
      entry = { data: null, contentType: "" };
    }

    await icons.put(domain, JSON.stringify(entry), ICON_TTL_SECONDS);
  } catch (err) {
    logger.warn("Favicon cache failed", { domain, error: String(err) });
  }
}
/**
 * Load the cached icon for a domain, decoded back to raw bytes.
 *
 * @param domain Sender domain used as the cache key.
 * @param env Environment handed to `IconRepository.from`.
 * @returns The icon bytes and their media type, or null when nothing is
 *   cached for the domain or the cached entry is a negative marker
 *   (`data === null`).
 */
export async function getCachedIcon(
  domain: string,
  env: Env,
): Promise<{ bytes: ArrayBuffer; contentType: string } | null> {
  const icons = IconRepository.from(env);
  const cached = await icons.getJson<IconRecord>(domain);

  if (cached && cached.data !== null) {
    const bytes = base64ToArrayBuffer(cached.data);
    return { bytes, contentType: cached.contentType };
  }
  return null;
}