fix(feed): use permalink URL as Atom entry id, strip mso-* inline styles

- Entry <id> was a non-URL string (timestamp + base64 snippet), which
  is invalid per the Atom spec; now uses the entry permalink URL which
  is both valid and stable across feed regeneration
- Strip mso-* properties from inline style attributes in extracted body
  content to eliminate the feed validator DangerousStyleAttr warning
  caused by Microsoft Office HTML in newsletter emails

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Julien Herr
2026-05-22 18:43:06 +02:00
parent 4428f35dd4
commit afed4464cf
+20 -8
View File
@@ -17,11 +17,23 @@ function parseFromAddress(from: string): { name: string; email?: string } {
// the body fragment in <description>/<content:encoded>, not a full document.
/**
 * Extracts the inner content of the `<body>` element from an HTML email and
 * removes Microsoft Office `mso-*` declarations from inline `style` attributes.
 *
 * Extraction rules:
 * - `<body>…</body>` present: the text between the two tags.
 * - Only an opening `<body>` tag: everything after it, minus a trailing `</html>`.
 * - No `<body>` tag at all: the input is used unchanged.
 *
 * Only double-quoted `style="…"` attributes are rewritten. A style attribute
 * left with no declarations is removed entirely.
 *
 * @param html Raw HTML of the email.
 * @returns The body fragment with `mso-*` inline style declarations stripped.
 */
export function extractBodyContent(html: string): string {
  let body = html;
  const withClose = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
  if (withClose) {
    body = withClose[1];
  } else {
    // Some HTML emails omit </body>; capture everything after the opening tag
    const withoutClose = html.match(/<body[^>]*>([\s\S]*)/i);
    if (withoutClose) {
      body = withoutClose[1].replace(/<\/html>\s*$/i, "");
    }
  }

  // Strip mso-* properties from inline styles (Office HTML — triggers feed validator warnings).
  // Every extraction path above must reach this step; returning early would skip the cleanup.
  // NOTE(review): declarations are split on ";", so a value that itself contains ";"
  // (e.g. a data: URI) is re-joined with "; " — confirm this is acceptable for the inputs seen.
  return body.replace(/\bstyle="([^"]*)"/gi, (_match, style: string) => {
    const cleaned = style
      .split(";")
      .map((declaration) => declaration.trim())
      .filter((declaration) => declaration && !/^mso-/i.test(declaration))
      .join("; ");
    return cleaned ? `style="${cleaned}"` : "";
  });
}
function buildFeed(
@@ -56,13 +68,13 @@ function buildFeed(
});
for (const email of emails) {
const uniqueId = `${email.receivedAt}-${Buffer.from(email.subject).toString("base64").substring(0, 10)}`;
const entryUrl = `${baseUrl}/entries/${feedId}/${email.receivedAt}`;
const firstAttachment = email.attachments?.[0];
const bodyContent = extractBodyContent(email.content);
feed.addItem({
title: email.subject,
id: uniqueId,
link: `${baseUrl}/entries/${feedId}/${email.receivedAt}`,
id: entryUrl,
link: entryUrl,
description: bodyContent,
content: bodyContent,
author: [parseFromAddress(email.from)],