/** Runs of 8+ identical rule characters (`-`, `_`, `=`, `*`); used to split a line into segments. */
const SEPARATOR_RUN_RE = /-{8,}|_{8,}|={8,}|\*{8,}/g

/**
 * One HTML character reference: numeric (`&#233;`, `&#xE9;`) or named (`&amp;`).
 *
 * The regex is global, so it must only be used with `replace`/`search`;
 * `test`/`exec` would carry `lastIndex` over from one call to the next.
 */
const HTML_ENTITY_RE = /&(?:#x?[0-9a-f]+|[a-z][a-z0-9]{1,8});/gi

/** Named references decoded when no DOM is available (keys are lower-cased entities). */
const NAMED_ENTITY_FALLBACKS: ReadonlyMap<string, string> = new Map([
  ["&nbsp;", " "],
  ["&amp;", "&"],
  ["&lt;", "<"],
  ["&gt;", ">"],
  ["&quot;", '"'],
  ["&apos;", "'"],
])

/** Largest value `String.fromCodePoint` accepts without throwing. */
const MAX_CODE_POINT = 0x10ffff

/**
 * Lower-cases `s`, replaces brackets, parentheses and middle dots with spaces,
 * collapses whitespace and trims, so phrase matching ignores decoration.
 */
function normalizeSnippetMatchText(s: string): string {
  return s
    .toLowerCase()
    .replace(/[[\]()·]/g, " ")
    .replace(/\s+/g, " ")
    .trim()
}

/**
 * True when `s` is essentially a "view in browser" link line: it contains one
 * of the known phrases and either little else remains once the phrase is
 * removed (<= 20 chars) or the raw text itself is short (<= 90 chars).
 */
function isViewInBrowserSnippet(s: string): boolean {
  const norm = normalizeSnippetMatchText(s)
  const phrases = [
    "afficher dans le navigateur",
    "view in browser",
    "voir ce message en ligne",
    "version en ligne",
  ]
  return phrases.some((phrase) => {
    if (!norm.includes(phrase)) return false
    const rest = norm.replace(phrase, "").trim()
    return rest.length <= 20 || s.length <= 90
  })
}

/**
 * True when `s` looks like a company legal footer: it must mention a URL
 * (`http` or `www.`) and either contain at least two footer markers or start
 * with `sas ` while containing `http`.
 */
function isSnippetLegalFooter(s: string): boolean {
  const lower = s.toLowerCase()
  if (!lower.includes("http") && !lower.includes("www.")) return false
  const markers = [
    "sas ",
    "sarl ",
    "rue ",
    " bp ",
    "kellermann",
    "ovh.com",
    "www.ovh",
  ]
  const hits = markers.filter((m) => lower.includes(m)).length
  if (hits >= 2) return true
  return lower.startsWith("sas ") && lower.includes("http")
}

/** True when `s`, ignoring leading whitespace, starts with 12 or more separator characters. */
function hasLeadingSeparatorRun(s: string): boolean {
  const separators = "-_*='·—"
  let run = 0
  for (const ch of s.trimStart()) {
    if (!separators.includes(ch)) break
    run++
  }
  return run >= 12
}

/**
 * Heuristic: true when `s` looks like leaked CSS rather than message text
 * (at-rules, `:root`, font declarations, known comment markers, a rule body
 * with typical properties, or a leading block comment).
 */
function looksLikeCssSnippet(s: string): boolean {
  const lower = s.toLowerCase()
  const cssMarkers = [
    ":root",
    "color-scheme:",
    "@media",
    "@font-face",
    "font-family:",
    "/*//",
    "||//",
  ]
  if (cssMarkers.some((m) => lower.includes(m))) return true
  if (lower.includes("meta for business") && s.includes("{")) return true

  const hasRuleBody = s.includes("{") && s.includes("}") && s.includes(";")
  const hasCssProperty =
    lower.includes("font-") ||
    lower.includes("margin:") ||
    lower.includes("padding:")
  if (hasRuleBody && hasCssProperty) return true

  return /^\s*\/\*/.test(s)
}

/**
 * True when `s` still contains at least one HTML character reference.
 *
 * Uses `search`, which ignores `lastIndex`, so consecutive calls on different
 * strings cannot influence each other even though the regex is global.
 */
function hasUndecodedHtmlEntities(s: string): boolean {
  return s.search(HTML_ENTITY_RE) !== -1
}

/**
 * True when `s` has at least 8 UTF-16 units and 55% or more of its code points
 * are separator characters.
 */
function isMostlySeparatorLine(s: string): boolean {
  if (s.length < 8) return false
  const sep = (s.match(/[-_*=·—|]/g) ?? []).length
  return sep / [...s].length >= 0.55
}

/**
 * True when `s` should not be used as a preview snippet: empty or too short,
 * CSS, separator lines, "view in browser" lines, legal footers, text that
 * still carries tags or character references, short "click here" lines, bare
 * short URLs, or text where under 35% of code points are letters or digits.
 */
function isSnippetBoilerplate(s: string): boolean {
  const t = stripHtmlTagsForSnippet(s.trim())
  if (!t || t.length < 4) return true
  if (looksLikeCssSnippet(t) || isMostlySeparatorLine(t) || hasLeadingSeparatorRun(t))
    return true
  if (isViewInBrowserSnippet(t) || isSnippetLegalFooter(t)) return true
  // These two checks look at the raw input, not the cleaned text.
  if (/<[^>]+>/.test(s)) return true
  if (hasUndecodedHtmlEntities(s)) return true
  const lower = t.toLowerCase()
  const phrases = ["si vous ne visualisez pas", "cliquer ici", "click here"]
  if (phrases.some((p) => lower.includes(p)) && t.length < 200) return true
  if (
    (lower.startsWith("http://") || lower.startsWith("https://")) &&
    t.length < 120
  )
    return true
  const letters = (t.match(/\p{L}|\p{N}/gu) ?? []).length
  return letters / [...t].length < 0.35
}

/**
 * Decodes a numeric character reference body, or returns `undefined` when the
 * value is not a code point `String.fromCodePoint` accepts.
 */
function decodeNumericEntity(digits: string, radix: number): string | undefined {
  const codePoint = Number.parseInt(digits, radix)
  if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > MAX_CODE_POINT) {
    return undefined
  }
  return String.fromCodePoint(codePoint)
}

/**
 * Decodes HTML character references in `s`, repeating up to four passes so
 * that double-encoded input (`&amp;lt;`) is fully decoded.
 *
 * When a global `document` exists, a `<textarea>` does the decoding; otherwise
 * a small table of named references plus numeric references is handled, and
 * anything unknown or out of range is left unchanged instead of throwing.
 */
function decodeHtmlEntitiesForSnippet(s: string): string {
  // One scratch element per call instead of one per entity.
  const scratch =
    typeof document !== "undefined" ? document.createElement("textarea") : undefined

  const decodeOne = (entity: string): string => {
    if (scratch) {
      scratch.innerHTML = entity
      return scratch.value
    }
    const named = NAMED_ENTITY_FALLBACKS.get(entity.toLowerCase())
    if (named !== undefined) return named
    const dec = entity.match(/^&#(\d+);$/i)?.[1]
    if (dec !== undefined) return decodeNumericEntity(dec, 10) ?? entity
    const hex = entity.match(/^&#x([0-9a-f]+);$/i)?.[1]
    if (hex !== undefined) return decodeNumericEntity(hex, 16) ?? entity
    return entity
  }

  let out = s
  for (let pass = 0; pass < 4; pass++) {
    const next = out.replace(HTML_ENTITY_RE, decodeOne)
    if (next === out) break
    out = next
  }
  return out
}

/**
 * Cuts `s` at the first CSS/comment marker found, provided at least 12
 * characters of text precede it; trailing spaces and `/ * - _ |` are removed
 * from the kept head. Returns `s` unchanged when no marker qualifies.
 */
function stripSnippetCssTail(s: string): string {
  for (const marker of ["/*//", "/*", "//||"]) {
    const idx = s.indexOf(marker)
    if (idx < 0) continue
    const head = s.slice(0, idx).trim()
    if (head.length >= 12) return head.replace(/[ /*\-_|]+$/g, "")
  }
  return s
}

/**
 * Splits `s` into candidate segments: first on line breaks, then on separator
 * runs; each segment is tag-stripped and empty segments are dropped.
 */
function splitSnippetSegments(s: string): string[] {
  return s
    .replace(/\r\n/g, "\n")
    .split("\n")
    .flatMap((line) =>
      line
        .split(SEPARATOR_RUN_RE)
        .map((p) => stripHtmlTagsForSnippet(p.trim()))
        .filter(Boolean)
    )
}

/**
 * Produces plain text from an HTML fragment: removes `<style>` blocks and
 * tags, decodes character references, drops a trailing CSS tail, then
 * collapses whitespace and trims.
 */
function stripHtmlTagsForSnippet(s: string): string {
  const withoutTags = s
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, " ")
    .replace(/<[^>]+>/g, " ")
  const decoded = decodeHtmlEntitiesForSnippet(withoutTags)
  return stripSnippetCssTail(decoded).replace(/\s+/g, " ").trim()
}