/**
 * Client-side repair for messages stored with raw MIME in body_text/body_html
 * (before backend decode fix). Mirrors imap.RepairStoredBodies heuristics.
 */
import {
  stripHiddenEmailHtml,
  stripInvisibleTextRuns,
} from "@/lib/strip-hidden-email-html"

/**
 * Heuristic check for an undecoded MIME entity: the text must contain a
 * `Content-Type:` header plus either a `Content-Transfer-Encoding:` header or
 * a `--` delimiter together with the word "multipart".
 */
function looksLikeRawMime(s: string): boolean {
  if (!s.includes("Content-Type:")) return false
  return (
    s.includes("Content-Transfer-Encoding:") ||
    (s.includes("--") && s.toLowerCase().includes("multipart"))
  )
}

/** Extracts the lower-cased `charset=` parameter of a Content-Type value, or "". */
function charsetFromContentType(contentType: string): string {
  const match = contentType.match(/charset\s*=\s*"?([^";\s]+)"?/i)
  return match?.[1]?.trim().toLowerCase() ?? ""
}

/** True for an empty charset or one of the UTF-8 labels handled here. */
function isUtf8Charset(charset: string): boolean {
  return (
    charset === "" ||
    charset === "utf-8" ||
    charset === "utf8" ||
    charset === "unicode-1-1-utf-8"
  )
}

/**
 * True when every UTF-16 code unit of `s` is <= 0xFF, i.e. the string can be
 * read as a sequence of bytes without losing information.
 */
function isByteString(s: string): boolean {
  for (let i = 0; i < s.length; i++) {
    if (s.charCodeAt(i) > 0xff) return false
  }
  return true
}

/** Converts a byte string (see `isByteString`) to the bytes it represents. */
function byteStringToBytes(s: string): Uint8Array {
  return Uint8Array.from(s, (c) => c.charCodeAt(0))
}

/**
 * Decodes bytes to a string.
 *
 * Order of attempts: the declared non-UTF-8 `charset` (if any and supported),
 * strict UTF-8, then windows-1252 and iso-8859-1. Returns "" for empty input
 * or when no decoder could be constructed.
 */
function decodeBytesToUtf8(bytes: Uint8Array, charset = ""): string {
  if (bytes.length === 0) return ""
  const normalized = charset.toLowerCase()
  if (normalized && !isUtf8Charset(normalized)) {
    try {
      return new TextDecoder(normalized).decode(bytes)
    } catch {
      /* unsupported label: fall through to the generic chain */
    }
  }
  try {
    return new TextDecoder("utf-8", { fatal: true }).decode(bytes)
  } catch {
    /* not valid UTF-8: try the single-byte fallbacks */
  }
  for (const label of ["windows-1252", "iso-8859-1"]) {
    try {
      return new TextDecoder(label).decode(bytes)
    } catch {
      /* try the next label */
    }
  }
  return ""
}

const UTF8_MOJIBAKE_PAIR_RE = /[\u00C2\u00C3][\u0080-\u00BF]/

/** True when `s` contains a C2/C3 lead character followed by a continuation character. */
function looksLikeUtf8Mojibake(s: string): boolean {
  return UTF8_MOJIBAKE_PAIR_RE.test(s)
}

/** UTF-8 misread as Latin-1: "rÃ©activitÃ©" → "réactivité". */
function repairUtf8Mojibake(s: string): string {
  if (!s || !looksLikeUtf8Mojibake(s)) return s
  const strictUtf8 = new TextDecoder("utf-8", { fatal: true })
  let out = ""
  for (let i = 0; i < s.length; i++) {
    const code = s.charCodeAt(i)
    if ((code === 0xc2 || code === 0xc3) && i + 1 < s.length) {
      const next = s.charCodeAt(i + 1)
      if (next >= 0x80 && next <= 0xbf) {
        try {
          out += strictUtf8.decode(new Uint8Array([code, next]))
          i++ // the continuation character was consumed as well
          continue
        } catch {
          /* keep raw chars */
        }
      }
    }
    out += s.charAt(i)
  }
  if (out === s) return s
  return repairLoneMojibakeLeaders(out)
}

/**
 * Handles lead characters left alone in front of whitespace or punctuation
 * after pair repair: a lone "Ã" becomes "à".
 */
function repairLoneMojibakeLeaders(s: string): string {
  return (
    s
      .replace(/\u00C3(?=[\s,.;:!?])/g, "à")
      // NOTE(review): this rule replaces U+00C2 with itself, so it has no
      // visible effect. Confirm the intended replacement against the backend.
      .replace(/\u00C2(?=[\s,.;:!?])/g, "Â")
  )
}

/**
 * Repairs a string whose characters are really single-byte data.
 *
 * Only byte strings are reinterpreted: when the bytes are not valid UTF-8 they
 * are decoded through `decodeBytesToUtf8`. Strings containing any code unit
 * above 0xFF are already decoded Unicode text; masking them to bytes would
 * turn e.g. "’" (U+2019) into the control character U+0019, so they skip that
 * step. UTF-8 mojibake pairs are repaired in both cases.
 */
function repairLegacyCharsetString(s: string): string {
  if (!s) return s
  let repaired = s
  if (isByteString(s)) {
    const bytes = byteStringToBytes(s)
    try {
      // Validation only: bytes that already form valid UTF-8 are left to the
      // mojibake pass below.
      new TextDecoder("utf-8", { fatal: true }).decode(bytes)
    } catch {
      repaired = decodeBytesToUtf8(bytes)
    }
  }
  return repairUtf8Mojibake(repaired)
}

/**
 * Decodes a base64 body (whitespace ignored) using `charset`.
 * Returns "" when `atob` is unavailable or the input is not valid base64.
 */
function decodeBase64Part(encoded: string, charset = ""): string {
  const clean = encoded.replace(/[\r\n\t ]/g, "")
  if (typeof atob === "undefined") return ""
  try {
    return decodeBytesToUtf8(byteStringToBytes(atob(clean)), charset)
  } catch {
    return ""
  }
}

/** Fallback boundary detection: the first `--token` line that is not a closing delimiter. */
function findFirstBoundaryLine(raw: string): string {
  for (const line of raw.split(/\r?\n/)) {
    const t = line.trim()
    if (t.startsWith("--") && !t.endsWith("--") && t.length > 2) {
      return t.slice(2).trim()
    }
  }
  return ""
}

/**
 * Extracts the first text/plain and text/html parts from a raw multipart
 * entity.
 *
 * @returns the decoded parts, or null when `raw` does not look like MIME, no
 * boundary is found, no text part is found, or a decoded part still looks
 * like raw MIME.
 */
function parseEmbeddedMime(raw: string): { text: string; html: string } | null {
  if (!looksLikeRawMime(raw)) return null
  const declared = raw.match(/boundary\s*=\s*"?([^";\s]+)"?/i)?.[1]
  const boundary = declared ?? findFirstBoundaryLine(raw)
  if (!boundary) return null

  // A delimiter ends with a newline OR with the end of input: callers trim the
  // stored body, so the closing "--boundary--" usually has no trailing newline
  // and would otherwise stay attached to the last part.
  const delimiter = new RegExp(
    `--${escapeRegExp(boundary)}(?:--)?(?:\\s*\\r?\\n|\\s*$)`
  )

  let text = ""
  let html = ""
  for (const part of raw.split(delimiter)) {
    const trimmed = part.trim()
    if (!trimmed || !trimmed.includes("Content-Type:")) continue
    const headerEnd = trimmed.search(/\r?\n\r?\n/)
    if (headerEnd < 0) continue

    // Unfold continuation lines so parameters such as charset= that sit on a
    // folded line are still seen.
    const headers = trimmed.slice(0, headerEnd).replace(/\r?\n[ \t]+/g, " ")
    const body = trimmed.slice(headerEnd).replace(/^[\r\n]+/, "").trim()

    const typeHeader = headers.match(/Content-Type:\s*([^\r\n]+)/i)?.[1] ?? ""
    const mediaType = typeHeader.split(";")[0]?.trim().toLowerCase() ?? ""
    const charset = charsetFromContentType(typeHeader)
    const encoding =
      headers
        .match(/Content-Transfer-Encoding:\s*([^\r\n]+)/i)?.[1]
        ?.trim()
        .toLowerCase() ?? ""

    let decoded: string
    if (encoding === "base64") {
      decoded = decodeBase64Part(body, charset)
    } else if (encoding === "quoted-printable" || looksLikeQuotedPrintable(body)) {
      // Decode unconditionally here: either the header declares the encoding
      // or the heuristic has already matched.
      decoded = decodeQuotedPrintable(body, charset)
    } else if (isByteString(body)) {
      decoded = decodeBytesToUtf8(byteStringToBytes(body), charset)
    } else {
      // Already-decoded Unicode text: reinterpreting it as bytes would corrupt it.
      decoded = body
    }

    if (mediaType === "text/plain" && !text) text = decoded
    if (mediaType === "text/html" && !html) html = decoded
  }

  if (!text && !html) return null
  if (looksLikeRawMime(text) || looksLikeRawMime(html)) return null
  return { text, html }
}

/** Escapes regular-expression metacharacters in `s`. */
function escapeRegExp(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
}

/**
 * Heuristic: after removing whitespace, at least 24 base64-alphabet
 * characters whose count is a multiple of 4.
 */
function looksLikeBareBase64(s: string): boolean {
  const clean = s.replace(/[\r\n\t ]/g, "")
  if (clean.length < 24 || clean.length % 4 !== 0) return false
  return /^[A-Za-z0-9+/]+=*$/.test(clean)
}

/**
 * Heuristic: soft line breaks, common escapes (`=3D`, `=C3=`, `=E2=`) or at
 * least three `=XX` hex escapes.
 */
function looksLikeQuotedPrintable(s: string): boolean {
  if (s.includes("=\r\n") || s.includes("=\n")) return true
  if (s.includes("=3D") || s.includes("=C3=") || s.includes("=E2=")) return true
  return (s.match(/=[0-9A-Fa-f]{2}/g)?.length ?? 0) >= 3
}

const QP_HEX_PAIR_RE = /^[0-9A-Fa-f]{2}$/

/**
 * Decodes quoted-printable text without applying the detection heuristic.
 *
 * Literal characters of a byte string are copied as bytes and the result is
 * decoded with `charset`. When `s` contains code units above 0xFF it is
 * Unicode text with embedded escapes: its literal non-ASCII characters are
 * UTF-8 encoded (not truncated to one byte) and the result must be valid
 * UTF-8. On any failure `s` is returned unchanged.
 */
function decodeQuotedPrintable(s: string, charset = ""): string {
  try {
    const normalized = s.replace(/\r\n/g, "\n")
    const binary = isByteString(normalized)
    const encoder = new TextEncoder()
    const bytes: number[] = []
    let i = 0
    while (i < normalized.length) {
      const code = normalized.charCodeAt(i)
      if (code === 0x3d /* "=" */) {
        if (normalized.charAt(i + 1) === "\n") {
          i += 2 // soft line break
          continue
        }
        const hex = normalized.slice(i + 1, i + 3)
        if (QP_HEX_PAIR_RE.test(hex)) {
          bytes.push(parseInt(hex, 16))
          i += 3
          continue
        }
        // Not an escape: fall through and keep the literal "=".
      }
      if (binary || code < 0x80) {
        bytes.push(code)
        i += 1
        continue
      }
      const literal = String.fromCodePoint(normalized.codePointAt(i) ?? code)
      for (const b of encoder.encode(literal)) bytes.push(b)
      i += literal.length
    }
    const out = new Uint8Array(bytes)
    if (!binary) {
      // Throws on mixed encodings; the catch below then keeps the input as is.
      return new TextDecoder("utf-8", { fatal: true }).decode(out)
    }
    return decodeBytesToUtf8(out, charset)
  } catch {
    return s
  }
}

/** Decodes `s` as quoted-printable only when it looks like quoted-printable. */
function decodeQuotedPrintableIfNeeded(s: string, charset = ""): string {
  if (!looksLikeQuotedPrintable(s)) return s
  return decodeQuotedPrintable(s, charset)
}

/** C0 controls other than tab/LF/CR, DEL, and C1 controls. */
const BINARY_CONTROL_CHAR_RE = /[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F-\u009F]/

/**
 * Decodes `s` as base64 when it looks like bare base64.
 *
 * The input is kept when decoding yields nothing, yields the same string, or
 * yields control characters. The last case covers ordinary punctuation-free
 * text that merely happens to fit the base64 alphabet and length rule.
 */
function decodeBareBase64IfNeeded(s: string): string {
  if (!looksLikeBareBase64(s)) return s
  const decoded = decodeBase64Part(s)
  if (!decoded || decoded === s) return s
  if (BINARY_CONTROL_CHAR_RE.test(decoded)) return s
  return decoded
}

/**
 * Repairs stored message bodies: legacy charset damage, quoted-printable,
 * bare base64 and embedded raw MIME.
 *
 * @param bodyText stored plain-text body, if any
 * @param bodyHtml stored HTML body, if any
 * @returns the repaired bodies; a field falls back to the corresponding input
 * when its repaired value is empty
 */
export function repairMimeBodies(
  bodyText?: string,
  bodyHtml?: string
): { bodyText?: string; bodyHtml?: string } {
  let text = repairLegacyCharsetString(bodyText?.trim() ?? "")
  let html = repairLegacyCharsetString(bodyHtml?.trim() ?? "")
  text = decodeBareBase64IfNeeded(decodeQuotedPrintableIfNeeded(text))
  html = decodeBareBase64IfNeeded(decodeQuotedPrintableIfNeeded(html))
  html = stripHiddenEmailHtml(html)
  text = stripInvisibleTextRuns(text)

  // Try whichever field actually holds raw MIME. parseEmbeddedMime returns
  // null for fields that do not look like MIME, so normal text in one field no
  // longer hides raw MIME stored in the other.
  let parsed: { text: string; html: string } | null = null
  for (const candidate of [text, html]) {
    parsed = parseEmbeddedMime(candidate)
    if (parsed) break
  }
  if (!parsed) return finalizeMimeBodies(text, bodyText, html, bodyHtml)

  // Extracted parts get the same clean-up as bodies that needed no MIME parsing.
  const parsedText = parsed.text ? stripInvisibleTextRuns(parsed.text) : ""
  const parsedHtml = parsed.html ? stripHiddenEmailHtml(parsed.html) : ""
  return finalizeMimeBodies(
    parsedText || text || bodyText,
    bodyText,
    parsedHtml || html || bodyHtml,
    bodyHtml
  )
}

/**
 * Runs the final charset/mojibake repair on both bodies and falls back to the
 * original stored value when a repaired body is empty.
 */
function finalizeMimeBodies(
  text: string | undefined,
  bodyText: string | undefined,
  html: string | undefined,
  bodyHtml: string | undefined
): { bodyText?: string; bodyHtml?: string } {
  const outText = repairUtf8Mojibake(repairLegacyCharsetString(text?.trim() ?? ""))
  const outHtml = repairUtf8Mojibake(repairLegacyCharsetString(html?.trim() ?? ""))
  return {
    bodyText: outText || bodyText,
    bodyHtml: outHtml || bodyHtml,
  }
}

const SEPARATOR_RUN_RE = /-{8,}|_{8,}|={8,}|\*{8,}/g

const HTML_ENTITY_RE = /&(?:#x?[0-9a-f]+|[a-z][a-z0-9]{1,8});/gi

const DECIMAL_ENTITY_RE = /^&#(\d+);$/
const HEX_ENTITY_RE = /^&#x([0-9a-f]+);$/i

/** Named entities decoded when no DOM is available. */
const NAMED_ENTITY_FALLBACKS: Record<string, string> = {
  "&nbsp;": " ",
  "&amp;": "&",
  "&lt;": "<",
  "&gt;": ">",
  "&quot;": '"',
  "&apos;": "'",
}

const SNIPPET_MAX_LENGTH = 200

/** Lower-cases, replaces brackets/parentheses/middle dots with spaces, collapses whitespace. */
function normalizeSnippetMatchText(s: string): string {
  return s
    .toLowerCase()
    .replace(/[[\]()·]/g, " ")
    .replace(/\s+/g, " ")
    .trim()
}

/**
 * True when the snippet is essentially a "view in browser" line: it contains
 * one of the known phrases and either little else (<= 20 characters) or is
 * short overall (<= 90 characters).
 */
function isViewInBrowserSnippet(s: string): boolean {
  const norm = normalizeSnippetMatchText(s)
  const phrases = [
    "afficher dans le navigateur",
    "view in browser",
    "voir ce message en ligne",
    "version en ligne",
  ]
  for (const phrase of phrases) {
    if (!norm.includes(phrase)) continue
    const rest = norm.replace(phrase, "").trim()
    if (rest.length <= 20 || s.length <= 90) return true
  }
  return false
}

/**
 * True for company legal footers: the text contains a link and either at least
 * two of the known markers, or it starts with "sas " and contains "http".
 */
function isSnippetLegalFooter(s: string): boolean {
  const lower = s.toLowerCase()
  if (!lower.includes("http") && !lower.includes("www.")) return false
  const markers = [
    "sas ",
    "sarl ",
    "rue ",
    " bp ",
    "kellermann",
    "ovh.com",
    "www.ovh",
  ]
  let hits = 0
  for (const m of markers) {
    if (lower.includes(m)) hits++
  }
  if (hits >= 2) return true
  return lower.startsWith("sas ") && lower.includes("http")
}

/** True when the text starts with at least 12 separator characters. */
function hasLeadingSeparatorRun(s: string): boolean {
  const trimmed = s.trimStart()
  let run = 0
  for (const ch of trimmed) {
    if ("-_*='=·—".includes(ch)) run++
    else break
  }
  return run >= 12
}

/** True when the text looks like leaked CSS rather than message content. */
function looksLikeCssSnippet(s: string): boolean {
  const lower = s.toLowerCase()
  return (
    lower.includes(":root") ||
    lower.includes("color-scheme:") ||
    lower.includes("@media") ||
    lower.includes("@font-face") ||
    lower.includes("font-family:") ||
    (lower.includes("facebook") && lower.includes(":root")) ||
    (lower.includes("meta for business") && s.includes("{")) ||
    lower.includes("/*//") ||
    lower.includes("||//") ||
    (s.includes("{") &&
      s.includes("}") &&
      s.includes(";") &&
      (lower.includes("font-") ||
        lower.includes("margin:") ||
        lower.includes("padding:"))) ||
    /^\s*\/\*/.test(s)
  )
}

/**
 * True when `s` still contains an HTML entity.
 *
 * Uses `search`, which ignores `lastIndex`; calling `test` on the shared
 * global regex would make the result depend on the previous call.
 */
function hasUndecodedHtmlEntities(s: string): boolean {
  return s.search(HTML_ENTITY_RE) !== -1
}

/** True when at least 55% of the characters (8 or more in total) are separators. */
function isMostlySeparatorLine(s: string): boolean {
  if (s.length < 8) return false
  const sep = (s.match(/[-_*=·—|]/g) ?? []).length
  return sep / [...s].length >= 0.55
}

/**
 * True when the snippet carries no useful preview text: empty or too short,
 * CSS, separators, "view in browser" lines, legal footers, leftover markup or
 * entities, short call-to-action lines, bare links, or mostly non-alphanumeric.
 */
function isSnippetBoilerplate(s: string): boolean {
  const t = stripHtmlTagsForSnippet(s.trim())
  if (!t || t.length < 4) return true
  if (looksLikeCssSnippet(t) || isMostlySeparatorLine(t) || hasLeadingSeparatorRun(t)) {
    return true
  }
  if (isViewInBrowserSnippet(t) || isSnippetLegalFooter(t)) return true
  if (/<[^>]+>/.test(s)) return true
  if (hasUndecodedHtmlEntities(s)) return true

  const lower = t.toLowerCase()
  const phrases = ["si vous ne visualisez pas", "cliquer ici", "click here"]
  if (phrases.some((p) => lower.includes(p)) && t.length < 200) return true
  if (
    (lower.startsWith("http://") || lower.startsWith("https://")) &&
    t.length < 120
  ) {
    return true
  }
  const letters = (t.match(/\p{L}|\p{N}/gu) ?? []).length
  return letters / [...t].length < 0.35
}

/**
 * Picks the non-boilerplate line with the highest score: four points per
 * letter plus a bonus for lengths between 41 and 279. Lines with fewer than 8
 * letters are ignored. Returns "" when no line qualifies.
 */
function pickBestSnippetLine(lines: string[]): string {
  let best = ""
  let bestScore = -1
  for (const line of lines) {
    const t = stripHtmlTagsForSnippet(line.trim())
    if (!t || isSnippetBoilerplate(t)) continue
    const letters = (t.match(/\p{L}/gu) ?? []).length
    if (letters < 8) continue
    let score = letters * 4
    if (t.length > 40 && t.length < 280) score += 40
    if (score > bestScore) {
      bestScore = score
      best = t
    }
  }
  return best
}

/** Converts a numeric character reference to text, or null when it is not a valid code point. */
function codePointToString(codePoint: number): string | null {
  // String.fromCodePoint throws RangeError outside 0..0x10FFFF.
  if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > 0x10ffff) {
    return null
  }
  return String.fromCodePoint(codePoint)
}

/**
 * Decodes HTML entities, repeating up to four times so that nested escapes
 * such as `&amp;lt;` are resolved.
 *
 * With a DOM the browser decodes each entity through a `<textarea>`; without
 * one, a small named-entity table and numeric references are handled and
 * anything else is left untouched.
 */
function decodeHtmlEntitiesForSnippet(s: string): string {
  const scratch =
    typeof document !== "undefined" ? document.createElement("textarea") : null

  const decodeEntity = (entity: string): string => {
    if (scratch) {
      scratch.innerHTML = entity
      return scratch.value
    }
    const named = NAMED_ENTITY_FALLBACKS[entity.toLowerCase()]
    if (named !== undefined) return named
    const dec = DECIMAL_ENTITY_RE.exec(entity)?.[1]
    if (dec !== undefined) {
      return codePointToString(Number.parseInt(dec, 10)) ?? entity
    }
    const hex = HEX_ENTITY_RE.exec(entity)?.[1]
    if (hex !== undefined) {
      return codePointToString(Number.parseInt(hex, 16)) ?? entity
    }
    return entity
  }

  let out = s
  for (let pass = 0; pass < 4; pass++) {
    const next = out.replace(HTML_ENTITY_RE, decodeEntity)
    if (next === out) break
    out = next
  }
  return out
}

/**
 * Cuts the snippet at the first CSS/comment marker, provided at least 12
 * characters of text precede it; otherwise returns the input unchanged.
 */
function stripSnippetCssTail(s: string): string {
  for (const marker of ["/*//", "/*", "//||"]) {
    const idx = s.indexOf(marker)
    if (idx >= 0) {
      const head = s.slice(0, idx).trim()
      if (head.length >= 12) return head.replace(/[ /*\-_|]+$/g, "")
    }
  }
  return s
}

/** Splits on line breaks and separator runs, returning the non-empty cleaned segments. */
function splitSnippetSegments(s: string): string[] {
  return s
    .replace(/\r\n/g, "\n")
    .split("\n")
    .flatMap((line) =>
      line
        .split(SEPARATOR_RUN_RE)
        .map((p) => stripHtmlTagsForSnippet(p.trim()))
        .filter(Boolean)
    )
}

/**
 * Removes `<style>` elements and all tags, decodes entities, drops a trailing
 * CSS fragment and collapses whitespace.
 */
function stripHtmlTagsForSnippet(s: string): string {
  let stripped = s
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, " ")
    .replace(/<[^>]+>/g, " ")
  stripped = decodeHtmlEntitiesForSnippet(stripped)
  stripped = stripSnippetCssTail(stripped)
  return stripped.replace(/\s+/g, " ").trim()
}

/**
 * Limits a snippet to SNIPPET_MAX_LENGTH UTF-16 units without cutting a
 * surrogate pair in half.
 */
function truncateSnippet(s: string): string {
  if (s.length <= SNIPPET_MAX_LENGTH) return s
  let end = SNIPPET_MAX_LENGTH
  const last = s.charCodeAt(end - 1)
  if (last >= 0xd800 && last <= 0xdbff) end -= 1 // drop the dangling high surrogate
  return s.slice(0, end)
}

/**
 * Returns the best preview line of a snippet, the cleaned snippet itself when
 * no line stands out, or "" when everything is boilerplate.
 */
function polishSnippetPreview(snippet: string): string {
  const cleaned = stripHtmlTagsForSnippet(snippet)
  const best = pickBestSnippetLine(splitSnippetSegments(cleaned))
  if (best) return truncateSnippet(best)
  if (isSnippetBoilerplate(cleaned)) return ""
  return truncateSnippet(cleaned)
}

/** List/search preview stored as undecoded base64 or marketing boilerplate. */
export function repairSnippet(snippet?: string): string | undefined {
  if (!snippet?.trim()) return snippet
  const trimmed = snippet.trim()
  const qp = decodeQuotedPrintableIfNeeded(trimmed)
  const decoded = decodeBareBase64IfNeeded(qp)
  const raw = decoded !== trimmed ? decoded : snippet
  const cleaned = stripInvisibleTextRuns(raw)
  const polished = repairUtf8Mojibake(polishSnippetPreview(cleaned))
  return polished || undefined
}