ultisuite-client/lib/mail-mime-body.ts
R3D347HR4Y 364ef0ef77
Some checks failed
E2E / Playwright e2e (push) Has been cancelled
feat: enhance email and contact detail views with snippet repair functionality
- Updated email and contact detail views to utilize the repairSnippet function for improved snippet display.
- Refactored email-view-messages to ensure consistent snippet formatting across different components.
- Enhanced mail-mime-body utility to include additional repair logic for handling UTF-8 mojibake, improving text rendering quality.
2026-06-18 11:10:26 +02:00

478 lines
14 KiB
TypeScript

/**
* Client-side repair for messages stored with raw MIME in body_text/body_html
* (before backend decode fix). Mirrors imap.RepairStoredBodies heuristics.
*/
import {
stripHiddenEmailHtml,
stripInvisibleTextRuns,
} from "@/lib/strip-hidden-email-html"
/**
 * Heuristic: does `s` look like an undecoded MIME entity rather than a
 * ready-to-render body?
 *
 * It must contain a `Content-Type:` header and, in addition, either a
 * `Content-Transfer-Encoding:` header or multipart markers (a `--` delimiter
 * together with the word "multipart").
 *
 * MIME header field names are case-insensitive, so the header checks are
 * performed on a lower-cased copy; `Content-type:` and `content-type:` are
 * recognised as well.
 *
 * @param s - Candidate body text or HTML.
 * @returns `true` when the string appears to still carry MIME headers.
 */
function looksLikeRawMime(s: string): boolean {
  const lower = s.toLowerCase()
  if (!lower.includes("content-type:")) return false
  return (
    lower.includes("content-transfer-encoding:") ||
    (lower.includes("--") && lower.includes("multipart"))
  )
}
/**
 * Extracts the `charset` parameter from a Content-Type header value.
 *
 * @param contentType - Header value, e.g. `text/plain; charset="UTF-8"`.
 * @returns The charset label, trimmed and lower-cased, or `""` when the
 *   header carries no charset parameter.
 */
function charsetFromContentType(contentType: string): string {
  const found = /charset\s*=\s*"?([^";\s]+)"?/i.exec(contentType)
  if (found === null) return ""
  const label = found[1]
  return label === undefined ? "" : label.trim().toLowerCase()
}
/**
 * Tells whether a charset label means UTF-8.
 *
 * The comparison is exact, so callers are expected to pass an already
 * lower-cased label. The empty string (no charset declared) counts as UTF-8.
 *
 * @param charset - Charset label to classify.
 * @returns `true` for `""`, `utf-8`, `utf8` and `unicode-1-1-utf-8`.
 */
function isUtf8Charset(charset: string): boolean {
  switch (charset) {
    case "":
    case "utf-8":
    case "utf8":
    case "unicode-1-1-utf-8":
      return true
    default:
      return false
  }
}
/**
 * Decodes raw bytes into a JavaScript string.
 *
 * Resolution order:
 * 1. the declared `charset`, when it is non-empty and not a UTF-8 alias;
 * 2. strict UTF-8 (malformed input throws instead of being substituted);
 * 3. `windows-1252`, then `iso-8859-1`, as legacy fallbacks.
 *
 * @param bytes - Encoded payload.
 * @param charset - Optional charset label; compared case-insensitively.
 * @returns The decoded text, or `""` for empty input or when every decoder
 *   attempt throws.
 */
function decodeBytesToUtf8(bytes: Uint8Array, charset = ""): string {
  if (bytes.length === 0) return ""

  const declared = charset.toLowerCase()
  const useDeclared = declared !== "" && !isUtf8Charset(declared)
  if (useDeclared) {
    try {
      return new TextDecoder(declared).decode(bytes)
    } catch {
      // Declared decoder could not be used; sniff the encoding below.
    }
  }

  try {
    return new TextDecoder("utf-8", { fatal: true }).decode(bytes)
  } catch {
    // Not valid UTF-8; try the single-byte legacy encodings.
  }

  for (const legacy of ["windows-1252", "iso-8859-1"]) {
    try {
      return new TextDecoder(legacy).decode(bytes)
    } catch {
      // This decoder failed; try the next one.
    }
  }
  return ""
}
/**
 * One two-byte UTF-8 sequence read as Latin-1: a lead character U+00C2 or
 * U+00C3 followed by a character in the continuation range U+0080–U+00BF.
 */
const UTF8_MOJIBAKE_PAIR_RE = /[\u00C2\u00C3][\u0080-\u00BF]/

/**
 * Reports whether `s` contains at least one such lead/continuation pair.
 *
 * @param s - Text to inspect.
 * @returns `true` when a pair is present anywhere in the string.
 */
function looksLikeUtf8Mojibake(s: string): boolean {
  return s.search(UTF8_MOJIBAKE_PAIR_RE) !== -1
}
/**
 * Repairs two-byte UTF-8 sequences that were misread as Latin-1, e.g.
 * "rÃ©activitÃ©" → "réactivité".
 *
 * Every pair made of U+00C2/U+00C3 followed by a character in
 * U+0080–U+00BF is re-interpreted as the two bytes of one UTF-8 character.
 * When at least one pair was rewritten, leftover lone lead characters are
 * handed to `repairLoneMojibakeLeaders`.
 *
 * @param s - Possibly garbled text.
 * @returns The repaired text, or `s` itself when nothing needed repair.
 */
function repairUtf8Mojibake(s: string): string {
  if (!s || !looksLikeUtf8Mojibake(s)) return s

  const strictUtf8 = new TextDecoder("utf-8", { fatal: true })
  const rebuilt = s.replace(/[\u00C2\u00C3][\u0080-\u00BF]/g, (pair) => {
    try {
      return strictUtf8.decode(
        Uint8Array.of(pair.charCodeAt(0), pair.charCodeAt(1))
      )
    } catch {
      // Keep the raw characters if the pair is not decodable.
      return pair
    }
  })

  return rebuilt === s ? s : repairLoneMojibakeLeaders(rebuilt)
}
/**
 * Rewrites mojibake lead characters that are directly followed by
 * whitespace or one of `, . ; : ! ?`.
 *
 * A lone U+00C3 ("Ã") in that position becomes "à".
 *
 * NOTE(review): as written, a lone U+00C2 ("Â") is replaced by the very same
 * character, so that branch has no visible effect — confirm the intended
 * replacement.
 *
 * @param s - Text that already went through pair repair.
 * @returns The text with lone lead characters rewritten.
 */
function repairLoneMojibakeLeaders(s: string): string {
  return s.replace(/[\u00C2\u00C3](?=[\s,.;:!?])/g, (leader) =>
    leader === "\u00C3" ? "à" : "Â"
  )
}
/**
 * Repairs a string whose characters are really raw bytes of a legacy
 * single-byte charset.
 *
 * When every UTF-16 code unit is ≤ 0xFF the string is treated as a byte
 * string: if those bytes are not valid UTF-8 they are re-decoded through
 * `decodeBytesToUtf8` (which falls back to windows-1252). The result is
 * then passed through `repairUtf8Mojibake`.
 *
 * A string containing any code unit above 0xFF cannot be a byte string — it
 * is already real Unicode text. Previously such strings were masked with
 * `& 0xff`, which truncated characters like U+2019 (’) or U+2014 (—) to
 * control bytes and, whenever an accented Latin-1 letter was also present,
 * replaced the whole text with the corrupted re-decode. Those strings now
 * skip the byte round-trip and only receive the mojibake pair repair.
 *
 * @param s - Stored body text or HTML.
 * @returns The repaired string; `s` unchanged when it is empty.
 */
function repairLegacyCharsetString(s: string): string {
  if (!s) return s

  // Any code unit >= U+0100 proves the string is not a sequence of raw bytes.
  if (/[\u0100-\uFFFF]/.test(s)) return repairUtf8Mojibake(s)

  const bytes = Uint8Array.from(s, (c) => c.charCodeAt(0))
  let repaired = s
  try {
    // Validation only: when the bytes are valid UTF-8 the string is kept
    // as-is and pair repair below takes care of it.
    new TextDecoder("utf-8", { fatal: true }).decode(bytes)
  } catch {
    repaired = decodeBytesToUtf8(bytes)
  }
  return repairUtf8Mojibake(repaired)
}
/**
 * Decodes a base64 payload into text.
 *
 * CR, LF, tab and space characters are removed before decoding. The
 * resulting bytes are converted with `decodeBytesToUtf8` using `charset`.
 *
 * @param encoded - Base64 text, possibly line-wrapped.
 * @param charset - Optional charset label of the decoded bytes.
 * @returns The decoded text, or `""` when `atob` is unavailable or decoding
 *   throws.
 */
function decodeBase64Part(encoded: string, charset = ""): string {
  if (typeof atob === "undefined") return ""

  const compact = encoded.replace(/[\r\n\t ]/g, "")
  try {
    const binary = atob(compact)
    const bytes = new Uint8Array(binary.length)
    for (let i = 0; i < binary.length; i++) {
      bytes[i] = binary.charCodeAt(i)
    }
    return decodeBytesToUtf8(bytes, charset)
  } catch {
    return ""
  }
}
/**
 * Extracts the first `text/plain` and first `text/html` part from a raw
 * multipart MIME string.
 *
 * The boundary comes from a `boundary=` parameter when present, otherwise
 * from the first line that starts with `--` and does not end with `--`.
 * Each part is decoded according to its Content-Transfer-Encoding (base64,
 * quoted-printable, or plain bytes in the declared charset).
 *
 * Fixes over the previous version:
 * - The delimiter pattern also matches a delimiter sitting at the very end
 *   of the input. Callers trim the body, so the closing `--boundary--`
 *   never had a trailing newline and stayed attached to the last part,
 *   which made base64 parts undecodable and polluted other parts.
 * - The per-part `Content-Type:` presence check is case-insensitive, like
 *   the regex that extracts the header value.
 *
 * NOTE(review): quoted-printable parts are decoded without the part's
 * declared charset, and parts whose media type is not `text/plain` or
 * `text/html` (including nested multiparts) are ignored.
 *
 * @param raw - Raw MIME text.
 * @returns The decoded bodies, or `null` when `raw` is not MIME, has no
 *   boundary, yields no text/html part, or a decoded part still looks like
 *   raw MIME.
 */
function parseEmbeddedMime(raw: string): { text: string; html: string } | null {
  if (!looksLikeRawMime(raw)) return null

  let boundary = raw.match(/boundary\s*=\s*"?([^";\s]+)"?/i)?.[1] ?? ""
  if (!boundary) {
    // No boundary parameter: take the first delimiter-looking line.
    for (const line of raw.split(/\r?\n/)) {
      const t = line.trim()
      if (t.startsWith("--") && !t.endsWith("--") && t.length > 2) {
        boundary = t.slice(2).trim()
        break
      }
    }
  }
  if (!boundary) return null

  // A delimiter ends with a line break or with the end of the input.
  const delimiter = new RegExp(
    `--${escapeRegExp(boundary)}(?:--)?\\s*(?:\\r?\\n|$)`
  )

  let text = ""
  let html = ""
  for (const part of raw.split(delimiter)) {
    const trimmed = part.trim()
    if (!trimmed || !/content-type:/i.test(trimmed)) continue

    // Headers and body are separated by the first blank line.
    const headerEnd = trimmed.search(/\r?\n\r?\n/)
    if (headerEnd < 0) continue
    const headers = trimmed.slice(0, headerEnd)
    const body = trimmed.slice(headerEnd).replace(/^[\r\n]+/, "")

    const typeHeader = headers.match(/Content-Type:\s*([^\r\n]+)/i)?.[1] ?? ""
    const mediaType = typeHeader.split(";")[0]?.trim().toLowerCase() ?? ""
    const charset = charsetFromContentType(typeHeader)
    const encMatch = headers.match(/Content-Transfer-Encoding:\s*([^\r\n]+)/i)
    const encoding = encMatch?.[1]?.trim().toLowerCase() ?? ""

    let decoded = body.trim()
    if (encoding === "base64") {
      decoded = decodeBase64Part(decoded, charset)
    } else if (encoding === "quoted-printable" || looksLikeQuotedPrintable(decoded)) {
      decoded = decodeQuotedPrintableIfNeeded(decoded)
    } else {
      // 7bit/8bit/binary: treat each character as one byte of `charset`.
      const bytes = Uint8Array.from(decoded, (c) => c.charCodeAt(0) & 0xff)
      decoded = decodeBytesToUtf8(bytes, charset)
    }

    // Only the first part of each kind is kept.
    if (mediaType === "text/plain" && !text) text = decoded
    if (mediaType === "text/html" && !html) html = decoded
  }

  if (!text && !html) return null
  if (looksLikeRawMime(text) || looksLikeRawMime(html)) return null
  return { text, html }
}
/**
 * Escapes regular-expression metacharacters so `s` can be embedded in a
 * `RegExp` source and match literally.
 *
 * @param s - Literal text.
 * @returns `s` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a
 *   backslash.
 */
function escapeRegExp(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, (meta) => `\\${meta}`)
}
/**
 * Heuristic: is `s` nothing but a base64 payload?
 *
 * After removing CR, LF, tab and space characters, the remainder must be at
 * least 24 characters long, a multiple of 4 in length, and consist only of
 * base64 alphabet characters followed by optional `=` padding.
 *
 * NOTE(review): whitespace-stripped prose made solely of letters and digits
 * whose length happens to be a multiple of 4 also satisfies this test.
 *
 * @param s - Candidate text.
 * @returns `true` when the text is shaped like bare base64.
 */
function looksLikeBareBase64(s: string): boolean {
  const compact = s.split(/[\r\n\t ]/).join("")
  const plausibleLength = compact.length >= 24 && compact.length % 4 === 0
  return plausibleLength && /^[A-Za-z0-9+/]+=*$/.test(compact)
}
/**
 * Heuristic: does `s` contain quoted-printable encoding?
 *
 * A single strong marker is enough: a soft line break (`=` directly before
 * a line ending) or one of the sequences `=3D`, `=C3=`, `=E2=`. Otherwise
 * at least three `=XX` hex escapes are required.
 *
 * @param s - Candidate text.
 * @returns `true` when the text looks quoted-printable encoded.
 */
function looksLikeQuotedPrintable(s: string): boolean {
  const strongMarkers = ["=\r\n", "=\n", "=3D", "=C3=", "=E2="]
  if (strongMarkers.some((marker) => s.includes(marker))) return true

  const hexEscape = /=[0-9A-Fa-f]{2}/g
  let escapes = 0
  while (hexEscape.exec(s) !== null) {
    escapes += 1
    if (escapes >= 3) return true
  }
  return false
}
/**
 * Decodes quoted-printable text when `looksLikeQuotedPrintable` says the
 * input is encoded; otherwise returns the input untouched.
 *
 * `=XX` escapes become bytes, and `=` followed by a line break (soft line
 * break) is dropped. Bytes are converted to text with `decodeBytesToUtf8`.
 *
 * Literal characters are handled in one of two ways:
 * - If every code unit is ≤ 0xFF the input is treated as a byte string and
 *   literals are decoded together with the escaped bytes (same result as
 *   before).
 * - If any code unit is above 0xFF the input is already Unicode text.
 *   Literals are then copied through unchanged and only runs of escaped
 *   bytes are decoded. Previously those literals were stored in a
 *   `Uint8Array`, which silently truncated them modulo 256 (e.g. "€"
 *   became byte 0xAC) and corrupted the output.
 *
 * @param s - Possibly quoted-printable text.
 * @returns The decoded text, or `s` when it is not encoded or decoding
 *   throws.
 */
function decodeQuotedPrintableIfNeeded(s: string): string {
  if (!looksLikeQuotedPrintable(s)) return s
  try {
    const normalized = s.replace(/\r\n/g, "\n")
    const isByteString = !/[\u0100-\uFFFF]/.test(normalized)
    const hexPair = /^[0-9A-Fa-f]{2}$/

    let out = ""
    let pending: number[] = []
    // Decodes the bytes collected so far and appends them to the output.
    const flush = (): void => {
      if (pending.length === 0) return
      out += decodeBytesToUtf8(new Uint8Array(pending))
      pending = []
    }

    let i = 0
    while (i < normalized.length) {
      if (normalized.charAt(i) === "=") {
        if (normalized.charAt(i + 1) === "\n") {
          // Soft line break: contributes nothing and must not split a
          // multi-byte sequence, so the pending run is kept open.
          i += 2
          continue
        }
        const hex = normalized.slice(i + 1, i + 3)
        if (hexPair.test(hex)) {
          pending.push(parseInt(hex, 16))
          i += 3
          continue
        }
      }
      if (isByteString) {
        pending.push(normalized.charCodeAt(i))
      } else {
        flush()
        out += normalized.charAt(i)
      }
      i += 1
    }
    flush()
    return out
  } catch {
    return s
  }
}
/**
 * Decodes `s` when it is shaped like a bare base64 payload.
 *
 * @param s - Candidate text.
 * @returns The decoded text, or `s` itself when it does not look like
 *   base64 or decoding produced nothing.
 */
function decodeBareBase64IfNeeded(s: string): string {
  if (looksLikeBareBase64(s)) {
    const plain = decodeBase64Part(s)
    if (plain.length > 0) return plain
  }
  return s
}
/**
 * Repairs stored message bodies that still contain transport encoding or
 * raw MIME.
 *
 * Both bodies are trimmed, charset-repaired, quoted-printable and bare
 * base64 decoded, and stripped of hidden content. If either body still
 * looks like raw MIME it is parsed and the extracted parts take precedence.
 *
 * Fix: the body handed to the MIME parser is now the one that actually
 * looks like raw MIME (text first, then HTML). Previously `text || html`
 * was parsed, so a normal text body next to a raw-MIME HTML body meant the
 * HTML was never parsed.
 *
 * @param bodyText - Stored plain-text body, if any.
 * @param bodyHtml - Stored HTML body, if any.
 * @returns The repaired bodies; each falls back to its original input when
 *   the repaired value is empty.
 */
export function repairMimeBodies(
  bodyText?: string,
  bodyHtml?: string
): { bodyText?: string; bodyHtml?: string } {
  let text = repairLegacyCharsetString(bodyText?.trim() ?? "")
  let html = repairLegacyCharsetString(bodyHtml?.trim() ?? "")
  text = decodeBareBase64IfNeeded(decodeQuotedPrintableIfNeeded(text))
  html = decodeBareBase64IfNeeded(decodeQuotedPrintableIfNeeded(html))
  html = stripHiddenEmailHtml(html)
  text = stripInvisibleTextRuns(text)

  const textIsRaw = looksLikeRawMime(text)
  const htmlIsRaw = looksLikeRawMime(html)
  if (!textIsRaw && !htmlIsRaw) {
    return finalizeMimeBodies(text, bodyText, html, bodyHtml)
  }

  // Parse whichever body is raw MIME; try the HTML one if text fails.
  const parsed =
    (textIsRaw ? parseEmbeddedMime(text) : null) ??
    (htmlIsRaw ? parseEmbeddedMime(html) : null)
  if (!parsed) return finalizeMimeBodies(text, bodyText, html, bodyHtml)

  return finalizeMimeBodies(
    parsed.text || text || bodyText,
    bodyText,
    parsed.html || html || bodyHtml,
    bodyHtml
  )
}
/**
 * Applies the final charset and mojibake repair to both bodies and builds
 * the result object.
 *
 * @param text - Repaired plain-text candidate.
 * @param bodyText - Original plain-text body, used when the candidate ends
 *   up empty.
 * @param html - Repaired HTML candidate.
 * @param bodyHtml - Original HTML body, used when the candidate ends up
 *   empty.
 * @returns The bodies to display.
 */
function finalizeMimeBodies(
  text: string | undefined,
  bodyText: string | undefined,
  html: string | undefined,
  bodyHtml: string | undefined
): { bodyText?: string; bodyHtml?: string } {
  const polish = (value: string | undefined): string =>
    repairUtf8Mojibake(repairLegacyCharsetString(value?.trim() ?? ""))

  const outText = polish(text)
  const outHtml = polish(html)
  return {
    bodyText: outText !== "" ? outText : bodyText,
    bodyHtml: outHtml !== "" ? outHtml : bodyHtml,
  }
}
/**
 * Matches a run of 8 or more identical separator characters (`-`, `_`, `=`
 * or `*`). Used by `splitSnippetSegments` to cut a line into segments.
 */
const SEPARATOR_RUN_RE = /-{8,}|_{8,}|={8,}|\*{8,}/g
/**
 * Normalises text for phrase matching: lower-cases it, turns square
 * brackets, parentheses and middle dots into spaces, collapses whitespace
 * runs to a single space and trims the ends.
 *
 * @param s - Raw snippet text.
 * @returns The normalised text.
 */
function normalizeSnippetMatchText(s: string): string {
  const lowered = s.toLowerCase()
  const withoutBrackets = lowered.replace(/[[\]()·]/g, " ")
  const singleSpaced = withoutBrackets.replace(/\s+/g, " ")
  return singleSpaced.trim()
}
/**
 * Detects "view this email in your browser" style snippets (French and
 * English phrasings).
 *
 * A phrase match counts when little else remains once the phrase is
 * removed from the normalised text (20 characters or fewer), or when the
 * original snippet is short (90 characters or fewer).
 *
 * @param s - Snippet text.
 * @returns `true` when the snippet is essentially a view-online link.
 */
function isViewInBrowserSnippet(s: string): boolean {
  const norm = normalizeSnippetMatchText(s)
  const isShortInput = s.length <= 90

  return [
    "afficher dans le navigateur",
    "view in browser",
    "voir ce message en ligne",
    "version en ligne",
  ].some((phrase) => {
    if (!norm.includes(phrase)) return false
    const remainder = norm.replace(phrase, "").trim()
    return remainder.length <= 20 || isShortInput
  })
}
/**
 * Detects company legal-footer snippets (company form, street address,
 * web site).
 *
 * The text must mention `http` or `www.`. It is a footer when at least two
 * footer markers occur, or when it starts with `sas ` and contains `http`.
 * Matching is case-insensitive.
 *
 * @param s - Snippet text.
 * @returns `true` when the snippet looks like a legal footer.
 */
function isSnippetLegalFooter(s: string): boolean {
  const lower = s.toLowerCase()
  const hasHttp = lower.includes("http")
  if (!hasHttp && !lower.includes("www.")) return false

  const hits = [
    "sas ",
    "sarl ",
    "rue ",
    " bp ",
    "kellermann",
    "ovh.com",
    "www.ovh",
  ].filter((marker) => lower.includes(marker)).length

  return hits >= 2 || (hasHttp && lower.startsWith("sas "))
}
/**
 * Checks whether the text, ignoring leading whitespace, opens with a run of
 * at least 12 separator characters.
 *
 * @param s - Snippet text.
 * @returns `true` when such a leading run exists.
 */
function hasLeadingSeparatorRun(s: string): boolean {
  const separators = "-_*='=·—"
  const body = s.trimStart()

  let run = 0
  while (run < body.length && separators.includes(body.charAt(run))) {
    run += 1
  }
  return run >= 12
}
/**
 * Heuristic: is the snippet leaked CSS rather than message text?
 *
 * It is CSS when any of these holds:
 * - it contains a CSS-only token (`:root`, `color-scheme:`, `@media`,
 *   `@font-face`, `font-family:`, `/*//`, `||//`), case-insensitively;
 * - it mentions "meta for business" together with `{`;
 * - it has `{`, `}` and `;` plus a `font-`, `margin:` or `padding:`
 *   property;
 * - it starts, after optional whitespace, with a `/*` comment opener.
 *
 * @param s - Snippet text.
 * @returns `true` when the snippet looks like CSS.
 */
function looksLikeCssSnippet(s: string): boolean {
  const lower = s.toLowerCase()

  const cssTokens = [
    ":root",
    "color-scheme:",
    "@media",
    "@font-face",
    "font-family:",
    "/*//",
    "||//",
  ]
  if (cssTokens.some((token) => lower.includes(token))) return true

  if (lower.includes("meta for business") && s.includes("{")) return true

  const hasRuleSyntax = s.includes("{") && s.includes("}") && s.includes(";")
  if (
    hasRuleSyntax &&
    ["font-", "margin:", "padding:"].some((prop) => lower.includes(prop))
  ) {
    return true
  }

  return /^\s*\/\*/.test(s)
}
/**
 * Reports whether `s` still contains an HTML character reference such as
 * `&amp;` or `&#233;`.
 *
 * `HTML_ENTITY_RE` carries the `g` flag, and `RegExp.prototype.test` on a
 * global regex resumes from its `lastIndex`, so a hit in one call could make
 * a later call on another string start mid-way and miss a match.
 * `String.prototype.search` always scans from the start and leaves
 * `lastIndex` untouched, which makes the check independent of earlier uses
 * of the shared regex.
 *
 * @param s - Text to inspect.
 * @returns `true` when at least one character reference is present.
 */
function hasUndecodedHtmlEntities(s: string): boolean {
  return s.search(HTML_ENTITY_RE) !== -1
}
/**
 * Checks whether a line is dominated by separator characters.
 *
 * Lines shorter than 8 UTF-16 units never qualify. Otherwise the line
 * qualifies when separator characters make up at least 55% of its
 * characters (counted as code points).
 *
 * @param s - A single line of snippet text.
 * @returns `true` when the line is mostly separators.
 */
function isMostlySeparatorLine(s: string): boolean {
  if (s.length < 8) return false
  // Splitting on single separator characters yields one more piece than
  // there are separators.
  const separatorCount = s.split(/[-_*=·—|]/).length - 1
  const charCount = Array.from(s).length
  return separatorCount / charCount >= 0.55
}
/**
 * Decides whether a snippet is boilerplate that should not be shown as a
 * preview.
 *
 * A snippet is boilerplate when, after tag stripping, it is shorter than 4
 * characters, looks like CSS, is a separator line, is a view-in-browser
 * link or a legal footer. It is also boilerplate when the raw input still
 * contains markup or character references, when it is a short "click here"
 * style call to action, a short bare URL, or when fewer than 35% of its
 * characters are letters or digits.
 *
 * @param s - Raw snippet text.
 * @returns `true` when the snippet should be discarded.
 */
function isSnippetBoilerplate(s: string): boolean {
  const t = stripHtmlTagsForSnippet(s.trim())
  if (t.length < 4) return true

  // Checks on the cleaned text, evaluated in order until one matches.
  const cleanedTextChecks = [
    looksLikeCssSnippet,
    isMostlySeparatorLine,
    hasLeadingSeparatorRun,
    isViewInBrowserSnippet,
    isSnippetLegalFooter,
  ]
  if (cleanedTextChecks.some((check) => check(t))) return true

  // Checks on the raw input: leftover tags or character references.
  if (/<[^>]+>/.test(s)) return true
  if (hasUndecodedHtmlEntities(s)) return true

  const lower = t.toLowerCase()
  const isCallToAction = [
    "si vous ne visualisez pas",
    "cliquer ici",
    "click here",
  ].some((phrase) => lower.includes(phrase))
  if (isCallToAction && t.length < 200) return true

  const startsWithUrl =
    lower.startsWith("http://") || lower.startsWith("https://")
  if (startsWithUrl && t.length < 120) return true

  const alphanumerics = t.match(/\p{L}|\p{N}/gu)?.length ?? 0
  return alphanumerics / Array.from(t).length < 0.35
}
/**
 * Picks the most text-like line among snippet candidates.
 *
 * Boilerplate lines and lines with fewer than 8 letters are skipped. Each
 * remaining line scores 4 points per letter, plus 40 when its length is
 * between 41 and 279 characters. The first line with the highest score
 * wins.
 *
 * @param lines - Candidate lines or segments.
 * @returns The best cleaned line, or `""` when none qualifies.
 */
function pickBestSnippetLine(lines: string[]): string {
  let winner = ""
  let winnerScore = -1

  lines.forEach((line) => {
    const candidate = stripHtmlTagsForSnippet(line.trim())
    if (candidate === "" || isSnippetBoilerplate(candidate)) return

    const letterCount = candidate.match(/\p{L}/gu)?.length ?? 0
    if (letterCount < 8) return

    const hasPreviewLength = candidate.length > 40 && candidate.length < 280
    const score = letterCount * 4 + (hasPreviewLength ? 40 : 0)
    if (score > winnerScore) {
      winnerScore = score
      winner = candidate
    }
  })

  return winner
}
/**
 * Matches one HTML character reference: numeric (`&#233;`, `&#xE9;`) or
 * named with 2 to 9 alphanumeric characters (`&amp;`). Global and
 * case-insensitive.
 */
const HTML_ENTITY_RE =
  /&(?:#x?[0-9a-f]+|[a-z][a-z0-9]{1,8});/gi

/**
 * Decodes HTML character references in snippet text.
 *
 * Decoding is repeated up to 4 times so that doubly escaped input such as
 * `&amp;lt;` is fully unwrapped. When a DOM is available a `<textarea>` is
 * used for decoding. Without one, only numeric references and a small set
 * of named references (`nbsp`, `amp`, `lt`, `gt`, `quot`, `apos`) are
 * decoded, and other references are left as they are.
 *
 * Changes over the previous version:
 * - Numeric references that are not valid Unicode scalar values (zero,
 *   surrogates, above U+10FFFF) yield U+FFFD in the non-DOM path.
 *   Previously values above U+10FFFF made `String.fromCodePoint` throw a
 *   `RangeError`.
 * - One `<textarea>` is created per call instead of one per reference.
 *
 * @param s - Text possibly containing character references.
 * @returns The decoded text.
 */
function decodeHtmlEntitiesForSnippet(s: string): string {
  const scratch =
    typeof document !== "undefined" ? document.createElement("textarea") : null

  // Converts a numeric reference value into text, guarding invalid values.
  const fromCodePointSafe = (codePoint: number): string => {
    const isScalarValue =
      Number.isInteger(codePoint) &&
      codePoint > 0 &&
      codePoint <= 0x10ffff &&
      !(codePoint >= 0xd800 && codePoint <= 0xdfff)
    return isScalarValue ? String.fromCodePoint(codePoint) : "\uFFFD"
  }

  const decodeOne = (entity: string): string => {
    if (scratch) {
      scratch.innerHTML = entity
      return scratch.value
    }
    const lower = entity.toLowerCase()
    if (lower === "&nbsp;") return " "
    if (lower === "&amp;") return "&"
    if (lower === "&lt;") return "<"
    if (lower === "&gt;") return ">"
    if (lower === "&quot;") return '"'
    if (lower === "&apos;") return "'"
    const dec = entity.match(/^&#(\d+);$/i)
    if (dec) return fromCodePointSafe(Number.parseInt(dec[1] ?? "", 10))
    const hex = entity.match(/^&#x([0-9a-f]+);$/i)
    if (hex) return fromCodePointSafe(Number.parseInt(hex[1] ?? "", 16))
    return entity
  }

  let out = s
  for (let pass = 0; pass < 4; pass++) {
    const next = out.replace(HTML_ENTITY_RE, decodeOne)
    if (next === out) break
    out = next
  }
  return out
}
/**
 * Cuts off CSS or comment junk that follows real text in a snippet.
 *
 * The markers `/*//`, `/*` and `//||` are tried in that order. For the
 * first marker whose preceding text is at least 12 characters long after
 * trimming, that text is returned with trailing spaces, slashes, asterisks,
 * hyphens, underscores and pipes removed.
 *
 * @param s - Snippet text.
 * @returns The text before the junk, or `s` unchanged when no marker
 *   qualifies.
 */
function stripSnippetCssTail(s: string): string {
  for (const marker of ["/*//", "/*", "//||"]) {
    const at = s.indexOf(marker)
    if (at === -1) continue

    const kept = s.slice(0, at).trim()
    if (kept.length < 12) continue

    return kept.replace(/[ /*\-_|]+$/g, "")
  }
  return s
}
/**
 * Splits snippet text into candidate segments.
 *
 * The text is split on line breaks, then each line is split on separator
 * runs (`SEPARATOR_RUN_RE`). Every piece is trimmed and tag-stripped, and
 * empty pieces are dropped.
 *
 * @param s - Snippet text.
 * @returns The cleaned, non-empty segments in their original order.
 */
function splitSnippetSegments(s: string): string[] {
  const segments: string[] = []
  for (const line of s.replace(/\r\n/g, "\n").split("\n")) {
    for (const piece of line.split(SEPARATOR_RUN_RE)) {
      const cleaned = stripHtmlTagsForSnippet(piece.trim())
      if (cleaned) segments.push(cleaned)
    }
  }
  return segments
}
/**
 * Reduces HTML-ish snippet text to plain text.
 *
 * `<style>` elements with their content and all other tags are replaced by
 * spaces. Character references are then decoded, a trailing CSS tail is cut
 * off, and whitespace is collapsed and trimmed.
 *
 * @param s - Snippet text, possibly containing markup.
 * @returns The plain-text form.
 */
function stripHtmlTagsForSnippet(s: string): string {
  const withoutStyleBlocks = s.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, " ")
  const withoutTags = withoutStyleBlocks.replace(/<[^>]+>/g, " ")
  const decoded = decodeHtmlEntitiesForSnippet(withoutTags)
  const withoutCssTail = stripSnippetCssTail(decoded)
  return withoutCssTail.replace(/\s+/g, " ").trim()
}
/**
 * Turns a decoded snippet into a short preview.
 *
 * The best line among the snippet's segments is preferred. Without one,
 * the whole cleaned snippet is used unless it is boilerplate. The result
 * is limited to 200 UTF-16 units.
 *
 * Fix: truncation no longer cuts a surrogate pair in half. When the 200th
 * unit is a high surrogate the cut moves one unit earlier, so the preview
 * cannot end in a lone surrogate.
 *
 * @param snippet - Decoded snippet text.
 * @returns The preview, or `""` when only boilerplate is left.
 */
function polishSnippetPreview(snippet: string): string {
  const maxLength = 200
  // Limits `value` to `maxLength` units without splitting a surrogate pair.
  const clip = (value: string): string => {
    if (value.length <= maxLength) return value
    const lastKept = value.charCodeAt(maxLength - 1)
    const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff
    return value.slice(0, splitsPair ? maxLength - 1 : maxLength)
  }

  const cleaned = stripHtmlTagsForSnippet(snippet)
  const best = pickBestSnippetLine(splitSnippetSegments(cleaned))
  if (best) return clip(best)
  if (isSnippetBoilerplate(cleaned)) return ""
  return clip(cleaned)
}
/**
 * Repairs a list/search preview that was stored as undecoded
 * quoted-printable or base64, or that consists of marketing boilerplate.
 *
 * @param snippet - Stored snippet, if any.
 * @returns The cleaned preview; `undefined` when nothing displayable is
 *   left; the input itself when it is missing or blank.
 */
export function repairSnippet(snippet?: string): string | undefined {
  if (snippet === undefined) return undefined
  const trimmed = snippet.trim()
  if (trimmed === "") return snippet

  const decoded = decodeBareBase64IfNeeded(
    decodeQuotedPrintableIfNeeded(trimmed)
  )
  // If decoding changed nothing, continue from the untrimmed original.
  const source = decoded === trimmed ? snippet : decoded

  const visible = stripInvisibleTextRuns(source)
  const polished = repairUtf8Mojibake(polishSnippetPreview(visible))
  return polished === "" ? undefined : polished
}