ultisuite-client/lib/mail-mime-body.ts

/**
 * Client-side repair for messages stored with raw MIME in body_text/body_html
 * (before backend decode fix). Mirrors imap.RepairStoredBodies heuristics.
 */

import {
  stripHiddenEmailHtml,
  stripInvisibleTextRuns,
} from "@/lib/strip-hidden-email-html"

/**
 * Heuristic: does `s` look like an undecoded MIME entity rather than a decoded body?
 *
 * Requires a `Content-Type:` header plus either a `Content-Transfer-Encoding:`
 * header or multipart markers (a `--` sequence and the word "multipart").
 * Header names are compared case-insensitively: MIME field names are
 * case-insensitive and some mailers emit `Content-type:`.
 *
 * @param s - Stored body text or HTML.
 * @returns `true` when the string carries MIME headers that should have been decoded.
 */
function looksLikeRawMime(s: string): boolean {
  const lower = s.toLowerCase()
  if (!lower.includes("content-type:")) return false
  return (
    lower.includes("content-transfer-encoding:") ||
    (s.includes("--") && lower.includes("multipart"))
  )
}

/**
 * Extracts the `charset` parameter from a Content-Type header value.
 *
 * @param contentType - Header value such as `text/html; charset="utf-8"`.
 * @returns The charset label trimmed and lower-cased, or `""` when absent.
 */
function charsetFromContentType(contentType: string): string {
  const found = /charset\s*=\s*"?([^";\s]+)"?/i.exec(contentType)
  const label = found?.[1]
  if (label === undefined) return ""
  return label.trim().toLowerCase()
}

/**
 * Tells whether a charset label means UTF-8.
 *
 * The empty label counts as UTF-8. The comparison is exact, so callers are
 * expected to pass an already lower-cased label.
 */
function isUtf8Charset(charset: string): boolean {
  switch (charset) {
    case "":
    case "utf-8":
    case "utf8":
    case "unicode-1-1-utf-8":
      return true
    default:
      return false
  }
}

/**
 * Decodes raw bytes into a JavaScript string.
 *
 * Order of attempts:
 * 1. the declared `charset`, when it is non-empty and not a UTF-8 label;
 * 2. strict UTF-8 (throws on malformed input, which triggers the next step);
 * 3. windows-1252;
 * 4. iso-8859-1.
 *
 * @param bytes - Bytes to decode.
 * @param charset - Optional charset label; compared after lower-casing.
 * @returns The decoded text, or `""` for empty input or when every decoder fails.
 */
function decodeBytesToUtf8(bytes: Uint8Array, charset = ""): string {
  if (bytes.length === 0) return ""

  const declared = charset.toLowerCase()
  if (declared !== "" && !isUtf8Charset(declared)) {
    try {
      return new TextDecoder(declared).decode(bytes)
    } catch {
      // Label not supported by this runtime (or decode failed): use the generic ladder below.
    }
  }

  const attempts: Array<() => string> = [
    () => new TextDecoder("utf-8", { fatal: true }).decode(bytes),
    () => new TextDecoder("windows-1252").decode(bytes),
    () => new TextDecoder("iso-8859-1").decode(bytes),
  ]
  for (const attempt of attempts) {
    try {
      return attempt()
    } catch {
      // Try the next decoder.
    }
  }
  return ""
}

/**
 * Matches U+00C2 or U+00C3 followed by a character in U+0080..U+00BF, i.e. the
 * two bytes of a UTF-8 sequence that were each turned into one Latin-1 character.
 */
const UTF8_MOJIBAKE_PAIR_RE = /[\u00C2\u00C3][\u0080-\u00BF]/

/** Reports whether `s` contains at least one such mojibake pair. */
function looksLikeUtf8Mojibake(s: string): boolean {
  return s.search(UTF8_MOJIBAKE_PAIR_RE) !== -1
}

/**
 * Reverses UTF-8 that was misread as Latin-1: "rÃ©activitÃ©" → "réactivité".
 *
 * Only two-byte sequences led by U+00C2 / U+00C3 are handled. Every such lead
 * followed by a character in U+0080..U+00BF is a well-formed UTF-8 sequence,
 * so the code point is computed directly from the two code units.
 *
 * @returns `s` itself when nothing was repaired; otherwise the repaired text
 *   after a final pass over lone lead characters.
 */
function repairUtf8Mojibake(s: string): string {
  if (!s || !looksLikeUtf8Mojibake(s)) return s

  const rebuilt = s.replace(/[\u00C2\u00C3][\u0080-\u00BF]/g, (pair) => {
    const lead = pair.charCodeAt(0)
    const trail = pair.charCodeAt(1)
    // Two-byte UTF-8: 110xxxxx 10yyyyyy → xxxxxyyyyyy
    return String.fromCharCode(((lead & 0x1f) << 6) | (trail & 0x3f))
  })

  return rebuilt === s ? s : repairLoneMojibakeLeaders(rebuilt)
}

/**
 * Cleans up UTF-8 lead characters left alone in front of whitespace or
 * punctuation (`, . ; : ! ?`).
 *
 * - A lone U+00C3 ("Ã") becomes "à". Presumably the continuation byte 0xA0 was
 *   normalised to a plain space earlier — confirm against the backend heuristic.
 * - A lone U+00C2 ("Â") is removed. The original replaced it with the very same
 *   character, which did nothing; "Â" + space is the usual residue of a
 *   non-breaking space (bytes C2 A0), so dropping it leaves the intended space.
 */
function repairLoneMojibakeLeaders(s: string): string {
  return s
    .replace(/\u00C3(?=[\s,.;:!?])/g, "\u00E0")
    .replace(/\u00C2(?=[\s,.;:!?])/g, "")
}

/**
 * Repairs a string whose characters are really raw bytes of a legacy charset,
 * then repairs two-byte UTF-8 mojibake.
 *
 * The byte-level step runs only when every code unit is <= U+00FF. A string
 * containing anything above that (curly quotes, "€", emoji, …) is already
 * decoded Unicode. Masking such characters to one byte used to corrupt them:
 * U+2019 became the control character 0x19 as soon as an accented Latin-1
 * letter forced the legacy fallback.
 *
 * @param s - Stored text, possibly a byte string.
 * @returns The repaired text; `s` unchanged when empty.
 */
function repairLegacyCharsetString(s: string): string {
  if (!s) return s

  // Real Unicode text cannot be a byte string: skip the byte reinterpretation.
  if (/[^\u0000-\u00FF]/.test(s)) return repairUtf8Mojibake(s)

  const bytes = Uint8Array.from(s, (c) => c.charCodeAt(0))
  let repaired = s
  try {
    // Validation only: when the bytes are well-formed UTF-8 the string is kept as is.
    new TextDecoder("utf-8", { fatal: true }).decode(bytes)
  } catch {
    repaired = decodeBytesToUtf8(bytes)
  }
  return repairUtf8Mojibake(repaired)
}

/**
 * Decodes a base64 payload into text.
 *
 * CR, LF, tab and space are removed before decoding.
 *
 * @param encoded - Base64 text, possibly wrapped over several lines.
 * @param charset - Optional charset label forwarded to the byte decoder.
 * @returns The decoded text, or `""` when `atob` is unavailable or decoding throws.
 */
function decodeBase64Part(encoded: string, charset = ""): string {
  if (typeof atob === "undefined") return ""

  const compact = encoded.replace(/[\r\n\t ]/g, "")
  try {
    const binary = atob(compact)
    const bytes = new Uint8Array(binary.length)
    for (let i = 0; i < binary.length; i++) {
      bytes[i] = binary.charCodeAt(i)
    }
    return decodeBytesToUtf8(bytes, charset)
  } catch {
    return ""
  }
}

/**
 * Extracts the first `text/plain` and first `text/html` parts from a raw
 * multipart MIME string.
 *
 * The boundary comes from a `boundary=` parameter or, failing that, from the
 * first line that starts with `--` and does not end with `--`.
 *
 * Differences from the previous implementation:
 * - A delimiter at the very end of the input is recognised even without a
 *   trailing newline. Callers trim the input, so the closing `--boundary--`
 *   used to stay glued to the last part and broke its decoding.
 * - The per-part `Content-Type:` gate is case-insensitive, like the header
 *   extraction that follows it.
 * - Folded header lines are unfolded first, so a `charset=` placed on a
 *   continuation line is no longer lost.
 * - Un-encoded parts are re-decoded as bytes only when every character is
 *   <= U+00FF; masking real Unicode characters to one byte corrupted them.
 *
 * @param raw - Candidate raw MIME text.
 * @returns The decoded parts, or `null` when `raw` is not raw MIME, no boundary
 *   or usable part is found, or a decoded part still looks like raw MIME.
 */
function parseEmbeddedMime(raw: string): { text: string; html: string } | null {
  if (!looksLikeRawMime(raw)) return null

  const boundaryMatch = raw.match(/boundary\s*=\s*"?([^";\s]+)"?/i)
  const boundary =
    boundaryMatch?.[1] ??
    (() => {
      for (const line of raw.split(/\r?\n/)) {
        const t = line.trim()
        if (t.startsWith("--") && !t.endsWith("--") && t.length > 2) {
          return t.slice(2).trim()
        }
      }
      return ""
    })()

  if (!boundary) return null

  // `(?:\r?\n|$)` lets the closing delimiter match at end of input.
  const delimiter = new RegExp(
    `--${escapeRegExp(boundary)}(?:--)?\\s*(?:\\r?\\n|$)`
  )
  const nonByteChar = /[^\u0000-\u00FF]/
  let text = ""
  let html = ""

  for (const part of raw.split(delimiter)) {
    const trimmed = part.trim()
    if (!trimmed || !/content-type:/i.test(trimmed)) continue

    const headerEnd = trimmed.search(/\r?\n\r?\n/)
    if (headerEnd < 0) continue
    // Unfold continuation lines (newline followed by space or tab).
    const headers = trimmed.slice(0, headerEnd).replace(/\r?\n[ \t]+/g, " ")
    const body = trimmed.slice(headerEnd).replace(/^[\r\n]+/, "")

    const typeHeader = headers.match(/Content-Type:\s*([^\r\n]+)/i)?.[1] ?? ""
    const mediaType = typeHeader.split(";")[0]?.trim().toLowerCase() ?? ""
    const charset = charsetFromContentType(typeHeader)
    const encMatch = headers.match(/Content-Transfer-Encoding:\s*([^\r\n]+)/i)
    const encoding = encMatch?.[1]?.trim().toLowerCase() ?? ""

    let decoded = body.trim()
    if (encoding === "base64") {
      decoded = decodeBase64Part(decoded, charset)
    } else if (encoding === "quoted-printable" || looksLikeQuotedPrintable(decoded)) {
      decoded = decodeQuotedPrintableIfNeeded(decoded)
    } else if (!nonByteChar.test(decoded)) {
      // Byte string: reinterpret with the declared charset.
      const bytes = Uint8Array.from(decoded, (c) => c.charCodeAt(0))
      decoded = decodeBytesToUtf8(bytes, charset)
    }

    if (mediaType === "text/plain" && !text) text = decoded
    if (mediaType === "text/html" && !html) html = decoded
  }

  if (!text && !html) return null
  // Nested multipart is not unwrapped here: report failure instead.
  if (looksLikeRawMime(text) || looksLikeRawMime(html)) return null
  return { text, html }
}

/** Backslash-escapes every regular-expression metacharacter in `s`. */
function escapeRegExp(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, (meta) => `\\${meta}`)
}

/**
 * Heuristic: is `s` nothing but base64?
 *
 * After removing CR, LF, tab and space, the text must be at least 24
 * characters long, a multiple of 4 in length, and made only of base64
 * alphabet characters with optional trailing `=`.
 */
function looksLikeBareBase64(s: string): boolean {
  const compact = s.replace(/[\r\n\t ]/g, "")
  const longEnough = compact.length >= 24
  const wholeQuanta = compact.length % 4 === 0
  return longEnough && wholeQuanta && /^[A-Za-z0-9+/]+=*$/.test(compact)
}

/**
 * Heuristic: does `s` contain quoted-printable encoding?
 *
 * True when it has a soft line break (`=` right before a newline), one of the
 * tell-tale escapes `=3D`, `=C3=`, `=E2=`, or at least three `=XX` hex escapes.
 */
function looksLikeQuotedPrintable(s: string): boolean {
  const hasSoftBreak = ["=\r\n", "=\n"].some((marker) => s.includes(marker))
  if (hasSoftBreak) return true

  const hasTellTale = ["=3D", "=C3=", "=E2="].some((marker) => s.includes(marker))
  if (hasTellTale) return true

  const escapes = s.match(/=[0-9A-Fa-f]{2}/g)
  return escapes !== null && escapes.length >= 3
}

/**
 * Decodes quoted-printable text when `s` looks quoted-printable; otherwise
 * returns `s` untouched.
 *
 * `=XX` escapes and literal characters up to U+00FF are collected as bytes and
 * decoded together, so a pure byte string is decoded exactly as before.
 * Literal characters above U+00FF are already decoded Unicode: they are copied
 * through verbatim and end the current byte run. Previously they were
 * truncated to their low byte, corrupting curly quotes, "€", emoji, etc.
 * whenever the heuristic fired (for example on a `=====` underline at the end
 * of a line).
 *
 * Soft line breaks (`=` followed by a newline) are removed. An `=` not
 * followed by two hex digits is kept as a literal.
 *
 * @returns The decoded text, or `s` when decoding throws.
 */
function decodeQuotedPrintableIfNeeded(s: string): string {
  if (!looksLikeQuotedPrintable(s)) return s
  try {
    const normalized = s.replace(/\r\n/g, "\n")
    const hexPair = /^[0-9A-Fa-f]{2}$/
    let out = ""
    let pending: number[] = []

    // Decodes the bytes gathered so far and appends them to the output.
    const flush = (): void => {
      if (pending.length === 0) return
      out += decodeBytesToUtf8(new Uint8Array(pending))
      pending = []
    }

    for (let i = 0; i < normalized.length; ) {
      const code = normalized.charCodeAt(i)
      if (code === 0x3d /* "=" */) {
        if (normalized[i + 1] === "\n") {
          i += 2
          continue
        }
        const hex = normalized.slice(i + 1, i + 3)
        if (hexPair.test(hex)) {
          pending.push(parseInt(hex, 16))
          i += 3
          continue
        }
      }
      if (code > 0xff) {
        // Not a byte: keep the character itself.
        flush()
        out += normalized.charAt(i)
      } else {
        pending.push(code)
      }
      i += 1
    }
    flush()
    return out
  } catch {
    return s
  }
}

/**
 * Decodes `s` as base64 when it looks like a bare base64 blob; otherwise
 * returns `s` unchanged.
 *
 * The detection ignores whitespace, so punctuation-free prose whose letter
 * count is a multiple of 4 also passes it. Decoding such prose yields binary
 * garbage, so a result containing control characters (other than tab, LF, CR)
 * is rejected and the original text is kept.
 *
 * @returns The decoded text, or `s` when it is not base64, decodes to nothing,
 *   decodes to itself, or decodes to something that is not plain text.
 */
function decodeBareBase64IfNeeded(s: string): string {
  if (!looksLikeBareBase64(s)) return s
  const decoded = decodeBase64Part(s)
  if (!decoded || decoded === s) return s
  // C0 controls (minus \t \n \r) and DEL do not occur in readable mail text.
  if (/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/.test(decoded)) return s
  return decoded
}

/**
 * Repairs a stored message body pair: legacy charsets, transfer encodings left
 * undecoded, hidden content, and raw MIME stored in place of the decoded body.
 *
 * Raw MIME handling differs from the previous implementation in three ways:
 * - Every candidate is tried, not just `text || html`. A non-empty plain text
 *   next to a raw-MIME HTML field used to make the parser look at the wrong string.
 * - The strings are tried before whole-string quoted-printable / base64
 *   decoding, then after it. Removing `=`-newline sequences across a whole MIME
 *   document damages base64 parts that end in `==` padding; the per-part
 *   decoding in the parser does not have that problem.
 * - Parts recovered from raw MIME go through the same hidden-content stripping
 *   as bodies that were already decoded.
 *
 * @param bodyText - Stored plain-text body, if any.
 * @param bodyHtml - Stored HTML body, if any.
 * @returns The repaired bodies; a field that ends up empty falls back to its input.
 */
export function repairMimeBodies(
  bodyText?: string,
  bodyHtml?: string
): { bodyText?: string; bodyHtml?: string } {
  const legacyText = repairLegacyCharsetString(bodyText?.trim() ?? "")
  const legacyHtml = repairLegacyCharsetString(bodyHtml?.trim() ?? "")

  const text = stripInvisibleTextRuns(
    decodeBareBase64IfNeeded(decodeQuotedPrintableIfNeeded(legacyText))
  )
  const html = stripHiddenEmailHtml(
    decodeBareBase64IfNeeded(decodeQuotedPrintableIfNeeded(legacyHtml))
  )

  // parseEmbeddedMime returns null for anything that is not raw MIME, so it
  // doubles as the detection step. Untouched strings come first.
  let parsed: { text: string; html: string } | null = null
  for (const candidate of [legacyText, legacyHtml, text, html]) {
    parsed = parseEmbeddedMime(candidate)
    if (parsed) break
  }
  if (!parsed) return finalizeMimeBodies(text, bodyText, html, bodyHtml)

  const parsedText = stripInvisibleTextRuns(parsed.text)
  const parsedHtml = stripHiddenEmailHtml(parsed.html)
  return finalizeMimeBodies(
    parsedText || text || bodyText,
    bodyText,
    parsedHtml || html || bodyHtml,
    bodyHtml
  )
}

/**
 * Applies the final charset and mojibake repairs to both bodies.
 *
 * @param text - Repaired plain-text candidate.
 * @param bodyText - Original plain-text body, used when the repaired one is empty.
 * @param html - Repaired HTML candidate.
 * @param bodyHtml - Original HTML body, used when the repaired one is empty.
 */
function finalizeMimeBodies(
  text: string | undefined,
  bodyText: string | undefined,
  html: string | undefined,
  bodyHtml: string | undefined
): { bodyText?: string; bodyHtml?: string } {
  const polish = (value: string | undefined): string =>
    repairUtf8Mojibake(repairLegacyCharsetString(value?.trim() ?? ""))

  const outText = polish(text)
  const outHtml = polish(html)
  return {
    bodyText: outText !== "" ? outText : bodyText,
    bodyHtml: outHtml !== "" ? outHtml : bodyHtml,
  }
}

/**
 * Matches a run of eight or more identical separator characters: `-`, `_`,
 * `=` or `*`. Used as a split pattern when cutting snippet lines into segments.
 */
const SEPARATOR_RUN_RE = /-{8,}|_{8,}|={8,}|\*{8,}/g

/**
 * Normalises text for phrase matching: lower-cases it, turns brackets,
 * parentheses and middle dots into spaces, collapses whitespace and trims.
 */
function normalizeSnippetMatchText(s: string): string {
  const lowered = s.toLowerCase()
  const unbracketed = lowered.replace(/[[\]()·]/g, " ")
  const collapsed = unbracketed.replace(/\s+/g, " ")
  return collapsed.trim()
}

/**
 * Detects "view this email in your browser" preambles (French and English).
 *
 * A snippet qualifies when it contains one of the known phrases and either at
 * most 20 normalised characters remain once the first occurrence of that phrase
 * is removed, or the original snippet is at most 90 characters long.
 */
function isViewInBrowserSnippet(s: string): boolean {
  const haystack = normalizeSnippetMatchText(s)
  const phrases = [
    "afficher dans le navigateur",
    "view in browser",
    "voir ce message en ligne",
    "version en ligne",
  ]
  return phrases.some((phrase) => {
    if (!haystack.includes(phrase)) return false
    const remainder = haystack.replace(phrase, "").trim()
    return remainder.length <= 20 || s.length <= 90
  })
}

/**
 * Detects company legal footers (company form, street address, site URL).
 *
 * The text must mention "http" or "www.". It is then a footer when at least two
 * markers are present, or when it starts with "sas " and contains "http".
 * Matching is case-insensitive.
 */
function isSnippetLegalFooter(s: string): boolean {
  const lower = s.toLowerCase()
  const hasHttp = lower.includes("http")
  if (!hasHttp && !lower.includes("www.")) return false

  const markers = [
    "sas ",
    "sarl ",
    "rue ",
    " bp ",
    "kellermann",
    "ovh.com",
    "www.ovh",
  ]
  const hits = markers.filter((marker) => lower.includes(marker)).length
  return hits >= 2 || (lower.startsWith("sas ") && hasHttp)
}

/**
 * Reports whether `s`, ignoring leading whitespace, starts with at least
 * twelve consecutive separator characters.
 */
function hasLeadingSeparatorRun(s: string): boolean {
  const separators = "-_*='=·—"
  const body = s.trimStart()
  let length = 0
  while (length < body.length && separators.includes(body.charAt(length))) {
    length++
  }
  return length >= 12
}

/**
 * Heuristic: is this snippet leaked stylesheet text rather than message prose?
 *
 * Looks for CSS at-rules and property names, a few vendor-specific markers,
 * a `{ … ; … }` shape combined with common properties, or a leading comment.
 */
function looksLikeCssSnippet(s: string): boolean {
  const lower = s.toLowerCase()
  const has = (needle: string): boolean => lower.includes(needle)

  if (has(":root") || has("color-scheme:") || has("@media")) return true
  if (has("@font-face") || has("font-family:")) return true
  if (has("facebook") && has(":root")) return true
  if (has("meta for business") && s.includes("{")) return true
  if (has("/*//") || has("||//")) return true

  const hasRuleShape = s.includes("{") && s.includes("}") && s.includes(";")
  if (hasRuleShape && (has("font-") || has("margin:") || has("padding:"))) {
    return true
  }
  return /^\s*\/\*/.test(s)
}

/**
 * Reports whether `s` still contains an HTML character reference.
 *
 * `HTML_ENTITY_RE` carries the `g` flag, so `RegExp.prototype.test` would start
 * at the regex's `lastIndex` and move it, making the answer depend on earlier
 * calls. `String.prototype.search` always scans from the start and leaves
 * `lastIndex` alone.
 */
function hasUndecodedHtmlEntities(s: string): boolean {
  return s.search(HTML_ENTITY_RE) !== -1
}

/**
 * Reports whether separator characters make up at least 55 % of `s`.
 * Strings shorter than eight UTF-16 units never qualify; the ratio itself is
 * computed over code points.
 */
function isMostlySeparatorLine(s: string): boolean {
  if (s.length < 8) return false
  const separators = s.match(/[-_*=·—|]/g)
  const separatorCount = separators === null ? 0 : separators.length
  const total = Array.from(s).length
  return separatorCount / total >= 0.55
}

/**
 * Decides whether a snippet is boilerplate that should not be shown as a preview.
 *
 * Checks run on the tag-stripped text unless noted:
 * - fewer than four characters;
 * - CSS, separator lines, "view in browser" banners, legal footers;
 * - the raw input still holds an HTML tag or a character reference;
 * - short "click here" style prompts, or a short bare URL;
 * - fewer than 35 % letters and digits.
 */
function isSnippetBoilerplate(s: string): boolean {
  const plain = stripHtmlTagsForSnippet(s.trim())
  if (plain.length < 4) return true

  const isLayoutNoise =
    looksLikeCssSnippet(plain) ||
    isMostlySeparatorLine(plain) ||
    hasLeadingSeparatorRun(plain)
  if (isLayoutNoise) return true
  if (isViewInBrowserSnippet(plain) || isSnippetLegalFooter(plain)) return true

  // These two look at the raw input on purpose: markup that survived is itself the signal.
  if (/<[^>]+>/.test(s) || hasUndecodedHtmlEntities(s)) return true

  const lower = plain.toLowerCase()
  const prompts = ["si vous ne visualisez pas", "cliquer ici", "click here"]
  if (plain.length < 200 && prompts.some((prompt) => lower.includes(prompt))) {
    return true
  }

  const isBareUrl = lower.startsWith("http://") || lower.startsWith("https://")
  if (isBareUrl && plain.length < 120) return true

  const alphanumerics = plain.match(/\p{L}|\p{N}/gu)?.length ?? 0
  return alphanumerics / Array.from(plain).length < 0.35
}

/**
 * Picks the most preview-worthy line.
 *
 * Boilerplate lines and lines with fewer than eight letters are skipped. The
 * score is four points per letter plus 40 when the length is strictly between
 * 40 and 280; the first line with the highest score wins.
 *
 * @returns The winning line with tags stripped, or `""` when none qualifies.
 */
function pickBestSnippetLine(lines: string[]): string {
  let winner = ""
  let winnerScore = -1

  lines.forEach((line) => {
    const candidate = stripHtmlTagsForSnippet(line.trim())
    if (candidate === "" || isSnippetBoilerplate(candidate)) return

    const letterCount = candidate.match(/\p{L}/gu)?.length ?? 0
    if (letterCount < 8) return

    const inSweetSpot = candidate.length > 40 && candidate.length < 280
    const score = letterCount * 4 + (inSweetSpot ? 40 : 0)
    if (score > winnerScore) {
      winnerScore = score
      winner = candidate
    }
  })

  return winner
}

/**
 * Matches one HTML character reference: numeric (`&#233;`, `&#xE9;`) or named
 * with 2 to 9 characters (`&amp;`). It carries the `g` flag, so it keeps state
 * in `lastIndex` when used with `test` or `exec`.
 */
const HTML_ENTITY_RE =
  /&(?:#x?[0-9a-f]+|[a-z][a-z0-9]{1,8});/gi

/**
 * Decodes HTML character references, repeating up to four passes so that
 * double-encoded input such as `&amp;amp;` is fully resolved.
 *
 * With a DOM, a single scratch `<textarea>` per call does the decoding.
 * Without one, a small built-in table plus numeric references are handled and
 * unknown named references are left as they are.
 *
 * Numeric references that are not valid Unicode scalar values (zero,
 * surrogates, above U+10FFFF) become U+FFFD. Previously an out-of-range value
 * such as `&#1114112;` made `String.fromCodePoint` throw a RangeError.
 */
function decodeHtmlEntitiesForSnippet(s: string): string {
  const scratch =
    typeof document !== "undefined" ? document.createElement("textarea") : null

  // Turns the digits of a numeric reference into a character, never throwing.
  const fromNumeric = (digits: string, radix: number): string => {
    const codePoint = Number.parseInt(digits, radix)
    const inRange = codePoint > 0 && codePoint <= 0x10ffff
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff
    return inRange && !isSurrogate ? String.fromCodePoint(codePoint) : "\uFFFD"
  }

  const decodeOne = (entity: string): string => {
    if (scratch) {
      scratch.innerHTML = entity
      return scratch.value
    }
    const lower = entity.toLowerCase()
    if (lower === "&nbsp;") return " "
    if (lower === "&amp;") return "&"
    if (lower === "&lt;") return "<"
    if (lower === "&gt;") return ">"
    if (lower === "&quot;") return '"'
    if (lower === "&apos;") return "'"
    const dec = /^&#(\d+);$/i.exec(entity)?.[1]
    if (dec !== undefined) return fromNumeric(dec, 10)
    const hex = /^&#x([0-9a-f]+);$/i.exec(entity)?.[1]
    if (hex !== undefined) return fromNumeric(hex, 16)
    return entity
  }

  let out = s
  for (let pass = 0; pass < 4; pass++) {
    const next = out.replace(HTML_ENTITY_RE, decodeOne)
    if (next === out) break
    out = next
  }
  return out
}

/**
 * Cuts a snippet at the first CSS/comment marker when at least twelve
 * characters of text precede it, then removes trailing spaces, slashes,
 * asterisks, dashes, underscores and pipes. Markers are tried in a fixed order;
 * a marker preceded by too little text is skipped.
 */
function stripSnippetCssTail(s: string): string {
  for (const marker of ["/*//", "/*", "//||"]) {
    const at = s.indexOf(marker)
    if (at < 0) continue
    const head = s.slice(0, at).trim()
    if (head.length < 12) continue
    return head.replace(/[ /*\-_|]+$/g, "")
  }
  return s
}

/**
 * Splits a snippet into candidate segments: first by line, then at separator
 * runs inside each line. Each piece is tag-stripped and empty pieces are dropped.
 */
function splitSnippetSegments(s: string): string[] {
  const segments: string[] = []
  for (const line of s.replace(/\r\n/g, "\n").split("\n")) {
    for (const piece of line.split(SEPARATOR_RUN_RE)) {
      const plain = stripHtmlTagsForSnippet(piece.trim())
      if (plain) segments.push(plain)
    }
  }
  return segments
}

/**
 * Reduces HTML to single-line plain text for previews: removes `<style>`
 * blocks and all tags, decodes character references, cuts a trailing CSS
 * fragment, then collapses whitespace.
 */
function stripHtmlTagsForSnippet(s: string): string {
  const withoutStyles = s.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, " ")
  const withoutTags = withoutStyles.replace(/<[^>]+>/g, " ")
  const withoutCssTail = stripSnippetCssTail(
    decodeHtmlEntitiesForSnippet(withoutTags)
  )
  return withoutCssTail.replace(/\s+/g, " ").trim()
}

/**
 * Produces the preview text for a snippet.
 *
 * Prefers the best-scoring segment. Otherwise falls back to the whole cleaned
 * snippet, unless that is boilerplate, in which case `""` is returned. The
 * result is limited to 200 UTF-16 units, and the cut never lands between the
 * two halves of a surrogate pair (a plain `slice(0, 200)` could leave a lone
 * high surrogate behind an emoji).
 */
function polishSnippetPreview(snippet: string): string {
  const limit = 200
  const clip = (value: string): string => {
    if (value.length <= limit) return value
    const lastKept = value.charCodeAt(limit - 1)
    const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff
    return value.slice(0, splitsPair ? limit - 1 : limit)
  }

  const cleaned = stripHtmlTagsForSnippet(snippet)
  const best = pickBestSnippetLine(splitSnippetSegments(cleaned))
  if (best) return clip(best)
  if (isSnippetBoilerplate(cleaned)) return ""
  return clip(cleaned)
}

/**
 * Repairs a list/search preview that was stored as undecoded quoted-printable
 * or base64, or that consists of marketing boilerplate.
 *
 * @param snippet - Stored preview text.
 * @returns The input itself when it is missing or blank; otherwise the polished
 *   preview, or `undefined` when nothing presentable remains.
 */
export function repairSnippet(snippet?: string): string | undefined {
  if (!snippet?.trim()) return snippet
  const trimmed = snippet.trim()

  const decoded = decodeBareBase64IfNeeded(decodeQuotedPrintableIfNeeded(trimmed))
  // When no transfer decoding applied, continue from the untouched input.
  const source = decoded === trimmed ? snippet : decoded

  const preview = repairUtf8Mojibake(
    polishSnippetPreview(stripInvisibleTextRuns(source))
  )
  return preview === "" ? undefined : preview
}