package imap

import (
	"io"
	"mime/quotedprintable"
	"strings"
	"unicode"
	"unicode/utf8"

	"github.com/ultisuite/ulti-backend/internal/mail/sanitize"
)

// minBareBase64Len is the minimum length, in bytes, an input must have before
// it is considered as a candidate for bare base64 decoding.
const minBareBase64Len = 24

// RepairStoredBodies fixes bodies stored as raw MIME, quoted-printable, or base64.
//
// The repairs run in a fixed order: legacy charset repair, raw MIME
// re-parsing, bare quoted-printable decoding, then bare base64 decoding.
// Invisible preheader padding is stripped from the text body only; the HTML
// body is returned without that step.
func RepairStoredBodies(text, html string) (string, string) {
	text = repairLegacyCharsetString(text)
	html = repairLegacyCharsetString(html)

	text, html = repairRawMIME(text, html)

	text = decodeBareQuotedPrintableIfNeeded(text)
	html = decodeBareQuotedPrintableIfNeeded(html)

	text = decodeBareBase64IfNeeded(text)
	html = decodeBareBase64IfNeeded(html)

	text = stripPlainTextPreheaderPadding(text)
	return text, html
}

// repairRawMIME re-parses the bodies when either of them looks like an
// unparsed MIME message. The inputs are returned unchanged when neither looks
// like raw MIME, when parsing yields two empty bodies, or when a parsed body
// still looks like raw MIME.
func repairRawMIME(text, html string) (string, string) {
	if !looksLikeRawMIME(text) && !looksLikeRawMIME(html) {
		return text, html
	}

	// NOTE(review): text is parsed whenever it is non-empty, even if only html
	// looked like raw MIME — confirm that this is the intended source.
	raw := text
	if raw == "" {
		raw = html
	}

	t, h := parseBody([]byte(raw))
	if t == "" && h == "" {
		return text, html
	}
	// Parsing did not actually unwrap the message; keep what was stored.
	if looksLikeRawMIME(t) || looksLikeRawMIME(h) {
		return text, html
	}
	return t, h
}

// RepairSnippet fixes list/search previews stored as undecoded base64 or raw MIME.
// It is RepairSnippetWithBodies called with empty bodies.
func RepairSnippet(snippet string) string {
	return RepairSnippetWithBodies(snippet, "", "")
}

// RepairSnippetWithBodies decodes a stored snippet and optionally rebuilds from bodies.
//
// The stored snippet is stripped of markup, decoded from bare quoted-printable
// and bare base64 when it looks encoded, and stripped of invisible padding. If
// it then looks like raw MIME and can be parsed, the snippet is built from the
// parsed parts. Otherwise, when a body is supplied, the bodies are repaired
// and a snippet is rebuilt from them; the rebuilt snippet wins when the stored
// one is empty, low quality, or boilerplate, or when the rebuilt one scores
// higher. A stored snippet that is empty or low quality yields "".
func RepairSnippetWithBodies(snippet, bodyText, bodyHTML string) string {
	snippet = stripSnippetMarkup(snippet)
	if decoded := decodeBareQuotedPrintableIfNeeded(snippet); decoded != snippet {
		snippet = stripSnippetMarkup(decoded)
	}
	if decoded := decodeBareBase64IfNeeded(snippet); decoded != snippet {
		snippet = stripSnippetMarkup(decoded)
	}
	snippet = stripSnippetMarkup(stripPlainTextPreheaderPadding(snippet))

	if looksLikeRawMIME(snippet) {
		if t, h, ok := parseEmbeddedMIME([]byte(snippet)); ok {
			return SnippetFromBodies(t, h, 200)
		}
	}

	bodyText, bodyHTML = RepairStoredBodies(bodyText, bodyHTML)
	if bodyText != "" || bodyHTML != "" {
		rebuilt := SnippetFromBodies(bodyText, bodyHTML, 200)
		storedBad := snippet == "" || SnippetLooksLowQuality(snippet) || isSnippetBoilerplate(snippet)
		// A bad stored snippet is always replaced, even by an empty rebuild.
		if storedBad {
			return rebuilt
		}
		if rebuilt != "" && snippetLineScore(rebuilt) > snippetLineScore(snippet) {
			return rebuilt
		}
	}

	if snippet == "" {
		return ""
	}
	if SnippetLooksLowQuality(snippet) {
		return ""
	}
	return stripSnippetMarkup(snippet)
}

// stripPlainTextPreheaderPadding removes invisible text runs from text by
// delegating to sanitize.StripInvisibleTextRuns.
func stripPlainTextPreheaderPadding(text string) string {
	return sanitize.StripInvisibleTextRuns(text)
}

// stripHTMLForSnippet reduces html to a single line of plain text suitable for
// a preview: hidden email HTML is removed, everything between '<' and '>' is
// dropped, whitespace runs are collapsed to single spaces, and invisible text
// runs are stripped. It returns "" for empty input.
func stripHTMLForSnippet(html string) string {
	if html == "" {
		return ""
	}
	html = sanitize.StripHiddenEmailHTML(html)

	var b strings.Builder
	inTag := false
	for _, r := range html {
		switch {
		case r == '<':
			inTag = true
		case r == '>':
			inTag = false
		case inTag || r == '\r':
			// Tag contents and carriage returns are dropped.
		case r == '\n' || r == '\t':
			// Newlines and tabs separate words. Tabs are control runes and
			// would otherwise be dropped, gluing adjacent words together.
			// Runs of spaces are collapsed by strings.Fields below.
			b.WriteByte(' ')
		case !unicode.IsControl(r):
			b.WriteRune(r)
		}
	}
	return sanitize.StripInvisibleTextRuns(strings.Join(strings.Fields(b.String()), " "))
}

// decodeBareQuotedPrintableIfNeeded decodes s as quoted-printable when it looks
// quoted-printable encoded. It returns s unchanged when s is empty, does not
// look encoded, fails to decode, decodes to nothing, or decodes to bytes that
// are not mostly readable text.
func decodeBareQuotedPrintableIfNeeded(s string) string {
	if s == "" || !looksLikeQuotedPrintable(s) {
		return s
	}
	decoded, err := io.ReadAll(quotedprintable.NewReader(strings.NewReader(s)))
	if err != nil || len(decoded) == 0 || !isMostlyReadableText(decoded) {
		return s
	}
	return decodeBodyBytesToUTF8(decoded, "")
}

// looksLikeQuotedPrintable reports whether s carries quoted-printable markers:
// a soft line break ("=" followed by a line ending), one of the escapes "=3D",
// "=C3=" or "=E2=", or at least three matches of qpHexSeqRE.
func looksLikeQuotedPrintable(s string) bool {
	if strings.Contains(s, "=\r\n") || strings.Contains(s, "=\n") {
		return true
	}
	if strings.Contains(s, "=3D") || strings.Contains(s, "=C3=") || strings.Contains(s, "=E2=") {
		return true
	}
	// Only the first three matches matter, so stop searching after them.
	return len(qpHexSeqRE.FindAllString(s, 3)) >= 3
}

// decodeBareBase64IfNeeded decodes s as base64 when, after trimming and
// removing embedded whitespace, it is likely base64. It returns s unchanged
// when s is shorter than minBareBase64Len, is not likely base64, fails to
// decode, decodes to nothing, or decodes to bytes that are not mostly readable
// text.
func decodeBareBase64IfNeeded(s string) string {
	if s == "" {
		return s
	}
	trimmed := strings.TrimSpace(s)
	if len(trimmed) < minBareBase64Len {
		return s
	}
	clean := stripBase64Whitespace(trimmed)
	if !isLikelyBase64(clean) {
		return s
	}
	decoded, err := decodeBase64Body([]byte(clean))
	if err != nil || len(decoded) == 0 || !isMostlyReadableText(decoded) {
		return s
	}
	return decodeBodyBytesToUTF8(decoded, "")
}

// stripBase64Whitespace returns s with every carriage return, line feed, space
// and tab removed.
func stripBase64Whitespace(s string) string {
	var b strings.Builder
	b.Grow(len(s))
	for _, r := range s {
		switch r {
		case '\r', '\n', ' ', '\t':
			continue
		default:
			b.WriteRune(r)
		}
	}
	return b.String()
}

// isLikelyBase64 reports whether s is plausibly base64: at least
// minBareBase64Len bytes long, a multiple of four in length, made only of the
// standard base64 alphabet and '=', and either containing '=' or at least 32
// bytes long.
func isLikelyBase64(s string) bool {
	if len(s) < minBareBase64Len || len(s)%4 != 0 {
		return false
	}
	for _, r := range s {
		switch {
		case r >= 'A' && r <= 'Z',
			r >= 'a' && r <= 'z',
			r >= '0' && r <= '9',
			r == '+', r == '/', r == '=':
			continue
		default:
			return false
		}
	}
	return strings.Contains(s, "=") || len(s) >= 32
}

// isMostlyReadableText reports whether at least 85% of the bytes in b belong
// to readable text. It returns false for empty input.
//
// Valid UTF-8 is scored per rune, weighted by encoded size: a rune is readable
// when it is a newline, carriage return, tab, or any non-control character.
// Scoring per rune keeps multi-byte text (for example Cyrillic or CJK) from
// being rejected because of its continuation bytes. Input that is not valid
// UTF-8 is scored per byte: newline, carriage return, tab, printable ASCII and
// bytes >= 0xC0 count as readable.
func isMostlyReadableText(b []byte) bool {
	if len(b) == 0 {
		return false
	}

	readable := 0
	if utf8.Valid(b) {
		for i := 0; i < len(b); {
			r, size := utf8.DecodeRune(b[i:])
			if r == '\n' || r == '\r' || r == '\t' || !unicode.IsControl(r) {
				readable += size
			}
			i += size
		}
	} else {
		for _, c := range b {
			if c == '\n' || c == '\r' || c == '\t' || (c >= 32 && c < 127) || c >= 0xc0 {
				readable++
			}
		}
	}
	return float64(readable)/float64(len(b)) >= 0.85
}