ultisuite-backend/internal/mail/imap/charset.go
2026-06-04 00:12:11 +02:00

87 lines
2.1 KiB
Go

package imap
import (
"mime"
"strings"
"unicode/utf8"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/htmlindex"
"golang.org/x/text/transform"
)
// charsetFromContentType extracts the charset label from a Content-Type
// header value.
//
// The header is parsed with mime.ParseMediaType first. Mail headers seen in
// the wild are frequently malformed (duplicate parameters, parameters without
// a value), which makes the strict parser reject the whole header. In that
// case a lenient scan over the ';'-separated segments is used so an otherwise
// readable charset declaration is not thrown away.
//
// The returned label has surrounding whitespace and stray double or single
// quotes removed. It returns "" when contentType is empty or carries no
// charset parameter.
func charsetFromContentType(contentType string) string {
	if contentType == "" {
		return ""
	}

	var raw string
	if _, params, err := mime.ParseMediaType(contentType); err == nil {
		raw = params["charset"]
	} else {
		// Strict parsing failed; look for the first "charset=<value>" segment.
		for _, segment := range strings.Split(contentType, ";") {
			eq := strings.IndexByte(segment, '=')
			if eq < 0 {
				continue
			}
			if strings.EqualFold(strings.TrimSpace(segment[:eq]), "charset") {
				raw = segment[eq+1:]
				break
			}
		}
	}

	// Some senders wrap the label in single quotes, which the MIME grammar
	// treats as ordinary token characters; strip both quote styles.
	return strings.TrimSpace(strings.Trim(strings.TrimSpace(raw), `"'`))
}
// isUTF8Charset reports whether charset names UTF-8. The comparison ignores
// case and surrounding whitespace; an empty label is treated as UTF-8.
func isUTF8Charset(charset string) bool {
	label := strings.ToLower(strings.TrimSpace(charset))
	for _, alias := range [...]string{"", "utf-8", "utf8", "unicode-1-1-utf-8"} {
		if label == alias {
			return true
		}
	}
	return false
}
// decodeBodyBytesToUTF8 returns a MIME part payload as a UTF-8 string.
//
// When contentType declares a charset other than UTF-8, the payload is
// converted with decodeBytesWithCharset; an empty result from that call is
// treated as a failed conversion. Otherwise, or after a failed conversion,
// bytes that are already valid UTF-8 are returned unchanged and anything else
// is handed to repairRawBytesToUTF8. Empty input yields "".
func decodeBodyBytesToUTF8(data []byte, contentType string) string {
	if len(data) == 0 {
		return ""
	}

	// Give the declared (non-UTF-8) charset the first attempt.
	if label := charsetFromContentType(contentType); label != "" && !isUTF8Charset(label) {
		converted := decodeBytesWithCharset(data, label)
		if converted != "" {
			return converted
		}
	}

	if !utf8.Valid(data) {
		return repairRawBytesToUTF8(data)
	}
	return string(data)
}
// decodeBytesWithCharset converts data from the named charset to UTF-8.
// The encoding is looked up by name through htmlindex. It returns "" when the
// name is not recognised, when decoding reports an error, or when the decoded
// bytes are not valid UTF-8.
func decodeBytesWithCharset(data []byte, charset string) string {
	codec, lookupErr := htmlindex.Get(charset)
	if lookupErr != nil || codec == nil {
		return ""
	}

	out, decodeErr := codec.NewDecoder().Bytes(data)
	if decodeErr == nil && utf8.Valid(out) {
		return string(out)
	}
	return ""
}
// repairRawBytesToUTF8 produces a UTF-8 string from bytes that were stored
// without charset conversion.
//
// Valid UTF-8 is returned unchanged. Otherwise the bytes are decoded as
// Windows-1252 and then as ISO-8859-1, in that order, and the first result
// that isMostlyReadableText accepts is returned. If neither candidate
// qualifies, the invalid byte sequences are dropped from the input.
func repairRawBytesToUTF8(data []byte) string {
	switch {
	case len(data) == 0:
		return ""
	case utf8.Valid(data):
		return string(data)
	}

	// Order matters: Windows-1252 is tried before ISO-8859-1.
	fallbacks := [...]encoding.Encoding{charmap.Windows1252, charmap.ISO8859_1}
	for i := range fallbacks {
		out, _, decodeErr := transform.Bytes(fallbacks[i].NewDecoder(), data)
		if decodeErr != nil || !utf8.Valid(out) {
			continue
		}
		if isMostlyReadableText(out) {
			return string(out)
		}
	}

	// Last resort: keep what is valid and discard the rest.
	return strings.ToValidUTF8(string(data), "")
}
// repairLegacyCharsetString repairs a Go string that contains invalid UTF-8
// byte sequences by passing its bytes to repairRawBytesToUTF8. Empty strings
// and strings that are already valid UTF-8 are returned as-is.
func repairLegacyCharsetString(s string) string {
	if len(s) > 0 && !utf8.ValidString(s) {
		return repairRawBytesToUTF8([]byte(s))
	}
	return s
}