87 lines
2.1 KiB
Go
87 lines
2.1 KiB
Go
package imap
|
|
|
|
import (
|
|
"mime"
|
|
"strings"
|
|
"unicode/utf8"
|
|
|
|
"golang.org/x/text/encoding"
|
|
"golang.org/x/text/encoding/charmap"
|
|
"golang.org/x/text/encoding/htmlindex"
|
|
"golang.org/x/text/transform"
|
|
)
|
|
|
|
// charsetFromContentType extracts the charset parameter from a Content-Type
// header value.
//
// The header is parsed with mime.ParseMediaType. Mail in the wild often
// carries Content-Type values the strict parser rejects (for example an
// unquoted filename containing spaces, or a duplicated parameter). In that
// case the function falls back to a lenient scan of the ";"-separated
// parameters so that a readable charset is not discarded together with the
// malformed part of the header.
//
// Surrounding whitespace and stray double or single quotes are stripped from
// the result. An empty string is returned when contentType is empty or no
// charset parameter can be found.
func charsetFromContentType(contentType string) string {
	if contentType == "" {
		return ""
	}

	// clean strips whitespace and leftover quoting around a charset value.
	clean := func(v string) string {
		return strings.TrimSpace(strings.Trim(strings.TrimSpace(v), `"'`))
	}

	if _, params, err := mime.ParseMediaType(contentType); err == nil {
		return clean(params["charset"])
	}

	// Strict parsing failed: look for a charset parameter by hand. The first
	// segment is the media type itself, so it is skipped. strings.Split always
	// yields at least one element, so slicing from index 1 is safe.
	segments := strings.Split(contentType, ";")
	for _, seg := range segments[1:] {
		eq := strings.IndexByte(seg, '=')
		if eq < 0 {
			continue
		}
		if strings.EqualFold(strings.TrimSpace(seg[:eq]), "charset") {
			return clean(seg[eq+1:])
		}
	}
	return ""
}
|
|
|
|
// isUTF8Charset reports whether charset names UTF-8.
//
// The comparison ignores case and surrounding whitespace. An empty name is
// treated as UTF-8.
func isUTF8Charset(charset string) bool {
	name := strings.ToLower(strings.TrimSpace(charset))
	if name == "" {
		return true
	}
	for _, alias := range [...]string{"utf-8", "utf8", "unicode-1-1-utf-8"} {
		if name == alias {
			return true
		}
	}
	return false
}
|
|
|
|
// decodeBodyBytesToUTF8 converts a MIME part payload to UTF-8 using Content-Type charset.
|
|
func decodeBodyBytesToUTF8(data []byte, contentType string) string {
|
|
if len(data) == 0 {
|
|
return ""
|
|
}
|
|
charset := charsetFromContentType(contentType)
|
|
if charset != "" && !isUTF8Charset(charset) {
|
|
if decoded := decodeBytesWithCharset(data, charset); decoded != "" {
|
|
return decoded
|
|
}
|
|
}
|
|
if utf8.Valid(data) {
|
|
return string(data)
|
|
}
|
|
return repairRawBytesToUTF8(data)
|
|
}
|
|
|
|
func decodeBytesWithCharset(data []byte, charset string) string {
|
|
enc, err := htmlindex.Get(charset)
|
|
if err != nil || enc == nil {
|
|
return ""
|
|
}
|
|
decoded, err := enc.NewDecoder().Bytes(data)
|
|
if err != nil || !utf8.Valid(decoded) {
|
|
return ""
|
|
}
|
|
return string(decoded)
|
|
}
|
|
|
|
// repairRawBytesToUTF8 fixes bodies stored without charset conversion (Latin-1 / Windows-1252).
|
|
func repairRawBytesToUTF8(data []byte) string {
|
|
if len(data) == 0 {
|
|
return ""
|
|
}
|
|
if utf8.Valid(data) {
|
|
return string(data)
|
|
}
|
|
for _, enc := range []encoding.Encoding{charmap.Windows1252, charmap.ISO8859_1} {
|
|
decoded, _, err := transform.Bytes(enc.NewDecoder(), data)
|
|
if err == nil && utf8.Valid(decoded) && isMostlyReadableText(decoded) {
|
|
return string(decoded)
|
|
}
|
|
}
|
|
return strings.ToValidUTF8(string(data), "")
|
|
}
|
|
|
|
// repairLegacyCharsetString fixes text already loaded as a Go string with invalid UTF-8 bytes.
|
|
func repairLegacyCharsetString(s string) string {
|
|
if s == "" || utf8.ValidString(s) {
|
|
return s
|
|
}
|
|
return repairRawBytesToUTF8([]byte(s))
|
|
}
|