package imap import ( "bytes" "mime" "net/mail" "regexp" "strings" "unicode" ) var ( htmlTitleRE = regexp.MustCompile(`(?is)]*>\s*([^<]+?)\s*`) qpHexSeqRE = regexp.MustCompile(`=[0-9A-Fa-f]{2}`) ) var mimeWordDecoder mime.WordDecoder // RepairSubject decodes RFC 2047 / broken envelope subjects using headers or body fallbacks. func RepairSubject(subject string, bodyText, bodyHTML string, raw []byte) string { if s := decodeMIMEHeaderValue(subject); !subjectLooksBroken(s) { return s } if len(raw) > 0 { if hdr := subjectFromRawMessage(raw); hdr != "" && !subjectLooksBroken(hdr) { return hdr } } decodedHTML := decodeBareQuotedPrintableIfNeeded(bodyHTML) decodedText := decodeBareQuotedPrintableIfNeeded(bodyText) if t := extractSubjectFromHTML(decodedHTML); t != "" { return t } if fallback := subjectFromBodyFallback(decodedText, decodedHTML); fallback != "" { return fallback } return decodeMIMEHeaderValue(subject) } func decodeMIMEHeaderValue(s string) string { s = strings.TrimSpace(s) if s == "" { return s } dec, err := mimeWordDecoder.DecodeHeader(s) if err != nil { return toValidUTF8(s) } return toValidUTF8(dec) } func subjectFromRawMessage(raw []byte) string { msg, err := mail.ReadMessage(bytes.NewReader(raw)) if err != nil { return "" } return decodeMIMEHeaderValue(msg.Header.Get("Subject")) } func extractSubjectFromHTML(html string) string { html = strings.TrimSpace(html) if html == "" { return "" } if m := htmlTitleRE.FindStringSubmatch(html); len(m) > 1 { t := strings.TrimSpace(m[1]) if t != "" && !subjectLooksBroken(t) { return t } } return "" } func subjectFromBodyFallback(text, html string) string { plain := strings.TrimSpace(text) if plain == "" { plain = strings.TrimSpace(stripHTMLForSnippet(html)) } if plain == "" || subjectLooksBroken(plain) { return "" } if idx := strings.IndexAny(plain, ".\n\r"); idx >= 15 && idx <= 100 { return truncate(strings.TrimSpace(plain[:idx]), 120) } return truncate(plain, 120) } func subjectLooksBroken(s string) bool { s = strings.TrimSpace(s) if s == "" { return true } letters := 0 for _, r := range s { if unicode.IsLetter(r) || unicode.IsNumber(r) { letters++ } } return letters < 2 }