package imap

import (
	stdhtml "html"
	"regexp"
	"strings"
	"unicode"

	"golang.org/x/net/html"

	"github.com/ultisuite/ulti-backend/internal/mail/sanitize"
)

// snippetHTMLTagRE matches anything shaped like an HTML tag: a '<', any run
// of non-'>' characters (newlines included), and the closing '>'.
var snippetHTMLTagRE = regexp.MustCompile(`(?is)<[^>]*>`)

// snippetSkipTags lists elements whose whole subtree is ignored when
// extracting preview text.
var snippetSkipTags = map[string]bool{
	"script":   true,
	"style":    true,
	"head":     true,
	"noscript": true,
	"meta":     true,
	"link":     true,
	"title":    true,
	"svg":      true,
}

// snippetBlockTags lists elements whose flattened text becomes one snippet
// candidate; the candidate walk does not descend further once it meets one.
var snippetBlockTags = map[string]bool{
	"p":    true,
	"li":   true,
	"td":   true,
	"th":   true,
	"h1":   true,
	"h2":   true,
	"h3":   true,
	"h4":   true,
	"h5":   true,
	"h6":   true,
	"div":  true,
	"span": true,
	"a":    true,
}

// snippetInlineTags lists phrasing elements that sit inside a run of text.
// Their boundaries must not add whitespace when a subtree is flattened,
// otherwise "Hi <b>Bob</b>, welcome" would turn into "Hi Bob , welcome".
var snippetInlineTags = map[string]bool{
	"a":      true,
	"abbr":   true,
	"b":      true,
	"bdi":    true,
	"bdo":    true,
	"big":    true,
	"cite":   true,
	"code":   true,
	"del":    true,
	"dfn":    true,
	"em":     true,
	"font":   true,
	"i":      true,
	"ins":    true,
	"kbd":    true,
	"mark":   true,
	"q":      true,
	"s":      true,
	"samp":   true,
	"small":  true,
	"span":   true,
	"strike": true,
	"strong": true,
	"sub":    true,
	"sup":    true,
	"time":   true,
	"tt":     true,
	"u":      true,
	"var":    true,
	"wbr":    true,
}

// snippetBoilerplatePhrases holds lowercase fragments that mark a short line
// as newsletter chrome ("view in browser", "unsubscribe", ...) rather than
// message content. Kept at package level so it is built once.
var snippetBoilerplatePhrases = []string{
	"afficher dans le navigateur",
	"view in browser",
	"view this email in your browser",
	"voir ce message en ligne",
	"si vous ne visualisez pas",
	"si vous n'arrivez pas à lire",
	"si vous n'arrivez pas a lire",
	"problems viewing this email",
	"having trouble viewing",
	"cliquer ici",
	"click here",
	"unsubscribe",
	"se désabonner",
	"se desabonner",
	"manage your preferences",
	"gérer vos préférences",
}

// SnippetFromBodies builds a short list-preview from plain and HTML bodies.
//
// Candidates are collected from both bodies, the best-scoring one that is not
// boilerplate is chosen, its markup is stripped, and the result is passed to
// truncate together with maxLen. It returns "" when no candidate qualifies.
func SnippetFromBodies(text, html string, maxLen int) string {
	candidates := snippetCandidates(text, html)
	best := pickBestSnippetLine(candidates)
	if best == "" {
		return ""
	}
	return truncate(stripSnippetMarkup(best), maxLen)
}

// stripSnippetMarkup removes HTML tags and entities from preview text.
//
// Tag-shaped spans are replaced by a space, entities are unescaped, and every
// whitespace run is collapsed to a single space with the ends trimmed.
func stripSnippetMarkup(s string) string {
	s = strings.TrimSpace(s)
	if s == "" {
		return ""
	}
	// ReplaceAllString leaves the input untouched when nothing matches, so a
	// separate MatchString pre-check would only scan the string twice.
	s = snippetHTMLTagRE.ReplaceAllString(s, " ")
	s = stdhtml.UnescapeString(s)
	return strings.Join(strings.Fields(s), " ")
}

// snippetCandidates gathers candidate lines: first the non-empty lines of the
// plain-text body (after stripPlainTextPreheaderPadding and trimming), then
// the candidates extracted from the HTML body.
func snippetCandidates(text, html string) []string {
	var out []string
	text = strings.TrimSpace(stripPlainTextPreheaderPadding(text))
	if text != "" {
		out = append(out, splitSnippetSegments(text)...)
	}
	out = append(out, htmlSnippetCandidates(html)...)
	return out
}

// splitSnippetSegments splits s into lines, strips markup from each line and
// drops the ones that end up empty. LF, CRLF and bare CR all count as line
// breaks.
func splitSnippetSegments(s string) []string {
	raw := strings.FieldsFunc(s, func(r rune) bool { return r == '\n' || r == '\r' })
	var segments []string
	for _, line := range raw {
		line = stripSnippetMarkup(line)
		if line == "" {
			continue
		}
		segments = append(segments, line)
	}
	return segments
}

// htmlSnippetCandidates extracts de-duplicated candidate strings from an HTML
// body.
//
// The input is first passed through sanitize.StripHiddenEmailHTML and parsed.
// Each element listed in snippetBlockTags contributes the flattened text of
// its subtree as one candidate; subtrees under snippetSkipTags are ignored.
// When no block element yields text, the text of the whole document is used.
// If parsing fails, the result of stripHTMLForSnippet is split into lines
// instead.
func htmlSnippetCandidates(raw string) []string {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return nil
	}
	raw = sanitize.StripHiddenEmailHTML(raw)
	doc, err := html.Parse(strings.NewReader(raw))
	if err != nil {
		if flat := strings.TrimSpace(stripHTMLForSnippet(raw)); flat != "" {
			return splitSnippetSegments(flat)
		}
		return nil
	}

	seen := make(map[string]struct{})
	var candidates []string
	add := func(s string) {
		s = stripSnippetMarkup(s)
		if s == "" {
			return
		}
		if _, ok := seen[s]; ok {
			return
		}
		seen[s] = struct{}{}
		candidates = append(candidates, s)
	}

	var walk func(*html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.ElementNode {
			tag := strings.ToLower(n.Data)
			if snippetSkipTags[tag] {
				return
			}
			if snippetBlockTags[tag] {
				// The outermost block element wins: its whole subtree is
				// flattened into a single candidate.
				add(textFromHTMLSubtree(n))
				return
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(doc)

	if len(candidates) == 0 {
		add(textFromHTMLSubtree(doc))
	}
	return candidates
}

// textFromHTMLSubtree flattens the text nodes under n into one
// whitespace-normalized string, ignoring subtrees under snippetSkipTags.
//
// Text nodes are written verbatim so that inline markup does not split words
// or push punctuation away from the preceding word. A separating space is
// added only around elements that are not in snippetInlineTags (including
// skipped ones), so adjacent cells or paragraphs never run together.
func textFromHTMLSubtree(n *html.Node) string {
	var buf strings.Builder
	var walk func(*html.Node)
	walk = func(node *html.Node) {
		boundary := false
		switch node.Type {
		case html.ElementNode:
			tag := strings.ToLower(node.Data)
			if snippetSkipTags[tag] {
				// Keep the words on both sides of the skipped element apart.
				buf.WriteByte(' ')
				return
			}
			boundary = !snippetInlineTags[tag]
		case html.TextNode:
			buf.WriteString(node.Data)
		}
		if boundary {
			buf.WriteByte(' ')
		}
		for c := node.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
		if boundary {
			buf.WriteByte(' ')
		}
	}
	walk(n)
	return strings.Join(strings.Fields(buf.String()), " ")
}

// pickBestSnippetLine returns the highest-scoring candidate that is not
// boilerplate, with markup stripped, or "" when every candidate is rejected.
// On equal scores the earliest candidate is kept.
func pickBestSnippetLine(candidates []string) string {
	var best string
	bestScore := -1
	for _, c := range candidates {
		c = stripSnippetMarkup(c)
		if c == "" || isSnippetBoilerplate(c) {
			continue
		}
		score := snippetLineScore(c)
		if score > bestScore {
			bestScore = score
			best = c
		}
	}
	return best
}

// snippetLineScore rates a candidate line. Lines with fewer than 8 letters
// score 0. Otherwise the score is four points per letter, plus 40 when the
// line is longer than 40 and shorter than 280 bytes, or plus 10 when it is
// 280 bytes or longer. Lengths are measured in bytes, not runes.
func snippetLineScore(s string) int {
	letters := 0
	for _, r := range s {
		if unicode.IsLetter(r) {
			letters++
		}
	}
	if letters < 8 {
		return 0
	}
	score := letters * 4
	if len(s) > 40 && len(s) < 280 {
		score += 40
	}
	if len(s) >= 280 {
		score += 10
	}
	return score
}

// isSnippetBoilerplate reports whether s should be rejected as a preview
// line. A line is rejected when it is shorter than 4 bytes after trimming,
// is flagged by looksLikeCSSSnippet or isMostlySeparatorLine, is shorter than
// 160 bytes and contains a known boilerplate phrase, is a URL shorter than
// 100 bytes, is a bracketed "[...]" line shorter than 80 bytes, or has a
// snippetLetterRatio below 0.35. All lengths are byte lengths.
func isSnippetBoilerplate(s string) bool {
	s = strings.TrimSpace(s)
	if len(s) < 4 {
		return true
	}
	if looksLikeCSSSnippet(s) || isMostlySeparatorLine(s) {
		return true
	}
	lower := strings.ToLower(s)
	// Long lines that merely mention a boilerplate phrase are kept, so the
	// phrase scan only runs for short lines.
	if len(s) < 160 {
		for _, phrase := range snippetBoilerplatePhrases {
			if strings.Contains(lower, phrase) {
				return true
			}
		}
	}
	if strings.HasPrefix(lower, "http://") || strings.HasPrefix(lower, "https://") {
		return len(s) < 100
	}
	if strings.HasPrefix(s, "[") && strings.HasSuffix(s, "]") && len(s) < 80 {
		return true
	}
	return snippetLetterRatio(s) < 0.35
}

func looksLikeCSSSnippet(s string) bool {
	lower := strings.ToLower(s)
	if strings.Contains(lower, ":root") || strings.Contains(lower, "color-scheme:") || strings.Contains(lower, "@media") || strings.Contains(lower, "