package sanitize import ( "bytes" "regexp" "strings" "golang.org/x/net/html" ) // StripHiddenEmailHTML removes invisible preheader / preview blocks common in marketing mail. // Must run before bluemonday, which strips display:none styles and would expose padding text. func StripHiddenEmailHTML(raw string) string { if raw == "" { return raw } doc, err := html.Parse(strings.NewReader(raw)) if err != nil { return stripHiddenEmailHTMLRegex(raw) } var remove []*html.Node var walk func(*html.Node) walk = func(n *html.Node) { if n.Type == html.ElementNode && shouldStripHiddenElement(n) { remove = append(remove, n) return } for c := n.FirstChild; c != nil; c = c.NextSibling { walk(c) } } walk(doc) for _, n := range remove { if n.Parent != nil { n.Parent.RemoveChild(n) } } var buf bytes.Buffer if err := html.Render(&buf, doc); err != nil { return stripHiddenEmailHTMLRegex(raw) } return buf.String() } func shouldStripHiddenElement(n *html.Node) bool { if n.Type != html.ElementNode { return false } if attrVal(n, "hidden") != "" { return true } if strings.EqualFold(attrVal(n, "aria-hidden"), "true") { return true } class := strings.ToLower(attrVal(n, "class")) if strings.Contains(class, "mcnpreviewtext") || strings.Contains(class, "preheader") || strings.Contains(class, "preview-text") { return true } style := strings.ToLower(attrVal(n, "style")) if style == "" { return false } styleCompact := strings.ReplaceAll(style, " ", "") return strings.Contains(styleCompact, "display:none") || strings.Contains(styleCompact, "mso-hide:all") || strings.Contains(styleCompact, "max-height:0") || strings.Contains(styleCompact, "opacity:0") || strings.Contains(styleCompact, "font-size:0") || strings.Contains(styleCompact, "visibility:hidden") || strings.Contains(styleCompact, "overflow:hidden") && strings.Contains(styleCompact, "max-height:0") } func attrVal(n *html.Node, key string) string { for _, a := range n.Attr { if strings.EqualFold(a.Key, key) { return a.Val } } return "" } func stripHiddenEmailHTMLRegex(raw string) string { patterns := []*regexp.Regexp{ regexp.MustCompile(`(?is)]*class="[^"]*mcnPreviewText[^"]*"[^>]*>.*?`), regexp.MustCompile(`(?is)]*style="[^"]*display\s*:\s*none[^"]*"[^>]*>.*?`), } out := raw for _, re := range patterns { out = re.ReplaceAllString(out, "") } return out } func isInvisiblePaddingRune(r rune) bool { switch r { case '\u034f', '\u200b', '\u200c', '\u200d', '\u200e', '\u200f', '\ufeff', '\u00a0', '\u2007': return true default: return false } } // StripInvisibleTextRuns removes repeated invisible Unicode padding from plain text previews. func StripInvisibleTextRuns(s string) string { if s == "" { return s } var b strings.Builder b.Grow(len(s)) for _, r := range s { if isInvisiblePaddingRune(r) { continue } b.WriteRune(r) } return strings.Join(strings.Fields(b.String()), " ") }