package sanitize

import (
	"bytes"
	"regexp"
	"strings"

	"golang.org/x/net/html"
)

// StripHiddenEmailHTML removes invisible preheader / preview blocks common in marketing mail.
// Must run before bluemonday, which strips display:none styles and would expose padding text.
//
// raw is parsed into a node tree, every element accepted by
// shouldStripHiddenElement is detached together with its whole subtree, and
// the tree is rendered back to a string. Empty input is returned unchanged.
// If parsing or rendering reports an error, the regex-based
// stripHiddenEmailHTMLRegex is applied to the original input instead.
//
// NOTE(review): the result is the re-rendered parse tree, not a byte-for-byte
// copy of raw with spans cut out, so markup may come back normalised.
func StripHiddenEmailHTML(raw string) string {
	if raw == "" {
		return raw
	}
	doc, err := html.Parse(strings.NewReader(raw))
	if err != nil {
		return stripHiddenEmailHTMLRegex(raw)
	}

	// Collect first and detach afterwards so that the sibling links the walk
	// relies on are not modified while it is still iterating over them.
	var remove []*html.Node
	var walk func(*html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.ElementNode && shouldStripHiddenElement(n) {
			remove = append(remove, n)
			return // the whole subtree goes; no need to look inside it
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(doc)

	for _, n := range remove {
		if n.Parent != nil {
			n.Parent.RemoveChild(n)
		}
	}

	var buf bytes.Buffer
	if err := html.Render(&buf, doc); err != nil {
		return stripHiddenEmailHTMLRegex(raw)
	}
	return buf.String()
}

// shouldStripHiddenElement reports whether n is an element that is hidden
// from the reader and should therefore be dropped. An element qualifies when
// any of the following holds:
//
//   - it carries the hidden attribute (with or without a value);
//   - aria-hidden equals "true" (case-insensitive);
//   - its class contains "mcnpreviewtext", "preheader" or "preview-text"
//     (case-insensitive substring match);
//   - its inline style contains display:none, mso-hide:all or
//     visibility:hidden, or sets max-height, opacity or font-size to zero.
//
// Non-element nodes always yield false.
func shouldStripHiddenElement(n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}

	// hidden is a boolean attribute: the common form <div hidden> has an
	// empty value, so presence has to be tested rather than the value.
	if _, ok := lookupAttr(n, "hidden"); ok {
		return true
	}
	if strings.EqualFold(attrVal(n, "aria-hidden"), "true") {
		return true
	}

	class := strings.ToLower(attrVal(n, "class"))
	if strings.Contains(class, "mcnpreviewtext") ||
		strings.Contains(class, "preheader") ||
		strings.Contains(class, "preview-text") {
		return true
	}

	style := strings.ToLower(attrVal(n, "style"))
	if style == "" {
		return false
	}
	// Drop every kind of whitespace (spaces, tabs, newlines) so that
	// "display : none" and multi-line style attributes match as well.
	styleCompact := strings.Join(strings.Fields(style), "")

	if strings.Contains(styleCompact, "display:none") ||
		strings.Contains(styleCompact, "mso-hide:all") ||
		strings.Contains(styleCompact, "visibility:hidden") {
		return true
	}

	// Numeric properties need a real zero check: a plain substring test for
	// "font-size:0" would also hit visible text styled "font-size:0.9em".
	return styleSetsZero(styleCompact, "max-height") ||
		styleSetsZero(styleCompact, "opacity") ||
		styleSetsZero(styleCompact, "font-size")
}

// styleSetsZero reports whether compact, a lower-cased style string with all
// whitespace removed, contains a "prop:value" declaration whose value is a
// zero quantity. Every occurrence of prop is examined.
func styleSetsZero(compact, prop string) bool {
	key := prop + ":"
	rest := compact
	for {
		i := strings.Index(rest, key)
		if i < 0 {
			return false
		}
		rest = rest[i+len(key):]

		// The value runs up to the next ';' or the end of the string.
		val := rest
		if end := strings.IndexByte(rest, ';'); end >= 0 {
			val = rest[:end]
		}
		if isZeroQuantity(val) {
			return true
		}
	}
}

// isZeroQuantity reports whether val (lower-case, no whitespace) denotes the
// number zero, optionally signed, optionally followed by a unit and/or
// "!important": "0", "0px", "0.0em", "0%", "0!important".
func isZeroQuantity(val string) bool {
	val = strings.TrimSuffix(val, "!important")
	if val != "" && (val[0] == '+' || val[0] == '-') {
		val = val[1:]
	}
	// Strip the unit, leaving only the numeric part.
	num := strings.TrimRight(val, "abcdefghijklmnopqrstuvwxyz%")

	sawZero := false
	dots := 0
	for i := 0; i < len(num); i++ {
		switch num[i] {
		case '0':
			sawZero = true
		case '.':
			dots++
		default:
			return false // any other digit or character means non-zero / not a number
		}
	}
	return sawZero && dots <= 1
}

// lookupAttr returns the value of the first attribute of n whose key equals
// key case-insensitively, and whether such an attribute exists at all.
func lookupAttr(n *html.Node, key string) (string, bool) {
	for _, a := range n.Attr {
		if strings.EqualFold(a.Key, key) {
			return a.Val, true
		}
	}
	return "", false
}

// attrVal returns the value of the first attribute of n whose key equals key
// case-insensitively, or "" when the attribute is absent. An absent attribute
// and one with an empty value are indistinguishable here; use lookupAttr when
// that difference matters.
func attrVal(n *html.Node, key string) string {
	val, _ := lookupAttr(n, key)
	return val
}

func
stripHiddenEmailHTMLRegex(raw string) string { patterns := []*regexp.Regexp{ regexp.MustCompile(`(?is)]*class="[^"]*mcnPreviewText[^"]*"[^>]*>.*?`), regexp.MustCompile(`(?is)