120 lines
2.9 KiB
Go
120 lines
2.9 KiB
Go
package sanitize
|
|
|
|
import (
|
|
"bytes"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// StripHiddenEmailHTML removes invisible preheader / preview blocks common in marketing mail.
|
|
// Must run before bluemonday, which strips display:none styles and would expose padding text.
|
|
func StripHiddenEmailHTML(raw string) string {
|
|
if raw == "" {
|
|
return raw
|
|
}
|
|
doc, err := html.Parse(strings.NewReader(raw))
|
|
if err != nil {
|
|
return stripHiddenEmailHTMLRegex(raw)
|
|
}
|
|
var remove []*html.Node
|
|
var walk func(*html.Node)
|
|
walk = func(n *html.Node) {
|
|
if n.Type == html.ElementNode && shouldStripHiddenElement(n) {
|
|
remove = append(remove, n)
|
|
return
|
|
}
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
walk(c)
|
|
}
|
|
}
|
|
walk(doc)
|
|
for _, n := range remove {
|
|
if n.Parent != nil {
|
|
n.Parent.RemoveChild(n)
|
|
}
|
|
}
|
|
var buf bytes.Buffer
|
|
if err := html.Render(&buf, doc); err != nil {
|
|
return stripHiddenEmailHTMLRegex(raw)
|
|
}
|
|
return buf.String()
|
|
}
|
|
|
|
func shouldStripHiddenElement(n *html.Node) bool {
|
|
if n.Type != html.ElementNode {
|
|
return false
|
|
}
|
|
if attrVal(n, "hidden") != "" {
|
|
return true
|
|
}
|
|
if strings.EqualFold(attrVal(n, "aria-hidden"), "true") {
|
|
return true
|
|
}
|
|
class := strings.ToLower(attrVal(n, "class"))
|
|
if strings.Contains(class, "mcnpreviewtext") ||
|
|
strings.Contains(class, "preheader") ||
|
|
strings.Contains(class, "preview-text") {
|
|
return true
|
|
}
|
|
style := strings.ToLower(attrVal(n, "style"))
|
|
if style == "" {
|
|
return false
|
|
}
|
|
styleCompact := strings.ReplaceAll(style, " ", "")
|
|
return strings.Contains(styleCompact, "display:none") ||
|
|
strings.Contains(styleCompact, "mso-hide:all") ||
|
|
strings.Contains(styleCompact, "max-height:0") ||
|
|
strings.Contains(styleCompact, "opacity:0") ||
|
|
strings.Contains(styleCompact, "font-size:0") ||
|
|
strings.Contains(styleCompact, "visibility:hidden") ||
|
|
strings.Contains(styleCompact, "overflow:hidden") && strings.Contains(styleCompact, "max-height:0")
|
|
}
|
|
|
|
func attrVal(n *html.Node, key string) string {
|
|
for _, a := range n.Attr {
|
|
if strings.EqualFold(a.Key, key) {
|
|
return a.Val
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// hiddenEmailHTMLPatterns are the fallback patterns applied by
// stripHiddenEmailHTMLRegex. They are compiled once at package scope instead
// of on every call.
//
// Both patterns are case-insensitive, let "." match newlines, and only
// recognise double-quoted attribute values. Matching is non-greedy up to the
// first closing tag, so a nested <span>/<div> ends the match early.
var hiddenEmailHTMLPatterns = []*regexp.Regexp{
	// <span> whose class attribute mentions mcnPreviewText.
	regexp.MustCompile(`(?is)<span[^>]*class="[^"]*mcnPreviewText[^"]*"[^>]*>.*?</span>`),
	// <div> whose inline style contains display:none.
	regexp.MustCompile(`(?is)<div[^>]*style="[^"]*display\s*:\s*none[^"]*"[^>]*>.*?</div>`),
}

// stripHiddenEmailHTMLRegex removes hidden preview blocks from raw using
// regular expressions only. It is the fallback used when the HTML cannot be
// parsed or rendered. Every match of each pattern in hiddenEmailHTMLPatterns
// is deleted, in order; input without a match is returned unchanged.
func stripHiddenEmailHTMLRegex(raw string) string {
	out := raw
	for _, re := range hiddenEmailHTMLPatterns {
		out = re.ReplaceAllString(out, "")
	}
	return out
}
|
|
|
|
// isInvisiblePaddingRune reports whether r is one of the code points treated
// as preview padding: U+034F, U+200B through U+200F, U+FEFF, U+00A0 and
// U+2007.
func isInvisiblePaddingRune(r rune) bool {
	switch r {
	case '\u034f', '\u200b', '\u200c', '\u200d', '\u200e', '\u200f', '\ufeff', '\u00a0', '\u2007':
		return true
	default:
		return false
	}
}

// StripInvisibleTextRuns removes invisible Unicode padding from plain text
// previews and normalizes whitespace.
//
// Zero-width padding runes are deleted. The two space-like padding runes,
// U+00A0 (no-break space) and U+2007 (figure space), are turned into an
// ordinary space rather than deleted, so words they separate are not glued
// together ("Hello\u00a0World" becomes "Hello World", not "HelloWorld").
// Afterwards every run of whitespace is collapsed to a single space and
// leading/trailing whitespace is trimmed. An empty input is returned as is.
func StripInvisibleTextRuns(s string) string {
	if s == "" {
		return s
	}
	var b strings.Builder
	b.Grow(len(s))
	for _, r := range s {
		switch {
		case r == '\u00a0' || r == '\u2007':
			// Space-like padding: keep it as a word separator; the
			// Fields/Join pass below collapses any resulting runs.
			b.WriteByte(' ')
		case isInvisiblePaddingRune(r):
			// Zero-width padding: drop.
		default:
			b.WriteRune(r)
		}
	}
	return strings.Join(strings.Fields(b.String()), " ")
}
|