46 lines
1.1 KiB
Go
46 lines
1.1 KiB
Go
package sanitize
|
|
|
|
import "regexp"
|
|
|
|
var (
	// imgTagRe matches an <img ...> tag case-insensitively; group 1 is the raw
	// attribute text between the tag name and the closing '>'.
	imgTagRe = regexp.MustCompile(`(?i)<img\b([^>]*)>`)

	// imgSrcAttrRe reports whether attribute text contains an attribute named
	// exactly "src". The name must sit at the start of the text or follow
	// whitespace or a quote: a bare \b also fires after '-', '?' and '&', which
	// made data-src="..." and URLs containing "?src=" look like a src attribute.
	imgSrcAttrRe = regexp.MustCompile(`(?i)(?:^|[\s"'])src\s*=`)

	// imgLazySrcRe matches a quoted lazy-load data-* attribute. Group 2 holds the
	// value of a double-quoted attribute, group 3 the value of a single-quoted one.
	imgLazySrcRe = regexp.MustCompile(
		`(?i)\bdata-(?:src|original|lazy-src|url|image|href)\s*=\s*("([^"]*)"|'([^']*)')`,
	)
)

// promoteEmailImageSources copies lazy-load URLs into src when newsletters omit src.
//
// Every <img> tag in html that has no src attribute but carries a non-empty,
// quoted data-src, data-original, data-lazy-src, data-url, data-image or
// data-href attribute gets a src attribute appended with that value, using the
// same quote character as the source attribute. Tags that already have src, or
// that have no usable lazy attribute, are returned unchanged. Rewritten tags
// are emitted with a lowercase "<img" prefix.
func promoteEmailImageSources(html string) string {
	return imgTagRe.ReplaceAllStringFunc(html, func(tag string) string {
		m := imgTagRe.FindStringSubmatch(tag)
		if len(m) < 2 {
			return tag
		}
		attrs := m[1]
		if imgSrcAttrRe.MatchString(attrs) {
			return tag
		}

		quote, url := firstLazyImgURL(attrs)
		if url == "" {
			return tag
		}

		// Keep a self-closing slash at the very end of the tag; appending src
		// after it would produce `<img ... / src="...">`.
		head, closer := splitSelfClosingImgAttrs(attrs)
		return "<img" + head + " src=" + quote + url + quote + closer
	})
}

// firstLazyImgURL returns the value of the first lazy-load attribute in attrs
// that is non-empty, together with the quote character that delimited it.
// Empty attributes are skipped so a later populated one can still be used.
// Both results are empty when no such attribute exists.
func firstLazyImgURL(attrs string) (quote, url string) {
	for _, m := range imgLazySrcRe.FindAllStringSubmatch(attrs, -1) {
		switch {
		case m[2] != "":
			return `"`, m[2]
		case m[3] != "":
			return `'`, m[3]
		}
	}
	return "", ""
}

// splitSelfClosingImgAttrs separates a trailing self-closing slash from attrs.
// It returns the attribute text to keep and the text that closes the tag. The
// slash is only treated as self-closing when it stands alone, i.e. it follows
// whitespace or a closing quote; otherwise attrs is returned untouched with ">"
// so a slash that ends an unquoted value or attribute name is not altered.
func splitSelfClosingImgAttrs(attrs string) (head, closer string) {
	end := trimImgAttrSpaceEnd(attrs, len(attrs))
	if end == 0 || attrs[end-1] != '/' {
		return attrs, ">"
	}
	slash := end - 1
	if slash > 0 {
		if prev := attrs[slash-1]; !isImgAttrSpace(prev) && prev != '"' && prev != '\'' {
			return attrs, ">"
		}
	}
	return attrs[:trimImgAttrSpaceEnd(attrs, slash)], " />"
}

// trimImgAttrSpaceEnd returns the largest index i <= end such that s[:i] does
// not end in HTML whitespace.
func trimImgAttrSpaceEnd(s string, end int) int {
	for end > 0 && isImgAttrSpace(s[end-1]) {
		end--
	}
	return end
}

// isImgAttrSpace reports whether b is a space, tab, line feed, carriage return
// or form feed.
func isImgAttrSpace(b byte) bool {
	switch b {
	case ' ', '\t', '\n', '\r', '\f':
		return true
	}
	return false
}
|
|
|
|
func SanitizeHTML(html string) string {
|
|
if html == "" {
|
|
return ""
|
|
}
|
|
html = StripHiddenEmailHTML(html)
|
|
html = policy.Sanitize(html)
|
|
html = promoteEmailImageSources(html)
|
|
return stripUnsafeCSSURLs(html)
|
|
}
|