package sanitize

import "regexp"

var (
	// imgTagRe matches a single <img ...> tag (case-insensitively) and
	// captures everything between the tag name and the closing '>' as group 1.
	imgTagRe = regexp.MustCompile(`(?i)<img\b([^>]*)>`)

	// imgSrcAttrRe reports whether attribute text already carries a src
	// attribute. The explicit prefix is used instead of \b because '-' is a
	// non-word character, so \bsrc would also match inside "data-src=" and
	// wrongly treat lazy-load attributes as a real src.
	imgSrcAttrRe = regexp.MustCompile(`(?i)(?:^|[\s"'/])src\s*=`)

	// imgLazySrcRe matches a quoted lazy-load attribute. Group 2 holds the
	// value when it is double-quoted, group 3 when it is single-quoted.
	imgLazySrcRe = regexp.MustCompile(
		`(?i)\bdata-(?:src|original|lazy-src|url|image|href)\s*=\s*("([^"]*)"|'([^']*)')`,
	)
)

// promoteEmailImageSources copies lazy-load URLs into src when newsletters omit src.
//
// Every <img> tag in html that has no src attribute but does have a non-empty,
// quoted data-src / data-original / data-lazy-src / data-url / data-image /
// data-href attribute is rewritten with a src attribute prepended; the
// original attributes are kept as they were. All other tags, and all text
// outside <img> tags, are returned unchanged.
func promoteEmailImageSources(html string) string {
	return imgTagRe.ReplaceAllStringFunc(html, func(tag string) string {
		attrs := imgTagRe.FindStringSubmatch(tag)
		if len(attrs) < 2 || imgSrcAttrRe.MatchString(attrs[1]) {
			// Not a usable match, or the tag already has a real src.
			return tag
		}

		lazy := imgLazySrcRe.FindStringSubmatch(attrs[1])
		if len(lazy) < 4 {
			return tag
		}

		// Reuse the quote style of the source attribute: the pattern
		// guarantees the value cannot contain that quote character, so the
		// value cannot break out of the new src attribute.
		quote, url := `'`, lazy[3]
		if lazy[2] != "" {
			quote, url = `"`, lazy[2]
		}
		if url == "" {
			return tag
		}

		// attrs[1] is emitted after the new src so a trailing "/" of a
		// self-closing tag stays at the end.
		return "<img src=" + quote + url + quote + attrs[1] + ">"
	})
}

// SanitizeHTML returns a cleaned copy of html. An empty input yields an empty
// string; otherwise the input is passed, in order, through
// StripHiddenEmailHTML, policy.Sanitize, promoteEmailImageSources and
// stripUnsafeCSSURLs.
//
// NOTE(review): image-source promotion runs after policy.Sanitize, so the URL
// copied into src is not re-checked by the policy in this function — confirm
// that the policy already constrains the data-* attribute values it lets
// through.
func SanitizeHTML(html string) string {
	if html == "" {
		return ""
	}
	html = StripHiddenEmailHTML(html)
	html = policy.Sanitize(html)
	html = promoteEmailImageSources(html)
	return stripUnsafeCSSURLs(html)
}