Refactor snippet processing and enhance boilerplate detection
- Improved the `RepairSnippetWithBodies` function to streamline snippet rebuilding logic and reduce redundancy.
- Introduced new utility functions for stripping CSS noise and decoding HTML entities in snippets.
- Enhanced boilerplate detection to better identify low-quality snippets, including legal footers and view-in-browser prompts.
- Added comprehensive tests for new functionality and edge cases in snippet processing.
This commit is contained in:
parent
25d3ac4cd9
commit
69bde44b94
@ -51,12 +51,12 @@ func RepairSnippet(snippet string) string {
|
|||||||
func RepairSnippetWithBodies(snippet, bodyText, bodyHTML string) string {
|
func RepairSnippetWithBodies(snippet, bodyText, bodyHTML string) string {
|
||||||
snippet = stripSnippetMarkup(snippet)
|
snippet = stripSnippetMarkup(snippet)
|
||||||
if decoded := decodeBareQuotedPrintableIfNeeded(snippet); decoded != snippet {
|
if decoded := decodeBareQuotedPrintableIfNeeded(snippet); decoded != snippet {
|
||||||
snippet = decoded
|
snippet = stripSnippetMarkup(decoded)
|
||||||
}
|
}
|
||||||
if decoded := decodeBareBase64IfNeeded(snippet); decoded != snippet {
|
if decoded := decodeBareBase64IfNeeded(snippet); decoded != snippet {
|
||||||
snippet = decoded
|
snippet = stripSnippetMarkup(decoded)
|
||||||
}
|
}
|
||||||
snippet = stripPlainTextPreheaderPadding(snippet)
|
snippet = stripSnippetMarkup(stripPlainTextPreheaderPadding(snippet))
|
||||||
if looksLikeRawMIME(snippet) {
|
if looksLikeRawMIME(snippet) {
|
||||||
t, h, ok := parseEmbeddedMIME([]byte(snippet))
|
t, h, ok := parseEmbeddedMIME([]byte(snippet))
|
||||||
if ok {
|
if ok {
|
||||||
@ -65,16 +65,13 @@ func RepairSnippetWithBodies(snippet, bodyText, bodyHTML string) string {
|
|||||||
}
|
}
|
||||||
bodyText, bodyHTML = RepairStoredBodies(bodyText, bodyHTML)
|
bodyText, bodyHTML = RepairStoredBodies(bodyText, bodyHTML)
|
||||||
if bodyText != "" || bodyHTML != "" {
|
if bodyText != "" || bodyHTML != "" {
|
||||||
if rebuilt := SnippetFromBodies(bodyText, bodyHTML, 200); rebuilt != "" {
|
rebuilt := SnippetFromBodies(bodyText, bodyHTML, 200)
|
||||||
if SnippetLooksLowQuality(snippet) || snippet == "" {
|
storedBad := snippet == "" || SnippetLooksLowQuality(snippet) || isSnippetBoilerplate(snippet)
|
||||||
return rebuilt
|
if rebuilt != "" && (storedBad || snippetLineScore(rebuilt) > snippetLineScore(snippet)) {
|
||||||
}
|
return rebuilt
|
||||||
if SnippetLooksLowQuality(rebuilt) {
|
}
|
||||||
return snippet
|
if storedBad {
|
||||||
}
|
return rebuilt
|
||||||
if snippetLineScore(rebuilt) > snippetLineScore(snippet) {
|
|
||||||
return rebuilt
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if snippet == "" {
|
if snippet == "" {
|
||||||
|
|||||||
@ -11,7 +11,12 @@ import (
|
|||||||
"github.com/ultisuite/ulti-backend/internal/mail/sanitize"
|
"github.com/ultisuite/ulti-backend/internal/mail/sanitize"
|
||||||
)
|
)
|
||||||
|
|
||||||
var snippetHTMLTagRE = regexp.MustCompile(`(?is)<[^>]*>`)
|
var (
|
||||||
|
snippetHTMLTagRE = regexp.MustCompile(`(?is)<[^>]*>`)
|
||||||
|
snippetHTMLEntityRE = regexp.MustCompile(`(?i)(?:&#x?[0-9a-f]+;|&[a-z][a-z0-9]{1,8};)`)
|
||||||
|
snippetStyleBlockRE = regexp.MustCompile(`(?is)<style[^>]*>.*?</style>`)
|
||||||
|
snippetSeparatorRunRE = regexp.MustCompile(`-{8,}|_{8,}|={8,}|\*{8,}`)
|
||||||
|
)
|
||||||
|
|
||||||
var snippetSkipTags = map[string]bool{
|
var snippetSkipTags = map[string]bool{
|
||||||
"script": true, "style": true, "head": true, "noscript": true,
|
"script": true, "style": true, "head": true, "noscript": true,
|
||||||
@ -28,13 +33,21 @@ var snippetBlockTags = map[string]bool{
|
|||||||
func SnippetFromBodies(text, html string, maxLen int) string {
|
func SnippetFromBodies(text, html string, maxLen int) string {
|
||||||
candidates := snippetCandidates(text, html)
|
candidates := snippetCandidates(text, html)
|
||||||
best := pickBestSnippetLine(candidates)
|
best := pickBestSnippetLine(candidates)
|
||||||
|
if best == "" && strings.TrimSpace(html) != "" {
|
||||||
|
flat := stripSnippetMarkup(stripHTMLForSnippet(stripStyleBlocksFromHTML(html)))
|
||||||
|
best = pickBestSnippetLine(splitSnippetSegments(flat))
|
||||||
|
}
|
||||||
if best == "" {
|
if best == "" {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
return truncate(stripSnippetMarkup(best), maxLen)
|
return truncate(stripSnippetMarkup(best), maxLen)
|
||||||
}
|
}
|
||||||
|
|
||||||
// stripSnippetMarkup removes HTML tags and entities from preview text.
|
func stripStyleBlocksFromHTML(html string) string {
|
||||||
|
return snippetStyleBlockRE.ReplaceAllString(html, " ")
|
||||||
|
}
|
||||||
|
|
||||||
|
// stripSnippetMarkup removes HTML tags, entities, and CSS noise from preview text.
|
||||||
func stripSnippetMarkup(s string) string {
|
func stripSnippetMarkup(s string) string {
|
||||||
s = strings.TrimSpace(s)
|
s = strings.TrimSpace(s)
|
||||||
if s == "" {
|
if s == "" {
|
||||||
@ -43,10 +56,44 @@ func stripSnippetMarkup(s string) string {
|
|||||||
if snippetHTMLTagRE.MatchString(s) {
|
if snippetHTMLTagRE.MatchString(s) {
|
||||||
s = snippetHTMLTagRE.ReplaceAllString(s, " ")
|
s = snippetHTMLTagRE.ReplaceAllString(s, " ")
|
||||||
}
|
}
|
||||||
s = stdhtml.UnescapeString(s)
|
s = unescapeSnippetEntities(s)
|
||||||
|
s = stripSnippetCSSTail(s)
|
||||||
return strings.Join(strings.Fields(s), " ")
|
return strings.Join(strings.Fields(s), " ")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func unescapeSnippetEntities(s string) string {
|
||||||
|
for i := 0; i < 4; i++ {
|
||||||
|
prev := s
|
||||||
|
s = stdhtml.UnescapeString(s)
|
||||||
|
s = snippetHTMLEntityRE.ReplaceAllStringFunc(s, func(entity string) string {
|
||||||
|
return stdhtml.UnescapeString(entity)
|
||||||
|
})
|
||||||
|
if s == prev {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// stripSnippetCSSTail removes trailing CSS comment junk often leaked into stored snippets.
//
// The input is trimmed, then searched for the markers "/*//", "/*" and "//||".
// The text is cut at the earliest marker position that still leaves at least
// 12 bytes of trimmed text in front of it, so a junk marker that precedes
// another one is never kept in the result. If no marker qualifies, nothing is
// cut. In every case trailing spaces and '/', '*', '-', '|', '_' characters
// are stripped from the end before returning.
func stripSnippetCSSTail(s string) string {
	// Minimum length (in bytes) of the text kept before a marker; shorter
	// heads are treated as "the marker is part of the content".
	const minHeadLen = 12

	s = strings.TrimSpace(s)
	cut := -1
	for _, marker := range []string{"/*//", "/*", "//||"} {
		idx := strings.Index(s, marker)
		if idx < 0 || len(strings.TrimSpace(s[:idx])) < minHeadLen {
			continue
		}
		// Keep the leftmost qualifying position rather than the first
		// marker in list order.
		if cut < 0 || idx < cut {
			cut = idx
		}
	}
	if cut >= 0 {
		s = strings.TrimSpace(s[:cut])
	}
	return strings.TrimRight(s, " /*-|_")
}
|
||||||
|
|
||||||
|
func hasUndecodedHTMLEntities(s string) bool {
|
||||||
|
return snippetHTMLEntityRE.MatchString(s)
|
||||||
|
}
|
||||||
|
|
||||||
func snippetCandidates(text, html string) []string {
|
func snippetCandidates(text, html string) []string {
|
||||||
var out []string
|
var out []string
|
||||||
text = strings.TrimSpace(stripPlainTextPreheaderPadding(text))
|
text = strings.TrimSpace(stripPlainTextPreheaderPadding(text))
|
||||||
@ -64,11 +111,13 @@ func splitSnippetSegments(s string) []string {
|
|||||||
})
|
})
|
||||||
var segments []string
|
var segments []string
|
||||||
for _, line := range raw {
|
for _, line := range raw {
|
||||||
line = stripSnippetMarkup(line)
|
for _, part := range snippetSeparatorRunRE.Split(line, -1) {
|
||||||
if line == "" {
|
part = stripSnippetMarkup(part)
|
||||||
continue
|
if part == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
segments = append(segments, part)
|
||||||
}
|
}
|
||||||
segments = append(segments, line)
|
|
||||||
}
|
}
|
||||||
return segments
|
return segments
|
||||||
}
|
}
|
||||||
@ -79,6 +128,7 @@ func htmlSnippetCandidates(raw string) []string {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
raw = sanitize.StripHiddenEmailHTML(raw)
|
raw = sanitize.StripHiddenEmailHTML(raw)
|
||||||
|
raw = stripStyleBlocksFromHTML(raw)
|
||||||
doc, err := html.Parse(strings.NewReader(raw))
|
doc, err := html.Parse(strings.NewReader(raw))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if flat := strings.TrimSpace(stripHTMLForSnippet(raw)); flat != "" {
|
if flat := strings.TrimSpace(stripHTMLForSnippet(raw)); flat != "" {
|
||||||
@ -164,6 +214,9 @@ func pickBestSnippetLine(candidates []string) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func snippetLineScore(s string) int {
|
func snippetLineScore(s string) int {
|
||||||
|
if isSnippetBoilerplate(s) {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
letters := 0
|
letters := 0
|
||||||
for _, r := range s {
|
for _, r := range s {
|
||||||
if unicode.IsLetter(r) {
|
if unicode.IsLetter(r) {
|
||||||
@ -180,6 +233,9 @@ func snippetLineScore(s string) int {
|
|||||||
if len(s) >= 280 {
|
if len(s) >= 280 {
|
||||||
score += 10
|
score += 10
|
||||||
}
|
}
|
||||||
|
if strings.Contains(strings.ToLower(s), "http://") || strings.Contains(strings.ToLower(s), "https://") {
|
||||||
|
score -= 30
|
||||||
|
}
|
||||||
return score
|
return score
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -188,18 +244,17 @@ func isSnippetBoilerplate(s string) bool {
|
|||||||
if s == "" || len(s) < 4 {
|
if s == "" || len(s) < 4 {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
lower := strings.ToLower(s)
|
|
||||||
if looksLikeCSSSnippet(s) {
|
if looksLikeCSSSnippet(s) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if isMostlySeparatorLine(s) {
|
if isMostlySeparatorLine(s) || hasLeadingSeparatorRun(s) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
if isViewInBrowserSnippet(s) || isSnippetLegalFooter(s) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
lower := strings.ToLower(s)
|
||||||
boilerplate := []string{
|
boilerplate := []string{
|
||||||
"afficher dans le navigateur",
|
|
||||||
"view in browser",
|
|
||||||
"view this email in your browser",
|
|
||||||
"voir ce message en ligne",
|
|
||||||
"si vous ne visualisez pas",
|
"si vous ne visualisez pas",
|
||||||
"si vous n'arrivez pas à lire",
|
"si vous n'arrivez pas à lire",
|
||||||
"si vous n'arrivez pas a lire",
|
"si vous n'arrivez pas a lire",
|
||||||
@ -214,39 +269,116 @@ func isSnippetBoilerplate(s string) bool {
|
|||||||
"gérer vos préférences",
|
"gérer vos préférences",
|
||||||
}
|
}
|
||||||
for _, phrase := range boilerplate {
|
for _, phrase := range boilerplate {
|
||||||
if strings.Contains(lower, phrase) && len(s) < 160 {
|
if strings.Contains(lower, phrase) && len(s) < 200 {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if strings.HasPrefix(lower, "http://") || strings.HasPrefix(lower, "https://") {
|
if strings.HasPrefix(lower, "http://") || strings.HasPrefix(lower, "https://") {
|
||||||
return len(s) < 100
|
return len(s) < 120
|
||||||
}
|
|
||||||
if strings.HasPrefix(s, "[") && strings.HasSuffix(s, "]") && len(s) < 80 {
|
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
letterRatio := snippetLetterRatio(s)
|
letterRatio := snippetLetterRatio(s)
|
||||||
return letterRatio < 0.35
|
return letterRatio < 0.35
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// normalizeSnippetMatchText prepares s for phrase matching: it lower-cases the
// text, turns square brackets, parentheses and middle dots into spaces, and
// collapses every whitespace run into a single space.
func normalizeSnippetMatchText(s string) string {
	blanked := strings.Map(func(r rune) rune {
		switch r {
		case '[', ']', '(', ')', '·':
			return ' '
		}
		return r
	}, strings.ToLower(s))
	return strings.Join(strings.Fields(blanked), " ")
}
|
||||||
|
|
||||||
|
func isViewInBrowserSnippet(s string) bool {
|
||||||
|
norm := normalizeSnippetMatchText(s)
|
||||||
|
phrases := []string{
|
||||||
|
"afficher dans le navigateur",
|
||||||
|
"view in browser",
|
||||||
|
"view this email in your browser",
|
||||||
|
"voir ce message en ligne",
|
||||||
|
"voir la version en ligne",
|
||||||
|
"version en ligne",
|
||||||
|
}
|
||||||
|
for _, phrase := range phrases {
|
||||||
|
if !strings.Contains(norm, phrase) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
rest := strings.TrimSpace(strings.ReplaceAll(norm, phrase, ""))
|
||||||
|
if len(rest) <= 20 || len(s) <= 90 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// isSnippetLegalFooter reports whether s looks like a company legal footer.
// Only text containing "http" or "www." is considered. Such text is a footer
// when it contains at least two of the listed company/address markers, when it
// has an "https://" link together with "rue " or " bp ", or when it starts
// with "sas " and contains "http". All matching is done on the lower-cased text.
func isSnippetLegalFooter(s string) bool {
	text := strings.ToLower(s)
	has := func(sub string) bool { return strings.Contains(text, sub) }

	if !(has("http") || has("www.")) {
		return false
	}

	footerMarkers := [...]string{
		"sas ", "sarl ", " sa ", " sas.", "rue ", " bp ", "rcs ",
		"kellermann", "ovh.com", "www.ovh",
	}
	matched := 0
	for _, marker := range footerMarkers {
		if !has(marker) {
			continue
		}
		matched++
		if matched == 2 {
			return true
		}
	}

	switch {
	case has("https://") && (has("rue ") || has(" bp ")):
		return true
	default:
		return strings.HasPrefix(text, "sas ") && has("http")
	}
}
|
||||||
|
|
||||||
|
// hasLeadingSeparatorRun reports whether s, after trimming surrounding
// whitespace, begins with at least 12 consecutive separator characters
// ('-', '_', '*', '=', '·' or '—'). Counting stops at the first character
// that is not a separator.
func hasLeadingSeparatorRun(s string) bool {
	const (
		separators = "-_*=·—"
		minRun     = 12
	)

	run := 0
	for _, r := range strings.TrimSpace(s) {
		if !strings.ContainsRune(separators, r) {
			break
		}
		run++
		if run >= minRun {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
func looksLikeCSSSnippet(s string) bool {
|
func looksLikeCSSSnippet(s string) bool {
|
||||||
lower := strings.ToLower(s)
|
lower := strings.ToLower(s)
|
||||||
if strings.Contains(lower, ":root") ||
|
if strings.Contains(lower, ":root") ||
|
||||||
strings.Contains(lower, "color-scheme:") ||
|
strings.Contains(lower, "color-scheme:") ||
|
||||||
strings.Contains(lower, "@media") ||
|
strings.Contains(lower, "@media") ||
|
||||||
|
strings.Contains(lower, "@font-face") ||
|
||||||
|
strings.Contains(lower, "font-family:") ||
|
||||||
strings.Contains(lower, "<!--") {
|
strings.Contains(lower, "<!--") {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
if strings.Contains(lower, "facebook") && strings.Contains(lower, ":root") {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if strings.Contains(lower, "meta for business") && strings.Contains(s, "{") {
|
||||||
|
return true
|
||||||
|
}
|
||||||
if strings.HasPrefix(strings.TrimSpace(s), "/*") {
|
if strings.HasPrefix(strings.TrimSpace(s), "/*") {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
semis := strings.Count(s, ";")
|
if strings.Contains(s, "/*//") || strings.Contains(s, "||//") || strings.Contains(s, "//||") {
|
||||||
braces := strings.Count(s, "{") + strings.Count(s, "}")
|
|
||||||
if braces >= 2 && semis >= 2 {
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if strings.Contains(s, "{") && strings.Contains(s, "}") &&
|
semis := strings.Count(s, ";")
|
||||||
(strings.Contains(lower, "font-") || strings.Contains(lower, "margin:") || strings.Contains(lower, "padding:")) {
|
braces := strings.Count(s, "{") + strings.Count(s, "}")
|
||||||
return true
|
if braces >= 1 && semis >= 1 && strings.Contains(s, "{") && strings.Contains(s, "}") {
|
||||||
|
if strings.Contains(lower, "font-") || strings.Contains(lower, "margin:") ||
|
||||||
|
strings.Contains(lower, "padding:") || strings.Contains(lower, "font-family") {
|
||||||
|
return true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@ -258,11 +390,15 @@ func isMostlySeparatorLine(s string) bool {
|
|||||||
sep := 0
|
sep := 0
|
||||||
for _, r := range s {
|
for _, r := range s {
|
||||||
switch r {
|
switch r {
|
||||||
case '-', '_', '*', '=', '·', '—':
|
case '-', '_', '*', '=', '·', '—', '|':
|
||||||
sep++
|
sep++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return float64(sep)/float64(len(s)) >= 0.6
|
runes := len([]rune(s))
|
||||||
|
if runes == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return float64(sep)/float64(runes) >= 0.55
|
||||||
}
|
}
|
||||||
|
|
||||||
func snippetLetterRatio(s string) float64 {
|
func snippetLetterRatio(s string) float64 {
|
||||||
@ -284,5 +420,8 @@ func SnippetLooksLowQuality(snippet string) bool {
|
|||||||
if snippet == "" {
|
if snippet == "" {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return isSnippetBoilerplate(snippet) || looksLikeCSSSnippet(snippet) || snippetHTMLTagRE.MatchString(snippet)
|
return isSnippetBoilerplate(snippet) ||
|
||||||
|
looksLikeCSSSnippet(snippet) ||
|
||||||
|
snippetHTMLTagRE.MatchString(snippet) ||
|
||||||
|
hasUndecodedHTMLEntities(snippet)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -32,6 +32,18 @@ func TestSnippetFromBodies_skipsViewInBrowser(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSnippetFromBodies_skipsViewInBrowserBracketedStored(t *testing.T) {
|
||||||
|
stored := "[ Afficher dans le navigateur ]..."
|
||||||
|
html := `<html><body><p>Contenu utile sur le webinar matériaux.</p></body></html>`
|
||||||
|
got := RepairSnippetWithBodies(stored, "", html)
|
||||||
|
if strings.Contains(strings.ToLower(got), "afficher dans le navigateur") {
|
||||||
|
t.Fatalf("snippet = %q", got)
|
||||||
|
}
|
||||||
|
if !strings.Contains(got, "webinar") {
|
||||||
|
t.Fatalf("snippet = %q, want body content", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestSnippetFromBodies_skipsSeparatorLine(t *testing.T) {
|
func TestSnippetFromBodies_skipsSeparatorLine(t *testing.T) {
|
||||||
text := "----------------------------------------------------------------\nUn festival rétro au Château de Tilloloy arrive cet été."
|
text := "----------------------------------------------------------------\nUn festival rétro au Château de Tilloloy arrive cet été."
|
||||||
got := SnippetFromBodies(text, "", 200)
|
got := SnippetFromBodies(text, "", 200)
|
||||||
@ -43,6 +55,42 @@ func TestSnippetFromBodies_skipsSeparatorLine(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSnippetFromBodies_skipsSeparatorOnlyLine(t *testing.T) {
|
||||||
|
stored := "--------------------------------------- * U..."
|
||||||
|
html := `<html><body><p>Un festival rétro au Château de Tilloloy arrive cet été.</p></body></html>`
|
||||||
|
got := RepairSnippetWithBodies(stored, "", html)
|
||||||
|
if strings.Contains(got, "---") {
|
||||||
|
t.Fatalf("snippet = %q, want no separator line", got)
|
||||||
|
}
|
||||||
|
if !strings.Contains(got, "festival") {
|
||||||
|
t.Fatalf("snippet = %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSnippetFromBodies_skipsOVHFooter(t *testing.T) {
|
||||||
|
stored := "SAS OVH - https://www.ovh.com/ 2 rue Kellermann BP 80157..."
|
||||||
|
html := `<html><body><p>Votre facture OVH est disponible dans l'espace client.</p></body></html>`
|
||||||
|
got := RepairSnippetWithBodies(stored, "", html)
|
||||||
|
if strings.Contains(got, "Kellermann") {
|
||||||
|
t.Fatalf("snippet = %q, want no legal footer", got)
|
||||||
|
}
|
||||||
|
if !strings.Contains(got, "facture") {
|
||||||
|
t.Fatalf("snippet = %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSnippetFromBodies_skipsFontFaceCSS(t *testing.T) {
|
||||||
|
stored := "@font-face { font-family: 'Playfair Display'; font-style: normal;..."
|
||||||
|
html := `<html><body><p>Découvrez notre collection printemps.</p></body></html>`
|
||||||
|
got := RepairSnippetWithBodies(stored, "", html)
|
||||||
|
if strings.Contains(got, "@font-face") {
|
||||||
|
t.Fatalf("snippet = %q", got)
|
||||||
|
}
|
||||||
|
if !strings.Contains(got, "collection") {
|
||||||
|
t.Fatalf("snippet = %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestSnippetFromBodies_stripsHTMLTags(t *testing.T) {
|
func TestSnippetFromBodies_stripsHTMLTags(t *testing.T) {
|
||||||
text := "<b>Bonjour</b> Eliott, votre <strong>commande</strong> est prête."
|
text := "<b>Bonjour</b> Eliott, votre <strong>commande</strong> est prête."
|
||||||
got := SnippetFromBodies(text, "", 200)
|
got := SnippetFromBodies(text, "", 200)
|
||||||
@ -65,6 +113,26 @@ func TestRepairSnippetWithBodies_stripsStoredHTMLTags(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestStripSnippetMarkup_decodesEntitiesAndCSSTail(t *testing.T) {
|
||||||
|
raw := "Victoria vient d'activer son compte sur passbolt /*//||//..."
|
||||||
|
got := stripSnippetMarkup(raw)
|
||||||
|
if strings.Contains(got, "'") || strings.Contains(got, "'") {
|
||||||
|
t.Fatalf("snippet = %q, want decoded apostrophe", got)
|
||||||
|
}
|
||||||
|
if !strings.Contains(got, "d'activer") {
|
||||||
|
t.Fatalf("snippet = %q, want apostrophe", got)
|
||||||
|
}
|
||||||
|
if strings.Contains(got, "/*") || strings.Contains(got, "||//") {
|
||||||
|
t.Fatalf("snippet = %q, want CSS tail removed", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSnippetLooksLowQuality_encodedEntities(t *testing.T) {
|
||||||
|
if !SnippetLooksLowQuality("Hello d'activer") {
|
||||||
|
t.Fatal("expected encoded entities to be low quality")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRepairSnippetWithBodies_replacesCSSPreview(t *testing.T) {
|
func TestRepairSnippetWithBodies_replacesCSSPreview(t *testing.T) {
|
||||||
stored := "FacebookMeta for Business :root { color-scheme: light dark;"
|
stored := "FacebookMeta for Business :root { color-scheme: light dark;"
|
||||||
html := `<html><body><p>Inclure automatiquement des informations plus détaillées sur le compte.</p></body></html>`
|
html := `<html><body><p>Inclure automatiquement des informations plus détaillées sur le compte.</p></body></html>`
|
||||||
@ -76,3 +144,18 @@ func TestRepairSnippetWithBodies_replacesCSSPreview(t *testing.T) {
|
|||||||
t.Fatalf("snippet = %q, want rebuilt from html", got)
|
t.Fatalf("snippet = %q, want rebuilt from html", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestIsSnippetBoilerplate_userReportedCases(t *testing.T) {
|
||||||
|
cases := []string{
|
||||||
|
"--------------------------------------- * U...",
|
||||||
|
"[ Afficher dans le navigateur ]...",
|
||||||
|
"SAS OVH - https://www.ovh.com/ 2 rue Kellermann BP 80157...",
|
||||||
|
"FacebookMeta for Business :root { Color-scheme: light dark;...",
|
||||||
|
"@font-face { font-family: 'Playfair Display'; font-style: normal;...",
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
if !isSnippetBoilerplate(c) {
|
||||||
|
t.Fatalf("expected boilerplate for %q", c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user