- Improved the `RepairSnippetWithBodies` function to streamline snippet rebuilding logic and reduce redundancy.
- Introduced new utility functions for stripping CSS noise and decoding HTML entities in snippets.
- Enhanced boilerplate detection to better identify low-quality snippets, including legal footers and view-in-browser prompts.
- Added comprehensive tests for new functionality and edge cases in snippet processing.
162 lines
5.7 KiB
Go
162 lines
5.7 KiB
Go
package imap
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
)
|
|
|
|
func TestSnippetFromBodies_skipsStyleBlock(t *testing.T) {
|
|
html := `<html><head><style>:root { color-scheme: light dark; }</style></head>
|
|
<body><p>Meta for Business vous informe des nouveautés publicitaires.</p></body></html>`
|
|
got := SnippetFromBodies("", html, 200)
|
|
if strings.Contains(got, ":root") || strings.Contains(got, "color-scheme") {
|
|
t.Fatalf("snippet = %q, want body text not CSS", got)
|
|
}
|
|
if !strings.Contains(got, "nouveautés") {
|
|
t.Fatalf("snippet = %q, want meaningful body", got)
|
|
}
|
|
}
|
|
|
|
func TestSnippetFromBodies_skipsViewInBrowser(t *testing.T) {
|
|
html := `<html><body>
|
|
<p><a href="#">Afficher dans le navigateur</a></p>
|
|
<p>Webinar J-14 : boostez la recherche de vos données matériaux.</p>
|
|
</body></html>`
|
|
got := SnippetFromBodies("", html, 200)
|
|
lower := strings.ToLower(got)
|
|
if strings.Contains(lower, "afficher dans le navigateur") {
|
|
t.Fatalf("snippet = %q, want to skip boilerplate", got)
|
|
}
|
|
if !strings.Contains(got, "Webinar") {
|
|
t.Fatalf("snippet = %q, want real content", got)
|
|
}
|
|
}
|
|
|
|
func TestSnippetFromBodies_skipsViewInBrowserBracketedStored(t *testing.T) {
|
|
stored := "[ Afficher dans le navigateur ]..."
|
|
html := `<html><body><p>Contenu utile sur le webinar matériaux.</p></body></html>`
|
|
got := RepairSnippetWithBodies(stored, "", html)
|
|
if strings.Contains(strings.ToLower(got), "afficher dans le navigateur") {
|
|
t.Fatalf("snippet = %q", got)
|
|
}
|
|
if !strings.Contains(got, "webinar") {
|
|
t.Fatalf("snippet = %q, want body content", got)
|
|
}
|
|
}
|
|
|
|
func TestSnippetFromBodies_skipsSeparatorLine(t *testing.T) {
|
|
text := "----------------------------------------------------------------\nUn festival rétro au Château de Tilloloy arrive cet été."
|
|
got := SnippetFromBodies(text, "", 200)
|
|
if strings.HasPrefix(got, "---") {
|
|
t.Fatalf("snippet = %q, want content after separator", got)
|
|
}
|
|
if !strings.Contains(got, "festival") {
|
|
t.Fatalf("snippet = %q", got)
|
|
}
|
|
}
|
|
|
|
func TestSnippetFromBodies_skipsSeparatorOnlyLine(t *testing.T) {
|
|
stored := "--------------------------------------- * U..."
|
|
html := `<html><body><p>Un festival rétro au Château de Tilloloy arrive cet été.</p></body></html>`
|
|
got := RepairSnippetWithBodies(stored, "", html)
|
|
if strings.Contains(got, "---") {
|
|
t.Fatalf("snippet = %q, want no separator line", got)
|
|
}
|
|
if !strings.Contains(got, "festival") {
|
|
t.Fatalf("snippet = %q", got)
|
|
}
|
|
}
|
|
|
|
func TestSnippetFromBodies_skipsOVHFooter(t *testing.T) {
|
|
stored := "SAS OVH - https://www.ovh.com/ 2 rue Kellermann BP 80157..."
|
|
html := `<html><body><p>Votre facture OVH est disponible dans l'espace client.</p></body></html>`
|
|
got := RepairSnippetWithBodies(stored, "", html)
|
|
if strings.Contains(got, "Kellermann") {
|
|
t.Fatalf("snippet = %q, want no legal footer", got)
|
|
}
|
|
if !strings.Contains(got, "facture") {
|
|
t.Fatalf("snippet = %q", got)
|
|
}
|
|
}
|
|
|
|
func TestSnippetFromBodies_skipsFontFaceCSS(t *testing.T) {
|
|
stored := "@font-face { font-family: 'Playfair Display'; font-style: normal;..."
|
|
html := `<html><body><p>Découvrez notre collection printemps.</p></body></html>`
|
|
got := RepairSnippetWithBodies(stored, "", html)
|
|
if strings.Contains(got, "@font-face") {
|
|
t.Fatalf("snippet = %q", got)
|
|
}
|
|
if !strings.Contains(got, "collection") {
|
|
t.Fatalf("snippet = %q", got)
|
|
}
|
|
}
|
|
|
|
func TestSnippetFromBodies_stripsHTMLTags(t *testing.T) {
|
|
text := "<b>Bonjour</b> Eliott, votre <strong>commande</strong> est prête."
|
|
got := SnippetFromBodies(text, "", 200)
|
|
if strings.Contains(got, "<") || strings.Contains(got, ">") {
|
|
t.Fatalf("snippet = %q, want plain text without tags", got)
|
|
}
|
|
if !strings.Contains(got, "Bonjour") || !strings.Contains(got, "commande") {
|
|
t.Fatalf("snippet = %q, want readable text", got)
|
|
}
|
|
}
|
|
|
|
func TestRepairSnippetWithBodies_stripsStoredHTMLTags(t *testing.T) {
|
|
stored := "<span style=\"color:red\">Offre</span> limitée & exclusive"
|
|
got := RepairSnippetWithBodies(stored, "", "")
|
|
if strings.Contains(got, "<") {
|
|
t.Fatalf("snippet = %q, want tags stripped", got)
|
|
}
|
|
if !strings.Contains(got, "Offre") || !strings.Contains(got, "&") {
|
|
t.Fatalf("snippet = %q, want unescaped text", got)
|
|
}
|
|
}
|
|
|
|
func TestStripSnippetMarkup_decodesEntitiesAndCSSTail(t *testing.T) {
|
|
raw := "Victoria vient d'activer son compte sur passbolt /*//||//..."
|
|
got := stripSnippetMarkup(raw)
|
|
if strings.Contains(got, "'") || strings.Contains(got, "'") {
|
|
t.Fatalf("snippet = %q, want decoded apostrophe", got)
|
|
}
|
|
if !strings.Contains(got, "d'activer") {
|
|
t.Fatalf("snippet = %q, want apostrophe", got)
|
|
}
|
|
if strings.Contains(got, "/*") || strings.Contains(got, "||//") {
|
|
t.Fatalf("snippet = %q, want CSS tail removed", got)
|
|
}
|
|
}
|
|
|
|
func TestSnippetLooksLowQuality_encodedEntities(t *testing.T) {
|
|
if !SnippetLooksLowQuality("Hello d'activer") {
|
|
t.Fatal("expected encoded entities to be low quality")
|
|
}
|
|
}
|
|
|
|
func TestRepairSnippetWithBodies_replacesCSSPreview(t *testing.T) {
|
|
stored := "FacebookMeta for Business :root { color-scheme: light dark;"
|
|
html := `<html><body><p>Inclure automatiquement des informations plus détaillées sur le compte.</p></body></html>`
|
|
got := RepairSnippetWithBodies(stored, "", html)
|
|
if strings.Contains(got, ":root") {
|
|
t.Fatalf("snippet = %q", got)
|
|
}
|
|
if !strings.Contains(got, "automatiquement") {
|
|
t.Fatalf("snippet = %q, want rebuilt from html", got)
|
|
}
|
|
}
|
|
|
|
func TestIsSnippetBoilerplate_userReportedCases(t *testing.T) {
|
|
cases := []string{
|
|
"--------------------------------------- * U...",
|
|
"[ Afficher dans le navigateur ]...",
|
|
"SAS OVH - https://www.ovh.com/ 2 rue Kellermann BP 80157...",
|
|
"FacebookMeta for Business :root { Color-scheme: light dark;...",
|
|
"@font-face { font-family: 'Playfair Display'; font-style: normal;...",
|
|
}
|
|
for _, c := range cases {
|
|
if !isSnippetBoilerplate(c) {
|
|
t.Fatalf("expected boilerplate for %q", c)
|
|
}
|
|
}
|
|
}
|