ultisuite-backend/internal/mail/imap/snippet_test.go
R3D347HR4Y 69bde44b94 Refactor snippet processing and enhance boilerplate detection
- Improved the `RepairSnippetWithBodies` function to streamline snippet rebuilding logic and reduce redundancy.
- Introduced new utility functions for stripping CSS noise and decoding HTML entities in snippets.
- Enhanced boilerplate detection to better identify low-quality snippets, including legal footers and view-in-browser prompts.
- Added comprehensive tests for new functionality and edge cases in snippet processing.
2026-06-04 10:49:22 +02:00

162 lines
5.7 KiB
Go

package imap
import (
"strings"
"testing"
)
// TestSnippetFromBodies_skipsStyleBlock feeds SnippetFromBodies an HTML body
// whose <head> carries a <style> block and asserts that the resulting snippet
// contains the paragraph text but none of the CSS tokens.
func TestSnippetFromBodies_skipsStyleBlock(t *testing.T) {
	htmlBody := `<html><head><style>:root { color-scheme: light dark; }</style></head>
<body><p>Meta for Business vous informe des nouveautés publicitaires.</p></body></html>`

	snippet := SnippetFromBodies("", htmlBody, 200)

	// Neither CSS fragment from the <style> element may appear in the snippet.
	for _, cssToken := range []string{":root", "color-scheme"} {
		if strings.Contains(snippet, cssToken) {
			t.Fatalf("snippet = %q, want body text not CSS", snippet)
		}
	}

	if !strings.Contains(snippet, "nouveautés") {
		t.Fatalf("snippet = %q, want meaningful body", snippet)
	}
}
// TestSnippetFromBodies_skipsViewInBrowser asserts that a leading
// "Afficher dans le navigateur" link paragraph is absent from the snippet
// while the following content paragraph is present.
func TestSnippetFromBodies_skipsViewInBrowser(t *testing.T) {
	htmlBody := `<html><body>
<p><a href="#">Afficher dans le navigateur</a></p>
<p>Webinar J-14 : boostez la recherche de vos données matériaux.</p>
</body></html>`

	snippet := SnippetFromBodies("", htmlBody, 200)

	// The boilerplate phrase is matched case-insensitively; the content
	// marker below is matched with its original casing.
	if folded := strings.ToLower(snippet); strings.Contains(folded, "afficher dans le navigateur") {
		t.Fatalf("snippet = %q, want to skip boilerplate", snippet)
	}

	if !strings.Contains(snippet, "Webinar") {
		t.Fatalf("snippet = %q, want real content", snippet)
	}
}
// TestSnippetFromBodies_skipsViewInBrowserBracketedStored passes a stored
// snippet consisting of a bracketed "view in browser" prompt to
// RepairSnippetWithBodies and asserts that the result drops the prompt and
// carries text from the HTML body instead.
func TestSnippetFromBodies_skipsViewInBrowserBracketedStored(t *testing.T) {
	const storedSnippet = "[ Afficher dans le navigateur ]..."
	htmlBody := `<html><body><p>Contenu utile sur le webinar matériaux.</p></body></html>`

	repaired := RepairSnippetWithBodies(storedSnippet, "", htmlBody)

	folded := strings.ToLower(repaired)
	if strings.Contains(folded, "afficher dans le navigateur") {
		t.Fatalf("snippet = %q", repaired)
	}

	if !strings.Contains(repaired, "webinar") {
		t.Fatalf("snippet = %q, want body content", repaired)
	}
}
// TestSnippetFromBodies_skipsSeparatorLine gives SnippetFromBodies a plain-text
// body whose first line is a run of dashes and asserts that the snippet does
// not start with the dashes and includes the sentence on the second line.
func TestSnippetFromBodies_skipsSeparatorLine(t *testing.T) {
	plainBody := "----------------------------------------------------------------\nUn festival rétro au Château de Tilloloy arrive cet été."

	snippet := SnippetFromBodies(plainBody, "", 200)

	// Only a leading separator is rejected here, not dashes elsewhere.
	if strings.HasPrefix(snippet, "---") {
		t.Fatalf("snippet = %q, want content after separator", snippet)
	}

	if !strings.Contains(snippet, "festival") {
		t.Fatalf("snippet = %q", snippet)
	}
}
// TestSnippetFromBodies_skipsSeparatorOnlyLine passes a stored snippet that is
// mostly a dash separator to RepairSnippetWithBodies and asserts that the
// result contains no "---" run and includes text from the HTML body.
func TestSnippetFromBodies_skipsSeparatorOnlyLine(t *testing.T) {
	const storedSnippet = "--------------------------------------- * U..."
	htmlBody := `<html><body><p>Un festival rétro au Château de Tilloloy arrive cet été.</p></body></html>`

	repaired := RepairSnippetWithBodies(storedSnippet, "", htmlBody)

	// Unlike the prefix-only check in the sibling test, any "---" anywhere
	// in the result is a failure here.
	if strings.Contains(repaired, "---") {
		t.Fatalf("snippet = %q, want no separator line", repaired)
	}

	if !strings.Contains(repaired, "festival") {
		t.Fatalf("snippet = %q", repaired)
	}
}
// TestSnippetFromBodies_skipsOVHFooter passes a stored snippet made of a
// company address footer to RepairSnippetWithBodies and asserts that the
// result no longer mentions the street name and includes text from the HTML
// body.
func TestSnippetFromBodies_skipsOVHFooter(t *testing.T) {
	const storedSnippet = "SAS OVH - https://www.ovh.com/ 2 rue Kellermann BP 80157..."
	htmlBody := `<html><body><p>Votre facture OVH est disponible dans l'espace client.</p></body></html>`

	repaired := RepairSnippetWithBodies(storedSnippet, "", htmlBody)

	// "Kellermann" only occurs in the stored footer, never in the HTML body.
	if strings.Contains(repaired, "Kellermann") {
		t.Fatalf("snippet = %q, want no legal footer", repaired)
	}

	if !strings.Contains(repaired, "facture") {
		t.Fatalf("snippet = %q", repaired)
	}
}
// TestSnippetFromBodies_skipsFontFaceCSS passes a stored snippet that starts
// with an @font-face rule to RepairSnippetWithBodies and asserts that the
// result drops the rule and includes text from the HTML body.
func TestSnippetFromBodies_skipsFontFaceCSS(t *testing.T) {
	const storedSnippet = "@font-face { font-family: 'Playfair Display'; font-style: normal;..."
	htmlBody := `<html><body><p>Découvrez notre collection printemps.</p></body></html>`

	repaired := RepairSnippetWithBodies(storedSnippet, "", htmlBody)

	if strings.Contains(repaired, "@font-face") {
		t.Fatalf("snippet = %q", repaired)
	}

	if !strings.Contains(repaired, "collection") {
		t.Fatalf("snippet = %q", repaired)
	}
}
// TestSnippetFromBodies_stripsHTMLTags supplies inline HTML tags through the
// plain-text body argument and asserts that the snippet has no angle brackets
// left while the tagged words themselves remain.
func TestSnippetFromBodies_stripsHTMLTags(t *testing.T) {
	plainBody := "<b>Bonjour</b> Eliott, votre <strong>commande</strong> est prête."

	snippet := SnippetFromBodies(plainBody, "", 200)

	// Either bracket character is enough to fail.
	if strings.ContainsAny(snippet, "<>") {
		t.Fatalf("snippet = %q, want plain text without tags", snippet)
	}

	keepsWords := strings.Contains(snippet, "Bonjour") && strings.Contains(snippet, "commande")
	if !keepsWords {
		t.Fatalf("snippet = %q, want readable text", snippet)
	}
}
// TestRepairSnippetWithBodies_stripsStoredHTMLTags calls
// RepairSnippetWithBodies with a stored snippet containing a <span> tag and an
// "&amp;" entity, and with both bodies empty. It asserts that the result has
// no opening angle bracket, keeps the word "Offre", and carries a decoded
// ampersand rather than the encoded entity.
func TestRepairSnippetWithBodies_stripsStoredHTMLTags(t *testing.T) {
	stored := "<span style=\"color:red\">Offre</span> limitée &amp; exclusive"
	got := RepairSnippetWithBodies(stored, "", "")
	if strings.Contains(got, "<") {
		t.Fatalf("snippet = %q, want tags stripped", got)
	}
	// "&amp;" itself contains "&", so the presence check below would also
	// pass on output that was never unescaped. Reject the encoded form
	// explicitly so the test actually verifies decoding.
	if strings.Contains(got, "&amp;") {
		t.Fatalf("snippet = %q, want unescaped text", got)
	}
	if !strings.Contains(got, "Offre") || !strings.Contains(got, "&") {
		t.Fatalf("snippet = %q, want unescaped text", got)
	}
}
// TestStripSnippetMarkup_decodesEntitiesAndCSSTail runs stripSnippetMarkup on
// text holding a numeric apostrophe entity followed by a comment-like tail and
// asserts three things in order: no encoded apostrophe remains, the literal
// apostrophe is present, and the tail markers are gone.
func TestStripSnippetMarkup_decodesEntitiesAndCSSTail(t *testing.T) {
	const input = "Victoria vient d&#39;activer son compte sur passbolt /*//||//..."

	cleaned := stripSnippetMarkup(input)

	for _, entity := range []string{"&#39;", "&apos;"} {
		if strings.Contains(cleaned, entity) {
			t.Fatalf("snippet = %q, want decoded apostrophe", cleaned)
		}
	}

	if !strings.Contains(cleaned, "d'activer") {
		t.Fatalf("snippet = %q, want apostrophe", cleaned)
	}

	for _, tailMarker := range []string{"/*", "||//"} {
		if strings.Contains(cleaned, tailMarker) {
			t.Fatalf("snippet = %q, want CSS tail removed", cleaned)
		}
	}
}
// TestSnippetLooksLowQuality_encodedEntities asserts that
// SnippetLooksLowQuality reports true for text that still contains an encoded
// "&#39;" entity.
func TestSnippetLooksLowQuality_encodedEntities(t *testing.T) {
	const encoded = "Hello d&#39;activer"

	if SnippetLooksLowQuality(encoded) {
		return
	}
	t.Fatal("expected encoded entities to be low quality")
}
// TestRepairSnippetWithBodies_replacesCSSPreview passes a stored snippet that
// contains a ":root" CSS rule to RepairSnippetWithBodies and asserts that the
// result drops the rule and includes text from the HTML body.
func TestRepairSnippetWithBodies_replacesCSSPreview(t *testing.T) {
	const storedSnippet = "FacebookMeta for Business :root { color-scheme: light dark;"
	htmlBody := `<html><body><p>Inclure automatiquement des informations plus détaillées sur le compte.</p></body></html>`

	repaired := RepairSnippetWithBodies(storedSnippet, "", htmlBody)

	if strings.Contains(repaired, ":root") {
		t.Fatalf("snippet = %q", repaired)
	}

	if !strings.Contains(repaired, "automatiquement") {
		t.Fatalf("snippet = %q, want rebuilt from html", repaired)
	}
}
// TestIsSnippetBoilerplate_userReportedCases asserts that isSnippetBoilerplate
// returns true for each of the listed stored-snippet samples.
func TestIsSnippetBoilerplate_userReportedCases(t *testing.T) {
	cases := []string{
		"--------------------------------------- * U...",
		"[ Afficher dans le navigateur ]...",
		"SAS OVH - https://www.ovh.com/ 2 rue Kellermann BP 80157...",
		"FacebookMeta for Business :root { Color-scheme: light dark;...",
		"@font-face { font-family: 'Playfair Display'; font-style: normal;...",
	}
	for _, c := range cases {
		if !isSnippetBoilerplate(c) {
			// Errorf rather than Fatalf: the cases are independent, so keep
			// going and report every sample that is misclassified in one run.
			t.Errorf("expected boilerplate for %q", c)
		}
	}
}