ultisuite-backend/internal/mail/imap/charset_test.go
R3D347HR4Y e6a04fdd31
Some checks failed
CI / Go tests (push) Has been cancelled
CI / Integration tests (push) Has been cancelled
CI / DB migrations (push) Has been cancelled
feat(mail): implement UTF-8 mojibake repair functionality
- Added repairUTF8Mojibake function to fix UTF-8 text misread as Latin-1, addressing common encoding issues in email bodies.
- Enhanced RepairStoredBodies and RepairSnippetWithBodies functions to utilize the new mojibake repair logic.
- Introduced unit tests for mojibake repair functionality to ensure accurate text restoration.
- Updated charset handling in repairLegacyCharsetString to incorporate mojibake repair, improving overall text processing reliability.
2026-06-18 11:11:36 +02:00

85 lines
3.0 KiB
Go

package imap
import (
"strings"
"testing"
)
// TestParseBody_iso88591Charset feeds parseBody a single-part text/plain
// message declared as charset=iso-8859-1 with an 8bit body that carries raw
// 0xE9 bytes. It expects an empty HTML result and a text result in which
// those bytes show up as the accented words "programmé" and "réunion".
func TestParseBody_iso88591Charset(t *testing.T) {
	// Headers, the blank separator line, then the Latin-1 encoded body.
	const message = "From: calendar@google.com\r\n" +
		"To: user@example.com\r\n" +
		"Subject: Invitation\r\n" +
		"Content-Type: text/plain; charset=iso-8859-1\r\n" +
		"Content-Transfer-Encoding: 8bit\r\n" +
		"\r\n" +
		"Vous avez un rendez-vous programm\xe9.\r\nLien de la r\xe9union."

	text, html := parseBody([]byte(message))

	// Only the first failing expectation is reported; Fatalf stops the test.
	switch {
	case html != "":
		t.Fatalf("html = %q, want empty", html)
	case !strings.Contains(text, "programmé"):
		t.Fatalf("text = %q, want iso-8859-1 accents", text)
	case !strings.Contains(text, "réunion"):
		t.Fatalf("text = %q, want réunion", text)
	}
}
// TestRepairUTF8Mojibake_doubleEncodedFrench checks that repairUTF8Mojibake
// turns a French sentence whose UTF-8 bytes were misread as Latin-1 (and then
// re-encoded as UTF-8) back into the original accented text.
func TestRepairUTF8Mojibake_doubleEncodedFrench(t *testing.T) {
	// The damaged input is spelled with \u escapes on purpose: every accented
	// letter appears as U+00C3 followed by the code point matching its second
	// UTF-8 byte (e.g. U+00A9 for 'é', U+00A7 for 'ç'). Escapes keep the
	// fixture intact even if an editor or export tool normalizes encodings;
	// a fixture that is already clean would make this test pass for a no-op.
	raw := "Si elle bouge, tu perds en r\u00c3\u00a9activit\u00c3\u00a9. " +
		"La NEXOR a \u00c3\u00a9t\u00c3\u00a9 pens\u00c3\u00a9e pour \u00c3\u00a7a."
	want := "Si elle bouge, tu perds en réactivité. La NEXOR a été pensée pour ça."

	repaired := repairUTF8Mojibake(raw)
	if repaired != want {
		t.Fatalf("repaired = %q, want %q", repaired, want)
	}
}
// TestRepairUTF8Mojibake_longMarketingBody runs repairUTF8Mojibake over a
// multi-sentence body containing many damaged accents and checks that no
// U+00C3 marker survives and that sample words come back correctly accented.
func TestRepairUTF8Mojibake_longMarketingBody(t *testing.T) {
	// Mojibake fixture written with \u escapes so it cannot be silently
	// "fixed" by tooling: each accented letter is U+00C3 plus the code point
	// of its second UTF-8 byte (U+00A9 'é', U+00A7 'ç', U+00AA 'ê', U+00A0 'à').
	// With a clean fixture both assertions below would hold trivially.
	raw := "Si elle bouge, tu perds en r\u00c3\u00a9activit\u00c3\u00a9. " +
		"La NEXOR a \u00c3\u00a9t\u00c3\u00a9 pens\u00c3\u00a9e pour \u00c3\u00a7a. " +
		"Nylon ultra r\u00c3\u00a9sistant, boucle rapide, compatible 100% F1 et TSI. " +
		"Sobre, solide, z\u00c3\u00a9ro fioritures. " +
		"D\u00c3\u00a9tache la sangle, passe-la dans les passants, c'est pr\u00c3\u00aat. " +
		"D\u00c3\u00a9j\u00c3\u00a0 adopt\u00c3\u00a9e par +8 000 pompiers en Europe. " +
		"Elle \u00c3\u00a9tait en rupture. Elle est de retour."

	repaired := repairUTF8Mojibake(raw)

	// U+00C3 is the lead character of every damaged accent in the fixture.
	if strings.Contains(repaired, "\u00c3") {
		t.Fatalf("repaired still has mojibake: %q", repaired)
	}
	if !strings.Contains(repaired, "réactivité") || !strings.Contains(repaired, "Déjà") {
		t.Fatalf("repaired = %q", repaired)
	}
}
// TestRepairUTF8Mojibake_leavesValidUTF8Untouched verifies that a string that
// is already correctly accented is returned by repairUTF8Mojibake unchanged.
func TestRepairUTF8Mojibake_leavesValidUTF8Untouched(t *testing.T) {
	clean := "Si elle bouge, tu perds en réactivité."

	got := repairUTF8Mojibake(clean)
	if got == clean {
		return
	}
	t.Fatalf("repaired = %q, want unchanged", got)
}
// TestRepairSnippetWithBodies_mojibakePreview checks that
// RepairSnippetWithBodies repairs a stored snippet containing damaged accents
// when the other two arguments are empty strings.
func TestRepairSnippetWithBodies_mojibakePreview(t *testing.T) {
	// Damaged snippet spelled with \u escapes (U+00C3 U+00A9 stands where 'é'
	// should be) so the fixture stays damaged regardless of how the file is
	// edited or exported. A clean fixture would satisfy both checks below
	// without any repair taking place.
	stored := "Nylon ultra r\u00c3\u00a9sistant, boucle rapide, z\u00c3\u00a9ro fioritures."

	// NOTE(review): the second and third arguments are presumably the text
	// and HTML bodies; confirm against RepairSnippetWithBodies' signature.
	got := RepairSnippetWithBodies(stored, "", "")

	if strings.Contains(got, "\u00c3") {
		t.Fatalf("snippet = %q, want accents repaired", got)
	}
	if !strings.Contains(got, "résistant") || !strings.Contains(got, "zéro") {
		t.Fatalf("snippet = %q", got)
	}
}
// TestRepairLegacyCharsetString_latin1BytesInString passes
// repairLegacyCharsetString a Go string holding a bare 0xE9 byte (not valid
// UTF-8) and expects the properly accented "Vous réunion" back.
func TestRepairLegacyCharsetString_latin1BytesInString(t *testing.T) {
	// Simulates a DB row stored before charset decoding: raw Latin-1 bytes
	// sitting in a text column. \xe9 is 'é' in Latin-1.
	legacy := "Vous r\xe9union"

	if got := repairLegacyCharsetString(legacy); got != "Vous réunion" {
		t.Fatalf("repaired = %q", got)
	}
}
// TestRepairStoredBodies_legacyLatin1 hands RepairStoredBodies a first
// argument containing a bare Latin-1 0xE9 byte and an empty second argument,
// and expects the first returned value to be the accented "programmé".
func TestRepairStoredBodies_legacyLatin1(t *testing.T) {
	// \xe9 is a lone Latin-1 'é' byte, which is not valid UTF-8 on its own.
	legacy := "programm\xe9"

	// The second return value is not under test here.
	text, _ := RepairStoredBodies(legacy, "")
	if text == "programmé" {
		return
	}
	t.Fatalf("text = %q", text)
}