ultisuite-backend/internal/contacts/discovery/signature.go
R3D347HR4Y 556d5f416d Enhance API and configuration for contact discovery and public sharing
- Introduced new endpoints for contact discovery, including scanning, listing, and managing discovered contacts.
- Implemented retry logic for handling missing DAV credentials during contact operations.
- Added public share functionality for drive API, allowing users to manage public shares, including upload, delete, and rename operations.
- Updated Nextcloud configuration to support public share links and improved error handling for public share permissions.
- Enhanced logging and validation across contact and drive APIs for better error tracking and user feedback.
- Added tests for new contact matching and ranking functionalities to ensure accuracy and reliability.
2026-06-06 20:27:02 +02:00

220 lines
5.6 KiB
Go

package discovery
import (
"html"
"regexp"
"strings"
)
// Precompiled patterns shared by the quoted-reply stripping and
// signature-detection helpers in this file.
var (
// sigDelimiterRe matches a line made of "--" followed only by optional
// whitespace; extractSignature splits the body on it.
sigDelimiterRe = regexp.MustCompile(`(?m)^--\s*$`)
// forwardedHeaderRe matches, case-insensitively, a forwarded/original message
// marker (English or French wording) at the start of the text or right after a
// newline, optionally preceded by a run of five or more dashes.
forwardedHeaderRe = regexp.MustCompile(`(?i)(?:^|\n)(?:-{5,}\s*)?(?:forwarded message|message transféré|message transmis|-----Original Message-----)`)
// quotedReplyStartRe matches the attribution that introduces a quoted reply in
// French, English, German, Spanish or Italian ("On ... wrote:" and equivalents),
// an "-----Original Message-----" marker, or a newline followed by five or more
// underscores. Because of the (?s) flag the lazy ".{4,200}?" may span line breaks.
quotedReplyStartRe = regexp.MustCompile(`(?is)(?:^|\n)\s*(?:` +
`Le\s+.{4,200}?\s+a\s+écrit\s*:` +
`|On\s+.{4,200}?\s+wrote\s*:` +
`|Am\s+.{4,200}?\s+schrieb\s*:` +
`|El\s+.{4,200}?\s+escribió\s*:` +
`|Il\s+.{4,200}?\s+ha\s+scritto\s*:` +
`|-----Original Message-----` +
`|\n_{5,}` +
`)`)
// replyHeaderBlockRe matches a "De:"/"From:" header line that is followed,
// anywhere later in the text (greedy ".+" with (?s)), by a line starting with
// "Envoyé:", "Sent:" or "Date:".
replyHeaderBlockRe = regexp.MustCompile(`(?is)\n\s*(?:De|From)\s*:\s*.+\n\s*(?:Envoyé|Sent|Date)\s*:`)
// replyHeaderInSigRe matches reply/attribution markers; signatureMatchesSender
// rejects any candidate signature that contains one.
replyHeaderInSigRe = regexp.MustCompile(`(?i)(?:\bwrote\s*:|\ba\s+écrit\s*:|-----Original|Envoyé\s*:|Sent\s*:|schrieb\s*:|escribió\s*:|ha\s+scritto\s*:)`)
// phoneInSigRe matches an optional "+", a digit, at least seven digits,
// whitespace, parentheses, dots or dashes, and a closing digit.
// NOTE(review): this shape also matches date-like strings such as "2024-01-15".
phoneInSigRe = regexp.MustCompile(`(?:\+?\d[\d\s().-]{7,}\d)`)
// emailInSigRe matches an e-mail address, case-insensitively.
emailInSigRe = regexp.MustCompile(`(?i)[a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,}`)
// titleKeywordsRe matches common job-title keywords, case-insensitively.
// NOTE(review): there are no word boundaries, so keywords match as substrings
// (e.g. "cto" inside "October" or "doctor") — confirm this is intended.
titleKeywordsRe = regexp.MustCompile(`(?i)(?:directeur|director|manager|ceo|cto|engineer|consultant|developer|president|founder|co-founder|chef|responsable)`)
)
// Patterns used by stripHTMLTags, compiled once at package initialization
// rather than on every call.
var (
	stripHTMLStyleRe   = regexp.MustCompile(`(?is)<style[^>]*>.*?</style>`)
	stripHTMLScriptRe  = regexp.MustCompile(`(?is)<script[^>]*>.*?</script>`)
	stripHTMLBreakRe   = regexp.MustCompile(`(?i)<br\s*/?>`)
	stripHTMLParaEndRe = regexp.MustCompile(`(?i)</p>`)
	stripHTMLAnyTagRe  = regexp.MustCompile(`<[^>]+>`)
)

// stripHTMLMaxTailBytes bounds how much of the end of an HTML body
// stripHTMLTags processes.
const stripHTMLMaxTailBytes = 8000

// stripHTMLTags converts the tail of an HTML fragment to plain text.
//
// Only the last stripHTMLMaxTailBytes bytes of s are considered. <style> and
// <script> elements are removed together with their content, <br> and </p>
// become newlines, every remaining tag is dropped, and HTML entities are
// unescaped. The result is not trimmed.
//
// NOTE(review): the tail cut is positional, so it can still begin inside a tag
// or a <style>/<script> element, in which case fragments of that markup remain
// in the output.
func stripHTMLTags(s string) string {
	if len(s) > stripHTMLMaxTailBytes {
		start := len(s) - stripHTMLMaxTailBytes
		// Do not start in the middle of a multi-byte UTF-8 sequence: skip up
		// to three continuation bytes (10xxxxxx) so the tail stays valid text.
		for i := 0; i < 3 && start < len(s) && s[start]&0xC0 == 0x80; i++ {
			start++
		}
		s = s[start:]
	}
	s = stripHTMLStyleRe.ReplaceAllString(s, "")
	s = stripHTMLScriptRe.ReplaceAllString(s, "")
	s = stripHTMLBreakRe.ReplaceAllString(s, "\n")
	s = stripHTMLParaEndRe.ReplaceAllString(s, "\n")
	s = stripHTMLAnyTagRe.ReplaceAllString(s, "")
	return html.UnescapeString(s)
}
// stripQuotedPrefixLines drops everything from the first ">"-prefixed line
// onwards, ignoring the first two lines of raw.
//
// When such a line is found, the preceding lines are re-joined and trimmed of
// surrounding whitespace. When none is found, raw is returned unchanged
// (and untrimmed).
func stripQuotedPrefixLines(raw string) string {
	rows := strings.Split(raw, "\n")
	// Lines 0 and 1 are never treated as the start of quoted content.
	for idx := 2; idx < len(rows); idx++ {
		if strings.HasPrefix(strings.TrimSpace(rows[idx]), ">") {
			kept := strings.Join(rows[:idx], "\n")
			return strings.TrimSpace(kept)
		}
	}
	return raw
}
// stripQuotedReplyContent removes forwarded and quoted-reply material from the
// end of a message body.
//
// The text is cut, in order, at the first forwarded-message header, at the
// first reply attribution, and at the first reply header block; the latter two
// cuts only apply when the match starts at byte offset 15 or later. Finally,
// ">"-quoted lines are removed via stripQuotedPrefixLines.
func stripQuotedReplyContent(raw string) string {
	const minLeadBytes = 15

	body := raw
	// cutAt truncates body at span's start when span exists and begins at or
	// after minStart.
	cutAt := func(span []int, minStart int) {
		if span != nil && span[0] >= minStart {
			body = body[:span[0]]
		}
	}

	// Each pattern is evaluated against the result of the previous cut.
	cutAt(forwardedHeaderRe.FindStringIndex(body), 0)
	cutAt(quotedReplyStartRe.FindStringIndex(body), minLeadBytes)
	cutAt(replyHeaderBlockRe.FindStringIndex(body), minLeadBytes)

	return stripQuotedPrefixLines(body)
}
// emailsInText returns the distinct e-mail addresses found in s, lowercased
// and in order of first appearance. It returns nil when s contains none.
func emailsInText(s string) []string {
	hits := emailInSigRe.FindAllString(strings.ToLower(s), -1)
	if len(hits) == 0 {
		return nil
	}

	var unique []string
	known := map[string]struct{}{}
	for _, hit := range hits {
		addr := strings.TrimSpace(hit)
		if _, dup := known[addr]; dup || addr == "" {
			continue
		}
		known[addr] = struct{}{}
		unique = append(unique, addr)
	}
	return unique
}
// signatureMatchesSender reports whether candidate plausibly belongs to the
// message sender.
//
// It returns false when candidate contains a reply/attribution marker, when it
// contains any e-mail address other than senderEmail, or when displayName has
// at least two words and candidate contains none of them (words shorter than
// two bytes are ignored), no phone number and no e-mail address. An empty
// senderEmail accepts any candidate that has no reply marker.
func signatureMatchesSender(candidate, senderEmail, displayName string) bool {
	if replyHeaderInSigRe.MatchString(candidate) {
		return false
	}

	want := strings.ToLower(strings.TrimSpace(senderEmail))
	if want == "" {
		return true
	}

	// Any foreign address means the block belongs to somebody else.
	addrs := emailsInText(candidate)
	for _, addr := range addrs {
		if addr != want {
			return false
		}
	}

	// The name check only applies to display names with two or more words.
	words := strings.Fields(strings.ToLower(displayName))
	if len(words) < 2 {
		return true
	}

	// An e-mail address or phone number is enough evidence on its own.
	if len(addrs) > 0 || phoneInSigRe.MatchString(candidate) {
		return true
	}

	haystack := strings.ToLower(candidate)
	for _, word := range words {
		if len(word) >= 2 && strings.Contains(haystack, word) {
			return true
		}
	}
	return false
}
// extractSignature tries to isolate the sender's signature block from a
// message body and returns it with a confidence score, or ("", 0) when no
// acceptable signature is found.
//
// bodyText is preferred; bodyHTML is only used (via stripHTMLTags) when the
// trimmed text body is empty. Only the last 8000 bytes of the text body are
// examined, and quoted-reply/forwarded content is removed first.
//
// Candidate selection and base confidence:
//   - 0.7: the text after the last "--" delimiter line;
//   - 0.55: with no delimiter and more than four lines, the last (up to) eight
//     lines, provided they contain a phone number, e-mail address or title
//     keyword;
//   - 0.3: otherwise the whole remaining text.
//
// The candidate must be 10 to 2000 bytes long and pass signatureMatchesSender.
// The confidence is multiplied by 0.6 when the candidate contains neither the
// local part of senderEmail, nor any e-mail address, nor a phone number.
// Results scoring below 0.35 are discarded, which always includes the 0.3
// whole-text fallback.
func extractSignature(bodyText, bodyHTML, senderEmail, displayName string) (text string, confidence float64) {
	const maxTailBytes = 8000

	raw := strings.TrimSpace(bodyText)
	if len(raw) > maxTailBytes {
		start := len(raw) - maxTailBytes
		// Do not start in the middle of a multi-byte UTF-8 sequence: skip up
		// to three continuation bytes (10xxxxxx) so no invalid bytes can end
		// up in the returned signature.
		for i := 0; i < 3 && start < len(raw) && raw[start]&0xC0 == 0x80; i++ {
			start++
		}
		raw = raw[start:]
	}
	if raw == "" && bodyHTML != "" {
		raw = stripHTMLTags(bodyHTML)
	}
	if raw == "" {
		return "", 0
	}

	raw = strings.TrimSpace(stripQuotedReplyContent(raw))
	if raw == "" {
		return "", 0
	}

	candidate := raw
	confidence = 0.3
	if parts := sigDelimiterRe.Split(raw, -1); len(parts) > 1 {
		// Explicit delimiter: everything after the last one is the signature.
		candidate = strings.TrimSpace(parts[len(parts)-1])
		confidence = 0.7
	} else if lines := strings.Split(raw, "\n"); len(lines) > 4 {
		// No delimiter: fall back to the last lines if they look like contact
		// details.
		start := len(lines) - 8
		if start < 0 {
			start = 0
		}
		tail := strings.Join(lines[start:], "\n")
		if phoneInSigRe.MatchString(tail) || emailInSigRe.MatchString(tail) || titleKeywordsRe.MatchString(tail) {
			candidate = strings.TrimSpace(tail)
			confidence = 0.55
		}
	}

	if len(candidate) < 10 || len(candidate) > 2000 {
		return "", 0
	}
	if !signatureMatchesSender(candidate, senderEmail, displayName) {
		return "", 0
	}

	if senderLower := strings.ToLower(strings.TrimSpace(senderEmail)); senderLower != "" {
		// Local part is everything before the first "@" (or the whole string
		// when there is none).
		local := senderLower
		if at := strings.IndexByte(senderLower, '@'); at >= 0 {
			local = senderLower[:at]
		}
		if local != "" && !strings.Contains(strings.ToLower(candidate), local) {
			// Nothing ties the block to the sender: lower the score.
			if !emailInSigRe.MatchString(candidate) && !phoneInSigRe.MatchString(candidate) {
				confidence *= 0.6
			}
		}
	}

	if confidence < 0.35 {
		return "", 0
	}
	return candidate, confidence
}
// forwardedFromLineRe matches a line that starts with "From:", "De:" or
// "Expéditeur:" (case-insensitive) and captures the e-mail address that
// follows, either bare or inside "Name <addr>". It is compiled once at package
// initialization rather than on every call.
var forwardedFromLineRe = regexp.MustCompile(`(?im)^(?:from|de|expéditeur)\s*:\s*(?:[^<\n]*<)?([a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,})>?`)

// detectForwardedAddresses returns the distinct, lowercased e-mail addresses
// that appear on "From:"-style header lines inside a message body, in order of
// first appearance.
//
// bodyText is used when non-empty; otherwise bodyHTML is converted with
// stripHTMLTags. It returns nil when there is no body or no matching line.
func detectForwardedAddresses(bodyText, bodyHTML string) []string {
	raw := bodyText
	if raw == "" && bodyHTML != "" {
		raw = stripHTMLTags(bodyHTML)
	}
	if raw == "" {
		return nil
	}

	var out []string
	seen := map[string]struct{}{}
	for _, m := range forwardedFromLineRe.FindAllStringSubmatch(raw, -1) {
		// m[1] always exists: the pattern has exactly one capture group.
		email := strings.ToLower(strings.TrimSpace(m[1]))
		if email == "" {
			continue
		}
		if _, dup := seen[email]; dup {
			continue
		}
		seen[email] = struct{}{}
		out = append(out, email)
	}
	return out
}