- Introduced new endpoints for contact discovery, including scanning, listing, and managing discovered contacts. - Implemented retry logic for handling missing DAV credentials during contact operations. - Added public share functionality for drive API, allowing users to manage public shares, including upload, delete, and rename operations. - Updated Nextcloud configuration to support public share links and improved error handling for public share permissions. - Enhanced logging and validation across contact and drive APIs for better error tracking and user feedback. - Added tests for new contact matching and ranking functionalities to ensure accuracy and reliability.
220 lines
5.6 KiB
Go
220 lines
5.6 KiB
Go
package discovery
|
|
|
|
import (
|
|
"html"
|
|
"regexp"
|
|
"strings"
|
|
)
|
|
|
|
// Patterns shared by the signature-extraction and reply-stripping heuristics.
// They are compiled once, at package initialisation.
var (
	// sigDelimiterRe matches the conventional signature separator: a line
	// made of "--" followed only by optional whitespace.
	sigDelimiterRe = regexp.MustCompile(`(?m)^--\s*$`)

	// forwardedHeaderRe matches the banner that introduces a forwarded or
	// original message (English and French wordings), optionally preceded by
	// a run of five or more dashes, at the start of the text or of a line.
	forwardedHeaderRe = regexp.MustCompile(`(?i)(?:^|\n)(?:-{5,}\s*)?(?:forwarded message|message transféré|message transmis|-----Original Message-----)`)

	// quotedReplyStartRe matches the attribution line written before a quoted
	// reply ("On … wrote:" and its French, German, Spanish and Italian
	// equivalents), an "-----Original Message-----" banner, or a line break
	// followed by five or more underscores. The (?s) flag lets the
	// ".{4,200}?" gap span line breaks.
	quotedReplyStartRe = regexp.MustCompile(`(?is)(?:^|\n)\s*(?:` +
		`Le\s+.{4,200}?\s+a\s+écrit\s*:` +
		`|On\s+.{4,200}?\s+wrote\s*:` +
		`|Am\s+.{4,200}?\s+schrieb\s*:` +
		`|El\s+.{4,200}?\s+escribió\s*:` +
		`|Il\s+.{4,200}?\s+ha\s+scritto\s*:` +
		`|-----Original Message-----` +
		`|\n_{5,}` +
		`)`)

	// replyHeaderBlockRe matches a "De:"/"From:" header at the start of a line
	// that is followed, on a later line, by an "Envoyé:"/"Sent:"/"Date:"
	// header.
	replyHeaderBlockRe = regexp.MustCompile(`(?is)\n\s*(?:De|From)\s*:\s*.+\n\s*(?:Envoyé|Sent|Date)\s*:`)

	// replyHeaderInSigRe detects reply/forward header fragments anywhere in a
	// signature candidate.
	replyHeaderInSigRe = regexp.MustCompile(`(?i)(?:\bwrote\s*:|\ba\s+écrit\s*:|-----Original|Envoyé\s*:|Sent\s*:|schrieb\s*:|escribió\s*:|ha\s+scritto\s*:)`)

	// phoneInSigRe matches a phone-number-like run: an optional "+", a digit,
	// at least seven digits/spaces/parentheses/dots/dashes, then a digit.
	phoneInSigRe = regexp.MustCompile(`(?:\+?\d[\d\s().-]{7,}\d)`)

	// emailInSigRe matches an e-mail address, case-insensitively.
	emailInSigRe = regexp.MustCompile(`(?i)[a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,}`)

	// titleKeywordsRe matches common job-title words. The three-letter
	// acronyms are anchored with word boundaries so that ordinary words which
	// merely contain them ("October", "doctor", "sector") do not count as a
	// title; the longer keywords still match as substrings.
	titleKeywordsRe = regexp.MustCompile(`(?i)(?:directeur|director|manager|\bceo\b|\bcto\b|engineer|consultant|developer|president|founder|co-founder|chef|responsable)`)
)
|
|
|
|
// Patterns used by stripHTMLTags, compiled once rather than on every call.
var (
	htmlStyleRe   = regexp.MustCompile(`(?is)<style[^>]*>.*?</style>`)
	htmlScriptRe  = regexp.MustCompile(`(?is)<script[^>]*>.*?</script>`)
	htmlBreakRe   = regexp.MustCompile(`(?i)<br\s*/?>`)
	htmlParaEndRe = regexp.MustCompile(`(?i)</p>`)
	htmlTagRe     = regexp.MustCompile(`<[^>]+>`)
)

// stripHTMLTags converts an HTML fragment to plain text.
//
// Only the last 8000 bytes of s are considered. <style> and <script> elements
// are removed together with their content, <br> and </p> become line breaks,
// every remaining tag is dropped, and HTML entities are unescaped.
func stripHTMLTags(s string) string {
	const maxTail = 8000

	if len(s) > maxTail {
		start := len(s) - maxTail
		// Bytes of the form 10xxxxxx are UTF-8 continuation bytes. Skip them
		// (a character has at most three) so the tail never begins in the
		// middle of a multi-byte character.
		for n := 0; n < 3 && s[start]&0xC0 == 0x80; n++ {
			start++
		}
		s = s[start:]
	}

	s = htmlStyleRe.ReplaceAllString(s, "")
	s = htmlScriptRe.ReplaceAllString(s, "")
	s = htmlBreakRe.ReplaceAllString(s, "\n")
	s = htmlParaEndRe.ReplaceAllString(s, "\n")
	s = htmlTagRe.ReplaceAllString(s, "")
	return html.UnescapeString(s)
}
|
|
|
|
// stripQuotedPrefixLines drops everything from the first ">"-quoted line
// onwards and trims the text that remains.
//
// The first two lines are never treated as the start of a quote. When no
// quoted line is found, raw is returned unchanged (and untrimmed).
func stripQuotedPrefixLines(raw string) string {
	lines := strings.Split(raw, "\n")
	for idx := 2; idx < len(lines); idx++ {
		if !strings.HasPrefix(strings.TrimSpace(lines[idx]), ">") {
			continue
		}
		kept := strings.Join(lines[:idx], "\n")
		return strings.TrimSpace(kept)
	}
	return raw
}
|
|
|
|
func stripQuotedReplyContent(raw string) string {
|
|
if loc := forwardedHeaderRe.FindStringIndex(raw); loc != nil {
|
|
raw = raw[:loc[0]]
|
|
}
|
|
if loc := quotedReplyStartRe.FindStringIndex(raw); loc != nil && loc[0] >= 15 {
|
|
raw = raw[:loc[0]]
|
|
}
|
|
if loc := replyHeaderBlockRe.FindStringIndex(raw); loc != nil && loc[0] >= 15 {
|
|
raw = raw[:loc[0]]
|
|
}
|
|
return stripQuotedPrefixLines(raw)
|
|
}
|
|
|
|
func emailsInText(s string) []string {
|
|
found := emailInSigRe.FindAllString(strings.ToLower(s), -1)
|
|
if len(found) == 0 {
|
|
return nil
|
|
}
|
|
seen := map[string]struct{}{}
|
|
var out []string
|
|
for _, e := range found {
|
|
e = strings.TrimSpace(e)
|
|
if e == "" {
|
|
continue
|
|
}
|
|
if _, ok := seen[e]; ok {
|
|
continue
|
|
}
|
|
seen[e] = struct{}{}
|
|
out = append(out, e)
|
|
}
|
|
return out
|
|
}
|
|
|
|
func signatureMatchesSender(candidate, senderEmail, displayName string) bool {
|
|
if replyHeaderInSigRe.MatchString(candidate) {
|
|
return false
|
|
}
|
|
|
|
sender := strings.ToLower(strings.TrimSpace(senderEmail))
|
|
if sender == "" {
|
|
return true
|
|
}
|
|
|
|
for _, e := range emailsInText(candidate) {
|
|
if e != sender {
|
|
return false
|
|
}
|
|
}
|
|
|
|
if displayName != "" {
|
|
nameTokens := strings.Fields(strings.ToLower(displayName))
|
|
if len(nameTokens) >= 2 {
|
|
candidateLower := strings.ToLower(candidate)
|
|
matches := 0
|
|
for _, tok := range nameTokens {
|
|
if len(tok) < 2 {
|
|
continue
|
|
}
|
|
if strings.Contains(candidateLower, tok) {
|
|
matches++
|
|
}
|
|
}
|
|
if matches == 0 && !phoneInSigRe.MatchString(candidate) && len(emailsInText(candidate)) == 0 {
|
|
return false
|
|
}
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
func extractSignature(bodyText, bodyHTML, senderEmail, displayName string) (text string, confidence float64) {
|
|
raw := strings.TrimSpace(bodyText)
|
|
if len(raw) > 8000 {
|
|
raw = raw[len(raw)-8000:]
|
|
}
|
|
if raw == "" && bodyHTML != "" {
|
|
raw = stripHTMLTags(bodyHTML)
|
|
}
|
|
if raw == "" {
|
|
return "", 0
|
|
}
|
|
|
|
raw = stripQuotedReplyContent(raw)
|
|
raw = strings.TrimSpace(raw)
|
|
if raw == "" {
|
|
return "", 0
|
|
}
|
|
|
|
parts := sigDelimiterRe.Split(raw, -1)
|
|
candidate := strings.TrimSpace(raw)
|
|
confidence = 0.3
|
|
|
|
if len(parts) > 1 {
|
|
candidate = strings.TrimSpace(parts[len(parts)-1])
|
|
confidence = 0.7
|
|
} else {
|
|
lines := strings.Split(raw, "\n")
|
|
if len(lines) > 4 {
|
|
start := len(lines) - 8
|
|
if start < 0 {
|
|
start = 0
|
|
}
|
|
tail := strings.Join(lines[start:], "\n")
|
|
if phoneInSigRe.MatchString(tail) || emailInSigRe.MatchString(tail) || titleKeywordsRe.MatchString(tail) {
|
|
candidate = strings.TrimSpace(tail)
|
|
confidence = 0.55
|
|
}
|
|
}
|
|
}
|
|
|
|
candidate = strings.TrimSpace(candidate)
|
|
if len(candidate) < 10 || len(candidate) > 2000 {
|
|
return "", 0
|
|
}
|
|
|
|
if !signatureMatchesSender(candidate, senderEmail, displayName) {
|
|
return "", 0
|
|
}
|
|
|
|
senderLower := strings.ToLower(strings.TrimSpace(senderEmail))
|
|
if senderLower != "" {
|
|
local := strings.Split(senderLower, "@")[0]
|
|
if local != "" && !strings.Contains(strings.ToLower(candidate), local) {
|
|
if !emailInSigRe.MatchString(candidate) && !phoneInSigRe.MatchString(candidate) {
|
|
confidence *= 0.6
|
|
}
|
|
}
|
|
}
|
|
|
|
if confidence < 0.35 {
|
|
return "", 0
|
|
}
|
|
return candidate, confidence
|
|
}
|
|
|
|
func detectForwardedAddresses(bodyText, bodyHTML string) []string {
|
|
raw := bodyText
|
|
if raw == "" && bodyHTML != "" {
|
|
raw = stripHTMLTags(bodyHTML)
|
|
}
|
|
if raw == "" {
|
|
return nil
|
|
}
|
|
|
|
var out []string
|
|
seen := map[string]struct{}{}
|
|
|
|
fromLineRe := regexp.MustCompile(`(?im)^(?:from|de|expéditeur)\s*:\s*(?:[^<\n]*<)?([a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,})>?`)
|
|
for _, m := range fromLineRe.FindAllStringSubmatch(raw, -1) {
|
|
if len(m) > 1 {
|
|
email := strings.ToLower(strings.TrimSpace(m[1]))
|
|
if _, ok := seen[email]; !ok && email != "" {
|
|
seen[email] = struct{}{}
|
|
out = append(out, email)
|
|
}
|
|
}
|
|
}
|
|
return out
|
|
}
|