ultisuite-backend/internal/contacts/discovery/classify.go
R3D347HR4Y 556d5f416d Enhance API and configuration for contact discovery and public sharing
- Introduced new endpoints for contact discovery, including scanning, listing, and managing discovered contacts.
- Implemented retry logic for handling missing DAV credentials during contact operations.
- Added public share functionality for drive API, allowing users to manage public shares, including upload, delete, and rename operations.
- Updated Nextcloud configuration to support public share links and improved error handling for public share permissions.
- Enhanced logging and validation across contact and drive APIs for better error tracking and user feedback.
- Added tests for new contact matching and ranking functionalities to ensure accuracy and reliability.
2026-06-06 20:27:02 +02:00

132 lines
3.2 KiB
Go

package discovery
import (
"strings"
)
// mailingListDomains is the set of sender domains that are classified as
// mailing-list / bulk-mail senders. Lookups go through isMailingListDomain,
// which also accepts any subdomain of an entry. Entries are kept in
// alphabetical order; the map is used purely as a set.
var mailingListDomains = map[string]struct{}{
	"amazonses.com":        {},
	"brevo.com":            {},
	"campaign-archive.com": {},
	"constantcontact.com":  {},
	"customeriomail.com":   {},
	"emarsys.net":          {},
	"hubspotemail.net":     {},
	"intercom-mail.com":    {},
	"list-manage.com":      {},
	"mailchimp.com":        {},
	"mailgun.net":          {},
	"mailgun.org":          {},
	"mailjet.com":          {},
	"mandrillapp.com":      {},
	"messagingengine.com":  {},
	"postmarkapp.com":      {},
	"sendgrid.com":         {},
	"sendgrid.net":         {},
	"sendinblue.com":       {},
	"sparkpostmail.com":    {},
}
// disposableDomains is the set of domains that are classified as disposable
// (throwaway) mailbox providers. Lookups go through isDisposableDomain, which
// also accepts any subdomain of an entry. Entries are kept in alphabetical
// order; the map is used purely as a set.
//
// NOTE(review): libero.it looks like a regular consumer mailbox provider
// rather than a throwaway service — confirm that it is listed on purpose.
var disposableDomains = map[string]struct{}{
	"10minutemail.com":  {},
	"burnermail.io":     {},
	"dispostable.com":   {},
	"fakeinbox.com":     {},
	"getnada.com":       {},
	"guerrillamail.com": {},
	"libero.it":         {},
	"maildrop.cc":       {},
	"mailinator.com":    {},
	"sharklasers.com":   {},
	"temp-mail.org":     {},
	"tempmail.com":      {},
	"throwaway.email":   {},
	"trashmail.com":     {},
	"yopmail.com":       {},
}
// emailDomain returns the lower-cased part of email that follows the last
// "@", after trimming surrounding whitespace. It returns "" when email has no
// "@" or when nothing follows the last "@".
func emailDomain(email string) string {
	normalized := strings.TrimSpace(email)
	normalized = strings.ToLower(normalized)

	at := strings.LastIndexByte(normalized, '@')
	// Only a separator followed by at least one byte yields a domain.
	if at >= 0 && at+1 < len(normalized) {
		return normalized[at+1:]
	}
	return ""
}
// isMailingListDomain reports whether domain, once trimmed and lower-cased,
// is an entry of mailingListDomains or a subdomain of one.
func isMailingListDomain(domain string) bool {
	candidate := strings.ToLower(strings.TrimSpace(domain))
	// Test the name itself, then each parent obtained by dropping the leftmost
	// label. A name ends in "."+entry exactly when one of those parents is the
	// entry, so this matches the same inputs as a suffix scan over the set.
	for {
		if _, listed := mailingListDomains[candidate]; listed {
			return true
		}
		dot := strings.IndexByte(candidate, '.')
		if dot < 0 {
			return false
		}
		candidate = candidate[dot+1:]
	}
}
// isNoReplyEmail reports whether email contains one of the markers "noreply",
// "no-reply" or "no_reply". The comparison is case-insensitive and the marker
// may appear anywhere in the string, not only in the local part.
func isNoReplyEmail(email string) bool {
	addr := strings.ToLower(strings.TrimSpace(email))
	for _, marker := range [...]string{"noreply", "no-reply", "no_reply"} {
		if strings.Contains(addr, marker) {
			return true
		}
	}
	return false
}
// isDisposableDomain reports whether domain, once trimmed and lower-cased, is
// an entry of disposableDomains or a subdomain of one.
func isDisposableDomain(domain string) bool {
	candidate := strings.ToLower(strings.TrimSpace(domain))
	// Test the name itself, then each parent obtained by dropping the leftmost
	// label. A name ends in "."+entry exactly when one of those parents is the
	// entry, so this matches the same inputs as a suffix scan over the set.
	for {
		if _, listed := disposableDomains[candidate]; listed {
			return true
		}
		dot := strings.IndexByte(candidate, '.')
		if dot < 0 {
			return false
		}
		candidate = candidate[dot+1:]
	}
}
// classifyAddress derives the classification flags for one aggregated address.
//
// Returns:
//   - isMailingList: the address contains a no-reply marker, its domain is a
//     mailing-list domain, or agg.MailingList / agg.ListUnsub is positive.
//   - isDisposable: the domain is a disposable domain.
//   - isSpamHeavy: agg.MessageCount is at least 3 and agg.SpamCount makes up
//     at least half of it.
//   - reason: the labels of the raised flags joined with ", ", or "" when no
//     flag is raised. A no-reply address is labelled "no-reply" instead of
//     "liste de diffusion".
func classifyAddress(agg *addressAgg) (isMailingList, isDisposable, isSpamHeavy bool, reason string) {
	domain := emailDomain(agg.Email)
	noReply := isNoReplyEmail(agg.Email)

	isDisposable = isDisposableDomain(domain)
	isMailingList = noReply || isMailingListDomain(domain) || agg.MailingList > 0 || agg.ListUnsub > 0

	// Ratio-based signals are only evaluated once at least 3 messages exist.
	if agg.MessageCount >= 3 {
		total := float64(agg.MessageCount)

		// NOTE(review): a ratio >= 0.5 requires MailingList+ListUnsub > 0, in
		// which case isMailingList is already true from the check above; as
		// written this branch cannot change the result. Confirm whether the
		// earlier "> 0" checks were meant to be this strict.
		if float64(agg.MailingList+agg.ListUnsub)/total >= 0.5 {
			isMailingList = true
		}
		isSpamHeavy = float64(agg.SpamCount)/total >= 0.5
	}

	labels := make([]string, 0, 3)
	switch {
	case noReply:
		labels = append(labels, "no-reply")
	case isMailingList:
		labels = append(labels, "liste de diffusion")
	}
	if isDisposable {
		labels = append(labels, "email jetable")
	}
	if isSpamHeavy {
		labels = append(labels, "majoritairement spam")
	}

	// Joining an empty slice yields "", the same value reason has by default.
	reason = strings.Join(labels, ", ")
	return isMailingList, isDisposable, isSpamHeavy, reason
}
// shouldEnrich reports whether an address qualifies for enrichment: none of
// the three classification flags may be set and sigCount must be positive.
func shouldEnrich(isMailingList, isDisposable, isSpamHeavy bool, sigCount int) bool {
	flagged := isMailingList || isDisposable || isSpamHeavy
	return !flagged && sigCount > 0
}