package discovery import ( "context" "encoding/json" "strings" ) func (s *Service) inferMissingCompanies(ctx context.Context, externalUserID, ncUserID, bookID string) { domainCompanies := s.loadDomainCompanyHints(ctx, externalUserID, ncUserID, bookID) rows, err := s.db.Query(ctx, ` SELECT p.id::text, p.primary_email, COALESCE(p.enriched_data, '{}'::jsonb) FROM contact_discovered_profiles p JOIN users u ON p.user_id = u.id WHERE u.external_id = $1 AND p.status = 'suggested' AND NOT p.is_mailing_list AND NOT p.is_disposable AND NOT p.is_spam_heavy` + noReplyProfilesSQL + ` `, externalUserID) if err != nil { return } defer rows.Close() for rows.Next() { var id, email string var enrichedJSON []byte if err := rows.Scan(&id, &email, &enrichedJSON); err != nil { continue } domain := emailDomain(email) if isConsumerMailDomain(domain) { continue } var data EnrichedContactData _ = json.Unmarshal(enrichedJSON, &data) if strings.TrimSpace(data.Company) != "" { continue } company, ok := domainCompanies[domain] if !ok || strings.TrimSpace(company) == "" { continue } data.Company = company payload, _ := json.Marshal(data) _, _ = s.db.Exec(ctx, ` UPDATE contact_discovered_profiles SET enriched_data = $3::jsonb, enrichment_status = CASE WHEN enrichment_status IN ('skipped', 'failed', 'pending') THEN 'enriched' ELSE enrichment_status END, updated_at = NOW() WHERE id = $1::uuid AND user_id = (SELECT id FROM users WHERE external_id = $2) `, id, externalUserID, string(payload)) } } func (s *Service) loadDomainCompanyHints(ctx context.Context, externalUserID, ncUserID, bookID string) map[string]string { hints := map[string]int{} domainBest := map[string]string{} rows, err := s.db.Query(ctx, ` SELECT lower(split_part(p.primary_email, '@', 2)) AS domain, trim(both '"' from (p.enriched_data->>'company')) AS company, COUNT(*)::int AS cnt FROM contact_discovered_profiles p JOIN users u ON p.user_id = u.id WHERE u.external_id = $1 AND p.enriched_data->>'company' IS NOT NULL AND trim(p.enriched_data->>'company') != '' GROUP BY 1, 2 HAVING COUNT(*) >= 2 `, externalUserID) if err == nil { defer rows.Close() for rows.Next() { var domain, company string var cnt int if err := rows.Scan(&domain, &company, &cnt); err != nil { continue } if isConsumerMailDomain(domain) { continue } if cnt > hints[domain] { hints[domain] = cnt domainBest[domain] = company } } } if s.nc != nil && ncUserID != "" { contacts := s.loadNCContacts(ctx, ncUserID, bookID) orgByDomain := map[string]map[string]int{} for _, c := range contacts { org := strings.TrimSpace(c.Org) if org == "" { continue } domain := emailDomain(c.Email) if isConsumerMailDomain(domain) { continue } if orgByDomain[domain] == nil { orgByDomain[domain] = map[string]int{} } orgByDomain[domain][org]++ } for domain, orgs := range orgByDomain { bestOrg := "" bestCnt := 0 for org, cnt := range orgs { if cnt > bestCnt { bestCnt = cnt bestOrg = org } } if bestCnt < 2 { continue } batchOrg := domainBest[domain] batchCnt := hints[domain] if batchCnt >= 2 && !companyNamesMatch(batchOrg, bestOrg) { delete(domainBest, domain) continue } if batchCnt == 0 || bestCnt >= batchCnt { domainBest[domain] = bestOrg } } } return domainBest } func companyNamesMatch(a, b string) bool { a = normalizeCompanyName(a) b = normalizeCompanyName(b) if a == "" || b == "" { return false } return a == b } func normalizeCompanyName(s string) string { return strings.ToLower(strings.TrimSpace(s)) }