import type { FullContact } from "./types"
import { fullContactDisplayName } from "./types"
import { normalizeEmail } from "./find-contact"

/** Signal that caused two contacts to be flagged as likely duplicates. */
export type DuplicateMatchReason = "email" | "phone" | "name"

/** Two contacts flagged as likely duplicates, with the first signal that matched. */
export interface DuplicatePair {
  contactA: FullContact
  contactB: FullContact
  reason: DuplicateMatchReason
}

/** Max Levenshtein distance at which two normalized names always count as similar. */
const MAX_NAME_DISTANCE = 2
/** Min similarity ratio (1 - distance/maxLen) accepted once the distance cap is exceeded. */
const MIN_NAME_SIMILARITY = 0.88
/** When the longer normalized name is shorter than this, only an exact match counts. */
const MIN_NAME_LENGTH_FOR_FUZZY = 4
/** Phone values with fewer digits than this are ignored when matching. */
const MIN_PHONE_DIGITS = 6
/** Max Levenshtein distance at which two digit strings always count as the same phone. */
const MAX_PHONE_DISTANCE = 1
/** Min similarity ratio (1 - distance/maxLen) accepted for phones beyond the distance cap. */
const MIN_PHONE_SIMILARITY = 0.95

/**
 * Computes the Levenshtein (edit) distance between two strings.
 *
 * Uses two rolling rows, so memory is proportional to `b.length`. Characters
 * are compared by string index, i.e. per UTF-16 code unit.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns Minimum number of single-unit insertions, deletions and
 *   substitutions needed to turn `a` into `b`.
 */
export function levenshteinDistance(a: string, b: string): number {
  if (a === b) return 0
  if (!a.length) return b.length
  if (!b.length) return a.length

  const rows = a.length + 1
  const cols = b.length + 1
  // prev holds the distances for the previous prefix of `a`; row 0 is 0..b.length.
  let prev = Array.from({ length: cols }, (_, i) => i)
  let curr = new Array<number>(cols).fill(0)

  for (let i = 1; i < rows; i++) {
    curr[0] = i
    for (let j = 1; j < cols; j++) {
      const cost = a[i - 1] === b[j - 1] ? 0 : 1
      curr[j] = Math.min(
        prev[j] + 1, // deletion
        curr[j - 1] + 1, // insertion
        prev[j - 1] + cost // substitution (or match)
      )
    }
    // Reuse the two buffers instead of allocating a new row per iteration.
    ;[prev, curr] = [curr, prev]
  }
  // After the final swap the last computed row lives in `prev`.
  return prev[b.length]
}

/**
 * Normalizes a contact name for comparison: strips combining diacritics,
 * lowercases, replaces every character outside `a-z`, `0-9` and whitespace
 * with a space, collapses whitespace runs and trims.
 *
 * NOTE(review): letters that do not decompose to ASCII (e.g. Cyrillic, CJK,
 * "ø", "ł") are replaced by spaces, so names written entirely in such scripts
 * normalize to "" and can never match by name. Confirm whether that is
 * intended before widening the character class.
 *
 * @param name - Raw display name.
 * @returns The normalized name; may be the empty string.
 */
export function normalizeContactName(name: string): string {
  return name
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "")
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, " ")
    .replace(/\s+/g, " ")
    .trim()
}

/**
 * Reduces a phone value to its digits.
 *
 * @param phone - Raw phone string in any format.
 * @returns The input with every non-digit character removed.
 */
export function normalizePhone(phone: string): string {
  return phone.replace(/\D/g, "")
}

/** Similarity test on names that have already been through `normalizeContactName`. */
function areNormalizedNamesSimilar(na: string, nb: string): boolean {
  if (!na || !nb) return false
  if (na === nb) return true

  const maxLen = Math.max(na.length, nb.length)
  // Very short names are too ambiguous for fuzzy matching.
  if (maxLen < MIN_NAME_LENGTH_FOR_FUZZY) return false

  const distance = levenshteinDistance(na, nb)
  if (distance <= MAX_NAME_DISTANCE) return true

  const similarity = 1 - distance / maxLen
  return similarity >= MIN_NAME_SIMILARITY
}

/**
 * Decides whether two names plausibly refer to the same person.
 *
 * Both names are normalized first. Empty normalized names never match;
 * identical ones always do. Otherwise, provided the longer name has at least
 * `MIN_NAME_LENGTH_FOR_FUZZY` characters, the names match when their edit
 * distance is at most `MAX_NAME_DISTANCE` or their similarity ratio is at
 * least `MIN_NAME_SIMILARITY`.
 *
 * @param a - First raw name.
 * @param b - Second raw name.
 * @returns `true` when the names are considered similar.
 */
export function areNamesSimilar(a: string, b: string): boolean {
  return areNormalizedNamesSimilar(normalizeContactName(a), normalizeContactName(b))
}

/**
 * Builds an order-independent key for a pair of contact ids.
 *
 * @param idA - One contact id.
 * @param idB - The other contact id.
 * @returns The two ids joined by `|`, smaller id (by string comparison) first.
 */
export function mergePairKey(idA: string, idB: string): string {
  return idA < idB ? `${idA}|${idB}` : `${idB}|${idA}`
}

/** Returns the contact's emails after `normalizeEmail`, dropping falsy results. */
function collectEmails(c: FullContact): string[] {
  return c.emails.map((e) => normalizeEmail(e.value)).filter(Boolean)
}

/** Returns the contact's phones as digit strings with at least `MIN_PHONE_DIGITS` digits. */
function collectPhones(c: FullContact): string[] {
  return c.phones
    .map((p) => normalizePhone(p.value))
    .filter((d) => d.length >= MIN_PHONE_DIGITS)
}

/** Per-contact comparison data, computed once so pairwise checks do no normalization. */
interface MatchProfile {
  emails: ReadonlySet<string>
  phones: readonly string[]
  /** Normalized display name, or "" when the contact has no display name. */
  name: string
}

/** Precomputes the normalized emails, phones and display name of a contact. */
function buildMatchProfile(c: FullContact): MatchProfile {
  const displayName = fullContactDisplayName(c)
  return {
    emails: new Set(collectEmails(c)),
    phones: collectPhones(c),
    name: displayName ? normalizeContactName(displayName) : "",
  }
}

/** Returns true when two digit strings are equal or within the phone fuzziness thresholds. */
function arePhonesSimilar(pa: string, pb: string): boolean {
  if (pa === pb) return true
  const dist = levenshteinDistance(pa, pb)
  const maxLen = Math.max(pa.length, pb.length)
  return dist <= MAX_PHONE_DISTANCE || 1 - dist / maxLen >= MIN_PHONE_SIMILARITY
}

/**
 * Compares two precomputed profiles and returns the first matching signal,
 * checked in the order email, phone, name; `null` when nothing matches.
 */
function matchProfiles(a: MatchProfile, b: MatchProfile): DuplicateMatchReason | null {
  for (const email of a.emails) {
    if (b.emails.has(email)) return "email"
  }
  for (const pa of a.phones) {
    for (const pb of b.phones) {
      if (arePhonesSimilar(pa, pb)) return "phone"
    }
  }
  if (areNormalizedNamesSimilar(a.name, b.name)) return "name"
  return null
}

/**
 * Determines why two contacts look like duplicates.
 *
 * @returns `"email"`, `"phone"` or `"name"` for the first signal that matches
 *   (checked in that order), or `null` when the contacts do not match.
 */
function findDuplicateReason(a: FullContact, b: FullContact): DuplicateMatchReason | null {
  return matchProfiles(buildMatchProfile(a), buildMatchProfile(b))
}

/**
 * Scans every unordered pair of contacts and collects likely duplicates.
 *
 * Pairs are visited in list order (`contactA` always precedes `contactB` in
 * `contacts`) and the scan stops as soon as `maxResults` pairs were found.
 *
 * @param contacts - Contacts to compare with each other.
 * @param ignoredKeys - Pair keys (as produced by `mergePairKey`) to skip.
 * @param maxResults - Upper bound on returned pairs; a value of 0 or less
 *   yields an empty result.
 * @returns The detected duplicate pairs, at most `maxResults` of them.
 */
export function findDuplicatePairs(
  contacts: FullContact[],
  ignoredKeys: ReadonlySet<string> = new Set<string>(),
  maxResults = 50
): DuplicatePair[] {
  const results: DuplicatePair[] = []
  // Without this guard the cap is only checked after the first push.
  if (maxResults <= 0) return results

  // Normalize each contact once instead of once per pair.
  const profiles = contacts.map((c) => buildMatchProfile(c))
  // Guards against reporting the same id pair twice if ids repeat in the list.
  const seen = new Set<string>()

  for (let i = 0; i < contacts.length; i++) {
    const a = contacts[i]
    for (let j = i + 1; j < contacts.length; j++) {
      const b = contacts[j]
      const key = mergePairKey(a.id, b.id)
      if (seen.has(key) || ignoredKeys.has(key)) continue

      const reason = matchProfiles(profiles[i], profiles[j])
      if (reason) {
        seen.add(key)
        results.push({ contactA: a, contactB: b, reason })
        if (results.length >= maxResults) return results
      }
    }
  }
  return results
}