// Listing metadata: 180 lines, 5.2 KiB, TypeScript.
/**
 * Words that mark a link, or the text around it, as unsubscribe-related.
 * Covers English and French spellings (with and without accents) and is
 * matched case-insensitively anywhere in the tested string.
 */
const UNSUBSCRIBE_TERM =
  /unsubscribe|désabonner|desabonner|préférences|preferences/i

/**
 * Matches any string containing "unsub" (case-insensitive), e.g. href paths
 * like /unsub, query strings like ?unsub=, List-Unsubscribe URLs, etc.
 */
const UNSUBSCRIBE_HREF = /unsub/i

/** A parsed `mailto:` unsubscribe target. */
export type UnsubscribeMailto = {
  /** Recipient address; always contains an `@`. */
  address: string
  /** Value of the `subject` header field, when the URL carried one. */
  subject?: string
  /** Value of the `body` header field, when the URL carried one. */
  body?: string
}

/**
 * How a message can be unsubscribed from: either by sending an email
 * (`kind: "mailto"`) or by visiting a URL (`kind: "http"`).
 */
export type UnsubscribeAction =
  | { kind: "mailto"; mailto: UnsubscribeMailto }
  | { kind: "http"; url: string }

/**
 * Tells whether a link target looks unsubscribe-related.
 *
 * @param href - Raw link target; surrounding whitespace is ignored.
 * @returns `true` when the trimmed value is non-empty and matches
 *   `UNSUBSCRIBE_HREF` or `UNSUBSCRIBE_TERM`.
 */
function isUnsubscribeHref(href: string): boolean {
  const h = href.trim()
  if (!h) return false
  return UNSUBSCRIBE_HREF.test(h) || UNSUBSCRIBE_TERM.test(h)
}

/**
 * Strips one pair of enclosing angle brackets from a header entry.
 *
 * Whitespace found inside the brackets is removed as well: RFC 2369 requires
 * clients to ignore it, since it may have been inserted when a long URL was
 * folded across header lines.
 *
 * @param s - Candidate entry such as `<mailto:list@example.com>`.
 * @returns The bracket contents without whitespace, or the trimmed input when
 *   it is not wrapped in `<...>`.
 */
function unwrapAngle(s: string): string {
  const t = s.trim()
  if (t.startsWith("<") && t.endsWith(">")) {
    return t.slice(1, -1).replace(/\s+/g, "")
  }
  return t
}

/**
 * Splits a List-Unsubscribe style header value into its entries.
 *
 * Entries are separated by commas that sit outside angle brackets, so a comma
 * inside `<...>` (e.g. in a query string) does not split the URL. When an
 * entry contains an angle-bracketed segment, only that segment is returned:
 * RFC 2369 allows comments around the URL, as in
 * `(Use this command) <mailto:list@example.com>`, and they must not prevent
 * the URL from being recognised. Entries without brackets are returned
 * trimmed, unchanged.
 *
 * @param raw - Raw header value.
 * @returns Non-empty entries in their original order.
 */
function splitListHeaderParts(raw: string): string[] {
  const parts: string[] = []
  let cur = ""
  let depth = 0

  // Emits the entry accumulated so far (if any) and resets the buffer.
  const flush = (): void => {
    const piece = cur.trim()
    cur = ""
    if (!piece) return
    const bracketed = piece.match(/<[^<>]*>/)
    parts.push(bracketed?.[0] ?? piece)
  }

  for (const ch of raw) {
    if (ch === "<") depth++
    else if (ch === ">") depth = Math.max(0, depth - 1) // tolerate a stray ">"
    if (ch === "," && depth === 0) flush()
    else cur += ch
  }
  flush()
  return parts
}

/**
 * Parses a `mailto:` URL (optionally wrapped in `<...>`) into its address and
 * optional `subject` / `body` header fields.
 *
 * Header fields are read from the raw query string instead of through
 * `URLSearchParams`, because mailto queries are not form-encoded: per
 * RFC 6068 a `+` is a literal plus sign, and header field names are
 * case-insensitive (`Subject=` is as valid as `subject=`).
 *
 * @param raw - Candidate URL, e.g. a List-Unsubscribe entry or an `href` value.
 * @returns The parsed target, or `null` when `raw` is not a `mailto:` URL or
 *   its address part contains no `@`.
 */
function parseMailtoUrl(raw: string): UnsubscribeMailto | null {
  const unwrapped = unwrapAngle(raw)
  if (!/^mailto:/i.test(unwrapped)) return null

  // Percent-decodes one component; malformed escapes are kept verbatim
  // rather than discarding the whole field.
  const decode = (s: string): string => {
    try {
      return decodeURIComponent(s)
    } catch {
      return s
    }
  }

  // Returns the first header field called `name` (given in lower case) from a
  // `?a=b&c=d` query string, or undefined when it is absent.
  const headerField = (search: string, name: string): string | undefined => {
    for (const pair of search.replace(/^\?/, "").split("&")) {
      if (!pair) continue
      const eq = pair.indexOf("=")
      const key = eq === -1 ? pair : pair.slice(0, eq)
      if (decode(key).toLowerCase() !== name) continue
      return eq === -1 ? "" : decode(pair.slice(eq + 1))
    }
    return undefined
  }

  try {
    const u = new URL(unwrapped)
    const pathAddr = decodeURIComponent(u.pathname.replace(/^\/+/, ""))
    // Fall back to slicing the raw string when the URL parser did not put the
    // address in the path.
    const address = pathAddr.includes("@")
      ? pathAddr
      : decodeURIComponent(unwrapped.replace(/^mailto:/i, "").split("?")[0] ?? "")
    if (!address.includes("@")) return null
    return {
      address,
      subject: headerField(u.search, "subject"),
      body: headerField(u.search, "body"),
    }
  } catch {
    // URL parsing or address decoding failed: keep the undecoded address and
    // drop the header fields.
    const addr = unwrapped.replace(/^mailto:/i, "").split("?")[0]?.trim()
    if (!addr?.includes("@")) return null
    return { address: addr }
  }
}

/**
 * Extracts the first `mailto:` target and the first http(s) URL from a
 * List-Unsubscribe header value.
 *
 * @param raw - Raw header value (comma-separated entries, usually in `<...>`).
 * @returns The first entry of each kind, or `null` for a kind not present.
 */
function parseListUnsubscribeHeader(raw: string): {
  mailto: UnsubscribeMailto | null
  http: string | null
} {
  let mailto: UnsubscribeMailto | null = null
  let http: string | null = null
  for (const part of splitListHeaderParts(raw)) {
    // Only the first entry of each kind is kept, so stop once both are known.
    if (mailto && http) break
    const p = unwrapAngle(part)
    const m = parseMailtoUrl(p)
    if (m && !mailto) {
      mailto = m
      continue
    }
    if (/^https?:\/\//i.test(p) && !http) {
      http = p
    }
  }
  return { mailto, http }
}

/**
 * Finds the first http(s) URL in a header value.
 *
 * Tries, in order: the structured List-Unsubscribe parse, the first
 * angle-bracketed http(s) URL anywhere in the string, then the first bare
 * http(s) URL (ending at whitespace, `>` or `,`).
 *
 * NOTE(review): no caller is visible in this file — confirm it is still needed.
 *
 * @param raw - Raw header value.
 * @returns The URL without brackets, or `null` when none is found.
 */
function firstHttpUrl(raw: string): string | null {
  const fromHeader = parseListUnsubscribeHeader(raw).http
  if (fromHeader) return fromHeader
  const angle = raw.match(/<https?:\/\/[^>]+>/i)
  if (angle?.[0]) return angle[0].slice(1, -1)
  const plain = raw.match(/https?:\/\/[^\s>,]+/i)
  return plain?.[0] ?? null
}

/**
 * Finds the first `<a>` element in an HTML body that links to a `mailto:`
 * address and looks unsubscribe-related, judged by its inner markup or by
 * the link target itself.
 *
 * The `href` attribute value is HTML text, so an encoded ampersand (`&amp;`,
 * `&#38;`, `&#x26;`) is decoded before the URL is parsed; otherwise
 * `?subject=a&amp;body=b` would lose its `body` field.
 *
 * @param html - HTML body to scan.
 * @returns The parsed mailto target, or `null` when no anchor qualifies.
 */
function mailtoFromHtml(html: string): UnsubscribeMailto | null {
  const re =
    /<a\b[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi
  let m: RegExpExecArray | null
  while ((m = re.exec(html)) !== null) {
    const href = (m[1] ?? "").replace(/&(?:amp|#0*38|#x0*26);/gi, "&")
    const text = m[2] ?? ""
    const mailto = parseMailtoUrl(href)
    if (mailto && (UNSUBSCRIBE_TERM.test(text) || isUnsubscribeHref(href))) {
      return mailto
    }
  }
  return null
}

/**
 * Finds the first `<a>` element in an HTML body whose target is an http(s)
 * URL and that looks unsubscribe-related, judged by the link target or by
 * the anchor's inner markup.
 *
 * The `href` attribute value is HTML text: an encoded ampersand (`&amp;`,
 * `&#38;`, `&#x26;`) is decoded and surrounding whitespace is dropped, so
 * the returned URL can be requested as-is.
 *
 * @param html - HTML body to scan.
 * @returns The decoded URL, or `null` when no anchor qualifies.
 */
function httpFromHtml(html: string): string | null {
  const re =
    /<a\b[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi
  let m: RegExpExecArray | null
  while ((m = re.exec(html)) !== null) {
    const href = (m[1] ?? "").replace(/&(?:amp|#0*38|#x0*26);/gi, "&").trim()
    const text = m[2] ?? ""
    if (/^https?:\/\//i.test(href) && (isUnsubscribeHref(href) || UNSUBSCRIBE_TERM.test(text))) {
      return href
    }
  }
  return null
}

/**
 * Finds the first http(s) URL in a plain-text body that looks
 * unsubscribe-related: either the URL itself matches `UNSUBSCRIBE_HREF`, or
 * an unsubscribe term appears within 40 characters on either side of it.
 *
 * URLs end at whitespace, `)`, `<`, `>` or `"`, and trailing sentence
 * punctuation is dropped, so `<https://example.com/unsub>.` yields
 * `https://example.com/unsub`.
 *
 * @param text - Plain-text body to scan.
 * @returns The matching URL, or `null` when none is found.
 */
function httpFromText(text: string): string | null {
  const re = /https?:\/\/[^\s)<>"]+/gi
  let m: RegExpExecArray | null
  while ((m = re.exec(text)) !== null) {
    const url = (m[0] ?? "").replace(/[.,;:!?'\]]+$/, "")
    // Nothing but the scheme is left (e.g. "https://..."): not a usable URL.
    if (/^https?:\/\/$/i.test(url)) continue
    const start = Math.max(0, m.index - 40)
    const ctx = text.slice(start, m.index + url.length + 40)
    if (UNSUBSCRIBE_HREF.test(url) || UNSUBSCRIBE_TERM.test(ctx)) return url
  }
  return null
}

/**
 * Resolves how to unsubscribe from a message.
 *
 * Sources are tried in this order and the first hit wins:
 * 1. `mailto:` entry of the List-Unsubscribe header
 * 2. http(s) entry of the List-Unsubscribe header
 * 3. `mailto:` anchor in the HTML body
 * 4. http(s) anchor in the HTML body
 * 5. http(s) URL in the plain-text body
 *
 * @param bodyHtml - HTML body of the message, if any.
 * @param bodyText - Plain-text body of the message, if any.
 * @param listUnsubscribeHeader - Raw List-Unsubscribe header value, if any.
 * @returns The action to take, or `null` when nothing was found.
 */
export function findUnsubscribeAction(
  bodyHtml?: string,
  bodyText?: string,
  listUnsubscribeHeader?: string
): UnsubscribeAction | null {
  if (listUnsubscribeHeader?.trim()) {
    const { mailto, http } = parseListUnsubscribeHeader(listUnsubscribeHeader)
    if (mailto) return { kind: "mailto", mailto }
    if (http) return { kind: "http", url: http }
  }

  const html = bodyHtml?.trim() ?? ""
  const text = bodyText?.trim() ?? ""

  const htmlMailto = mailtoFromHtml(html)
  if (htmlMailto) return { kind: "mailto", mailto: htmlMailto }

  const htmlHttp = httpFromHtml(html)
  if (htmlHttp) return { kind: "http", url: htmlHttp }

  // Scan the text body once and reuse the result.
  const textHttp = httpFromText(text)
  return textHttp ? { kind: "http", url: textHttp } : null
}

/**
 * Resolves the unsubscribe target as a single URL string.
 *
 * An http action is returned as its URL; a mailto action is rebuilt into a
 * `mailto:` URL. The `subject` and `body` fields are percent-encoded with
 * `encodeURIComponent`, so a space becomes `%20`: the `+` that
 * `URLSearchParams` would emit is a literal plus sign in a mailto URL
 * (RFC 6068).
 *
 * @deprecated Use findUnsubscribeAction
 * @param bodyHtml - HTML body of the message, if any.
 * @param bodyText - Plain-text body of the message, if any.
 * @param listUnsubscribeHeader - Raw List-Unsubscribe header value, if any.
 * @returns The URL, or `null` when no unsubscribe action was found.
 */
export function findUnsubscribeHref(
  bodyHtml?: string,
  bodyText?: string,
  listUnsubscribeHeader?: string
): string | null {
  const action = findUnsubscribeAction(bodyHtml, bodyText, listUnsubscribeHeader)
  if (!action) return null
  if (action.kind === "http") return action.url
  const { address, subject, body } = action.mailto
  const fields: string[] = []
  if (subject) fields.push(`subject=${encodeURIComponent(subject)}`)
  if (body) fields.push(`body=${encodeURIComponent(body)}`)
  return `mailto:${address}${fields.length > 0 ? `?${fields.join("&")}` : ""}`
}