package migration import ( "context" "encoding/json" "fmt" "net/http" "net/url" "strings" "time" "github.com/jackc/pgx/v5/pgxpool" "github.com/ultisuite/ulti-backend/internal/mail/sanitize" "github.com/ultisuite/ulti-backend/internal/mail/threading" ) const graphMessageSelect = "id,subject,bodyPreview,body,from,toRecipients,ccRecipients,replyTo," + "receivedDateTime,sentDateTime,parentFolderId,isRead,flag,internetMessageId,internetMessageHeaders" type GraphImporter struct { db *pgxpool.Pool client *http.Client baseURL string userUPN string folders map[string]graphFolderMeta } type graphFolderMeta struct { RemoteName string FolderType string } func NewGraphImporter(db *pgxpool.Pool) *GraphImporter { return &GraphImporter{ db: db, client: &http.Client{Timeout: 90 * time.Second}, folders: map[string]graphFolderMeta{}, } } func (g *GraphImporter) WithHTTPClient(c *http.Client) *GraphImporter { if c != nil { g.client = c } return g } func (g *GraphImporter) WithUserPrincipal(upn string) *GraphImporter { g.userUPN = strings.TrimSpace(upn) return g } func (g *GraphImporter) userBase() string { return graphUserBase(g.userUPN) } func (g *GraphImporter) WithBaseURL(baseURL string) *GraphImporter { g.baseURL = strings.TrimRight(strings.TrimSpace(baseURL), "/") return g } func (g *GraphImporter) graphURL(path string) string { if g.baseURL != "" { return g.baseURL + path } return "https://graph.microsoft.com" + path } type graphMessage struct { ID string `json:"id"` Subject string `json:"subject"` BodyPreview string `json:"bodyPreview"` Body graphBody `json:"body"` From graphRecipient `json:"from"` ToRecipients []graphRecipient `json:"toRecipients"` CcRecipients []graphRecipient `json:"ccRecipients"` ReplyTo []graphRecipient `json:"replyTo"` ReceivedDateTime string `json:"receivedDateTime"` SentDateTime string `json:"sentDateTime"` ParentFolderID string `json:"parentFolderId"` IsRead bool `json:"isRead"` Flag graphFlag `json:"flag"` InternetMessageID string `json:"internetMessageId"` InternetMessageHeaders []graphHeader `json:"internetMessageHeaders"` Removed *struct { Reason string `json:"reason"` } `json:"@removed"` } type graphBody struct { ContentType string `json:"contentType"` Content string `json:"content"` } type graphRecipient struct { EmailAddress graphEmailAddress `json:"emailAddress"` } type graphEmailAddress struct { Name string `json:"name"` Address string `json:"address"` } type graphFlag struct { FlagStatus string `json:"flagStatus"` } type graphHeader struct { Name string `json:"name"` Value string `json:"value"` } func (g *GraphImporter) ImportBatch( ctx context.Context, job *Job, accessToken string, delta bool, update func(status string, cursor, stats map[string]any, jobErr string) error, ) error { accountID, err := g.resolveMailAccountID(ctx, job.UserID) if err != nil { return err } if err := ensureDefaultMailFolders(ctx, g.db, accountID); err != nil { return err } if err := g.ensureGraphFolders(ctx, accessToken); err != nil { return err } items, err := LoadImportedItemStore(ctx, g.db, job.ID, job.CursorJSON) if err != nil { return err } if delta { deltaLink, _ := job.CursorJSON["deltaLink"].(string) if deltaLink != "" { more, err := g.importDeltaPage(ctx, job, accessToken, accountID, deltaLink, items) if err != nil { return err } if more { return update("pending", job.CursorJSON, job.StatsJSON, "") } return update("completed", job.CursorJSON, job.StatsJSON, "") } } nextLink, _ := job.CursorJSON["nextLink"].(string) var listURL string if nextLink != "" { listURL = nextLink } else { listURL = g.graphURL(g.userBase()+"/messages?$top=100&$orderby="+url.QueryEscape("receivedDateTime desc")+"&$select="+graphMessageSelect) } body, err := g.apiGet(ctx, listURL, accessToken) if err != nil { return err } var listed struct { Value []graphMessage `json:"value"` NextLink string `json:"@odata.nextLink"` DeltaLink string `json:"@odata.deltaLink"` } if err := json.Unmarshal(body, &listed); err != nil { return err } imported, _ := job.StatsJSON["imported"].(float64) batch := 0 listIndex := int(jsonNumber(job.CursorJSON["listIndex"])) for i := listIndex; i < len(listed.Value) && batch < mailImportBatchSize(); i++ { msg := listed.Value[i] if alreadyImported(items, msg.ID) { listIndex = i + 1 continue } created, err := g.importOne(ctx, accountID, msg) if err != nil { if markErr := items.MarkFailed(ctx, msg.ID, err.Error(), ""); markErr != nil { return markErr } incJobStat(job.StatsJSON, "failed") batch++ listIndex = i + 1 continue } if err := items.MarkImported(ctx, msg.ID); err != nil { return err } if created { imported++ } batch++ listIndex = i + 1 } job.StatsJSON["imported"] = imported job.CursorJSON["listIndex"] = float64(listIndex) if listIndex < len(listed.Value) { return update("pending", job.CursorJSON, job.StatsJSON, "") } delete(job.CursorJSON, "listIndex") if listed.NextLink != "" { job.CursorJSON["nextLink"] = listed.NextLink return update("pending", job.CursorJSON, job.StatsJSON, "") } delete(job.CursorJSON, "nextLink") if delta { if listed.DeltaLink != "" { job.CursorJSON["deltaLink"] = listed.DeltaLink } else if link, err := g.initDeltaLink(ctx, accessToken); err == nil && link != "" { job.CursorJSON["deltaLink"] = link } } job.StatsJSON["phase"] = "imported" return update("completed", job.CursorJSON, job.StatsJSON, "") } func (g *GraphImporter) importDeltaPage(ctx context.Context, job *Job, accessToken, accountID, deltaLink string, items *ImportedItemStore) (more bool, err error) { body, err := g.apiGet(ctx, deltaLink, accessToken) if err != nil { return false, err } var parsed struct { Value []graphMessage `json:"value"` NextLink string `json:"@odata.nextLink"` DeltaLink string `json:"@odata.deltaLink"` } if err := json.Unmarshal(body, &parsed); err != nil { return false, err } deltaCount, _ := job.StatsJSON["delta_imported"].(float64) deleted, _ := job.StatsJSON["delta_deleted"].(float64) for _, msg := range parsed.Value { if msg.Removed != nil { if err := g.deleteByGraphID(ctx, accountID, msg.ID); err != nil { return false, err } deleted++ continue } if alreadyImported(items, msg.ID) { continue } ok, err := g.importOne(ctx, accountID, msg) if err != nil { if markErr := items.MarkFailed(ctx, msg.ID, err.Error(), ""); markErr != nil { return false, markErr } incJobStat(job.StatsJSON, "failed") continue } if err := items.MarkImported(ctx, msg.ID); err != nil { return false, err } if ok { deltaCount++ } } job.StatsJSON["delta_imported"] = deltaCount job.StatsJSON["delta_deleted"] = deleted if parsed.NextLink != "" { job.CursorJSON["deltaLink"] = parsed.NextLink job.StatsJSON["phase"] = "delta" return true, nil } if parsed.DeltaLink != "" { job.CursorJSON["deltaLink"] = parsed.DeltaLink } job.StatsJSON["phase"] = "delta" return false, nil } func (g *GraphImporter) initDeltaLink(ctx context.Context, accessToken string) (string, error) { body, err := g.apiGet(ctx, g.graphURL(g.userBase()+"/messages/delta?$select=id"), accessToken) if err != nil { return "", err } var parsed struct { DeltaLink string `json:"@odata.deltaLink"` NextLink string `json:"@odata.nextLink"` } if err := json.Unmarshal(body, &parsed); err != nil { return "", err } if parsed.DeltaLink != "" { return parsed.DeltaLink, nil } return parsed.NextLink, nil } func (g *GraphImporter) importOne(ctx context.Context, accountID string, msg graphMessage) (bool, error) { meta := g.folders[msg.ParentFolderID] if meta.RemoteName == "" { meta = graphFolderMeta{RemoteName: "ARCHIVE", FolderType: "archive"} } folderID, err := ensureMailFolder(ctx, g.db, accountID, displayFolderName(meta.RemoteName, meta.FolderType), meta.RemoteName, meta.FolderType) if err != nil { return false, err } headers := indexGraphHeaders(msg.InternetMessageHeaders) rfcID := threading.NormalizeMessageID(msg.InternetMessageID) if rfcID == "" { rfcID = threading.NormalizeMessageID(headers["message-id"]) } if rfcID == "" { rfcID = threading.NormalizeMessageID("") } inReplyTo := threading.NormalizeMessageID(headers["in-reply-to"]) references := parseReferences(headers["references"]) bodyText, bodyHTML := extractGraphBody(msg.Body) snippet := strings.TrimSpace(msg.BodyPreview) if snippet == "" { snippet = truncateRunes(bodyText, 200) } date := parseGraphTime(msg.ReceivedDateTime) if date.IsZero() { date = parseGraphTime(msg.SentDateTime) } if date.IsZero() { date = time.Now().UTC() } fromJSON := graphRecipientJSON(msg.From) toJSON := graphRecipientsJSON(msg.ToRecipients) ccJSON := graphRecipientsJSON(msg.CcRecipients) replyToJSON := graphRecipientsJSON(msg.ReplyTo) flags := graphFlags(msg.IsRead, msg.Flag.FlagStatus) uid := remoteMessageUID(msg.ID) var messageID string var existed bool _ = g.db.QueryRow(ctx, `SELECT EXISTS(SELECT 1 FROM messages WHERE folder_id = $1 AND uid = $2)`, folderID, uid).Scan(&existed) err = g.db.QueryRow(ctx, ` INSERT INTO messages ( account_id, folder_id, uid, message_id, subject, from_addr, to_addrs, cc_addrs, reply_to, date, snippet, body_text, body_html, flags, labels, in_reply_to, references_header ) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17) ON CONFLICT (folder_id, uid) DO UPDATE SET message_id = EXCLUDED.message_id, subject = EXCLUDED.subject, from_addr = EXCLUDED.from_addr, to_addrs = EXCLUDED.to_addrs, cc_addrs = EXCLUDED.cc_addrs, reply_to = EXCLUDED.reply_to, date = EXCLUDED.date, snippet = EXCLUDED.snippet, body_text = EXCLUDED.body_text, body_html = EXCLUDED.body_html, flags = EXCLUDED.flags, in_reply_to = EXCLUDED.in_reply_to, references_header = EXCLUDED.references_header, updated_at = NOW() RETURNING id `, accountID, folderID, uid, rfcID, msg.Subject, fromJSON, toJSON, ccJSON, replyToJSON, date, snippet, bodyText, sanitize.SanitizeHTML(bodyHTML), flags, []string{}, inReplyTo, references, ).Scan(&messageID) if err != nil { return false, err } if err := threading.ApplyMessageThread(ctx, g.db, accountID, messageID, rfcID, inReplyTo, references); err != nil { return false, err } return !existed, nil } func (g *GraphImporter) deleteByGraphID(ctx context.Context, accountID, graphID string) error { if strings.TrimSpace(graphID) == "" { return nil } uid := remoteMessageUID(graphID) _, err := g.db.Exec(ctx, `DELETE FROM messages WHERE account_id = $1::uuid AND uid = $2`, accountID, uid) return err } func (g *GraphImporter) ensureGraphFolders(ctx context.Context, accessToken string) error { if len(g.folders) > 0 { return nil } body, err := g.apiGet(ctx, g.graphURL(g.userBase()+"/mailFolders?$top=100&$select=id,displayName,wellKnownName"), accessToken) if err != nil { return err } var parsed struct { Value []struct { ID string `json:"id"` DisplayName string `json:"displayName"` WellKnownName string `json:"wellKnownName"` } `json:"value"` } if err := json.Unmarshal(body, &parsed); err != nil { return err } for _, f := range parsed.Value { remote, ftype := graphWellKnownFolder(f.WellKnownName, f.DisplayName) g.folders[f.ID] = graphFolderMeta{RemoteName: remote, FolderType: ftype} } return nil } func graphWellKnownFolder(wellKnown, displayName string) (remoteName, folderType string) { switch strings.ToLower(strings.TrimSpace(wellKnown)) { case "inbox": return "INBOX", "inbox" case "sentitems": return "SENT", "sent" case "drafts": return "DRAFT", "drafts" case "deleteditems": return "TRASH", "trash" case "junkemail": return "SPAM", "spam" case "archive": return "ARCHIVE", "archive" default: name := strings.TrimSpace(displayName) if name == "" { name = "CUSTOM" } return strings.ToUpper(strings.ReplaceAll(name, " ", "_")), "custom" } } func graphFlags(isRead bool, flagStatus string) []string { flags := []string{} if isRead { flags = append(flags, "\\Seen") } if strings.EqualFold(flagStatus, "flagged") { flags = append(flags, "\\Flagged") } return flags } func extractGraphBody(body graphBody) (text, html string) { content := body.Content switch strings.ToLower(body.ContentType) { case "html": html = content case "text": text = content default: text = content } return text, html } func graphRecipientJSON(r graphRecipient) []byte { if strings.TrimSpace(r.EmailAddress.Address) == "" { return []byte("[]") } type addr struct { Name string `json:"name,omitempty"` Email string `json:"email"` } b, _ := json.Marshal([]addr{{Name: r.EmailAddress.Name, Email: strings.ToLower(r.EmailAddress.Address)}}) return b } func graphRecipientsJSON(recipients []graphRecipient) []byte { type addr struct { Name string `json:"name,omitempty"` Email string `json:"email"` } out := make([]addr, 0, len(recipients)) for _, r := range recipients { email := strings.ToLower(strings.TrimSpace(r.EmailAddress.Address)) if email == "" { continue } out = append(out, addr{Name: r.EmailAddress.Name, Email: email}) } b, _ := json.Marshal(out) return b } func indexGraphHeaders(headers []graphHeader) map[string]string { out := map[string]string{} for _, h := range headers { key := strings.ToLower(strings.TrimSpace(h.Name)) if key != "" && out[key] == "" { out[key] = h.Value } } return out } func parseGraphTime(raw string) time.Time { raw = strings.TrimSpace(raw) if raw == "" { return time.Time{} } if t, err := time.Parse(time.RFC3339Nano, raw); err == nil { return t.UTC() } if t, err := time.Parse(time.RFC3339, raw); err == nil { return t.UTC() } return time.Time{} } func (g *GraphImporter) apiGet(ctx context.Context, url, accessToken string) ([]byte, error) { raw, err := apiGet(ctx, g.client, url, accessToken) if err != nil { return nil, fmt.Errorf("graph api: %w", err) } return raw, nil } func (g *GraphImporter) resolveMailAccountID(ctx context.Context, userID string) (string, error) { importer := NewGmailImporter(g.db) return importer.resolveMailAccountID(ctx, userID) } func remoteMessageUID(remoteID string) int64 { return gmailUID(remoteID) }