package migration

import (
	"context"
	"encoding/json"
	"fmt"
	"net/url"
	"path"
	"strings"
)

// hasDriveDeltaCursor reports whether job's cursor already holds a non-blank
// delta position: "driveChangeToken" when provider is "google",
// "driveDeltaLink" for any other provider.
func (d *DriveImporter) hasDriveDeltaCursor(job *Job, provider string) bool {
	if provider == "google" {
		token, _ := job.CursorJSON["driveChangeToken"].(string)
		return strings.TrimSpace(token) != ""
	}
	link, _ := job.CursorJSON["driveDeltaLink"].(string)
	return strings.TrimSpace(link) != ""
}

// bootstrapDriveDelta fetches an initial delta position from the provider and
// stores it in cursor. For "google" it requests a start page token and writes
// it to cursor["driveChangeToken"]; for any other provider it walks the
// Microsoft drive delta listing and writes the resulting delta link to
// cursor["driveDeltaLink"]. An empty token/link leaves cursor untouched and is
// not treated as an error.
func (d *DriveImporter) bootstrapDriveDelta(ctx context.Context, accessToken, provider string, cursor map[string]any) error {
	switch provider {
	case "google":
		body, err := apiGet(ctx, d.client, "https://www.googleapis.com/drive/v3/changes/startPageToken?spaces=drive", accessToken)
		if err != nil {
			return err
		}
		var parsed struct {
			StartPageToken string `json:"startPageToken"`
		}
		if err := json.Unmarshal(body, &parsed); err != nil {
			return err
		}
		if parsed.StartPageToken != "" {
			cursor["driveChangeToken"] = parsed.StartPageToken
		}
		return nil
	default:
		link, err := d.walkMicrosoftDriveDelta(ctx, accessToken, graphMicrosoftURL(d.userUPN, "/drive/root/delta?$select=id,name,folder,file,size,parentReference,deleted"))
		if err != nil {
			return err
		}
		if link != "" {
			cursor["driveDeltaLink"] = link
		}
		return nil
	}
}

// walkMicrosoftDriveDelta follows "@odata.nextLink" pages starting at listURL
// until a page carries "@odata.deltaLink", and returns that delta link. The
// listed items themselves are ignored. It returns "" with a nil error when the
// pages run out without a delta link.
func (d *DriveImporter) walkMicrosoftDriveDelta(ctx context.Context, accessToken, listURL string) (string, error) {
	for listURL != "" {
		body, err := apiGet(ctx, d.client, listURL, accessToken)
		if err != nil {
			return "", err
		}
		var parsed struct {
			NextLink  string `json:"@odata.nextLink"`
			DeltaLink string `json:"@odata.deltaLink"`
		}
		if err := json.Unmarshal(body, &parsed); err != nil {
			return "", err
		}
		if parsed.DeltaLink != "" {
			return parsed.DeltaLink, nil
		}
		listURL = parsed.NextLink
	}
	return "", nil
}

// importDriveDelta dispatches one delta-import step to the provider-specific
// implementation: Google for provider "google", Microsoft otherwise.
func (d *DriveImporter) importDriveDelta(ctx context.Context, job *Job, accessToken, provider, ncUserID, root string, items *ImportedItemStore, update progressUpdater) error {
	switch provider {
	case "google":
		return d.importGoogleDriveDelta(ctx, job, accessToken, ncUserID, root, items, update)
	default:
		return d.importMicrosoftDriveDelta(ctx, job, accessToken, ncUserID, root, items, update)
	}
}

// importGoogleDriveDelta processes at most one batch of changes from the page
// identified by job.CursorJSON["driveChangeToken"].
//
// Removed changes, changes without a file, and trashed files are deleted from
// the target; everything else is uploaded. Counters in job.StatsJSON
// ("delta_imported", "delta_deleted", "exported", "skipped") are updated, and
// the step ends by calling update with:
//   - "pending" and cursor "listIndex" set, when the page has unprocessed
//     changes left;
//   - "pending" and the token advanced to nextPageToken, when another page
//     follows;
//   - "completed" (token advanced to newStartPageToken if present) otherwise.
//
// It returns an error if the token is missing, the request or decoding fails,
// or a delete/upload step returns an error; in that case no progress is
// reported through update.
func (d *DriveImporter) importGoogleDriveDelta(ctx context.Context, job *Job, accessToken, ncUserID, root string, items *ImportedItemStore, update progressUpdater) error {
	pageToken, _ := job.CursorJSON["driveChangeToken"].(string)
	if pageToken == "" {
		return fmt.Errorf("google drive delta token missing")
	}
	listURL := "https://www.googleapis.com/drive/v3/changes?pageSize=100&spaces=drive&includeRemoved=true&fields=" +
		url.QueryEscape("nextPageToken,newStartPageToken,changes(fileId,removed,file(id,name,mimeType,size,parents,trashed))") +
		"&pageToken=" + url.QueryEscape(pageToken)
	body, err := apiGet(ctx, d.client, listURL, accessToken)
	if err != nil {
		return err
	}
	var parsed struct {
		Changes           []googleDriveChange `json:"changes"`
		NextPageToken     string              `json:"nextPageToken"`
		NewStartPageToken string              `json:"newStartPageToken"`
	}
	if err := json.Unmarshal(body, &parsed); err != nil {
		return err
	}

	// Counters are read as float64; a missing or differently-typed entry
	// yields 0 through the failed type assertion.
	imported, _ := job.StatsJSON["delta_imported"].(float64)
	deleted, _ := job.StatsJSON["delta_deleted"].(float64)
	exported, _ := job.StatsJSON["exported"].(float64)
	skipped, _ := job.StatsJSON["skipped"].(float64)

	batch := 0
	listIndex := int(jsonNumber(job.CursorJSON["listIndex"]))
	// next is the index of the first change not yet handled; it doubles as the
	// resume position persisted in the cursor.
	next := listIndex
	for ; next < len(parsed.Changes) && batch < driveImportBatchSize(); next++ {
		change := parsed.Changes[next]
		if change.Removed || change.File == nil || change.File.Trashed {
			if err := d.deleteDriveItem(ctx, ncUserID, root, items, change.FileID); err != nil {
				return err
			}
			deleted++
			batch++
			continue
		}
		item := googleFileToDriveItem(*change.File)
		relPath := d.resolveDriveRelPath(items, item)
		if err := d.uploadDriveItem(ctx, accessToken, ncUserID, root, relPath, item, items, &imported, &exported, &skipped, job.StatsJSON); err != nil {
			return err
		}
		batch++
	}

	job.StatsJSON["delta_imported"] = imported
	job.StatsJSON["delta_deleted"] = deleted
	job.StatsJSON["exported"] = exported
	job.StatsJSON["skipped"] = skipped

	if next < len(parsed.Changes) {
		job.CursorJSON["listIndex"] = float64(next)
		return update("pending", job.CursorJSON, job.StatsJSON, "")
	}
	delete(job.CursorJSON, "listIndex")
	if parsed.NextPageToken != "" {
		job.CursorJSON["driveChangeToken"] = parsed.NextPageToken
		return update("pending", job.CursorJSON, job.StatsJSON, "")
	}
	if parsed.NewStartPageToken != "" {
		job.CursorJSON["driveChangeToken"] = parsed.NewStartPageToken
	}
	job.StatsJSON["phase"] = "delta"
	return update("completed", job.CursorJSON, job.StatsJSON, "")
}

// googleDriveChange is one entry of the "changes" array decoded by
// importGoogleDriveDelta.
type googleDriveChange struct {
	FileID  string           `json:"fileId"`
	Removed bool             `json:"removed"`
	File    *googleDriveFile `json:"file"`
}

// googleDriveFile is the file payload of a googleDriveChange. Size is decoded
// as a string and parsed by googleFileToDriveItem.
type googleDriveFile struct {
	ID       string   `json:"id"`
	Name     string   `json:"name"`
	MimeType string   `json:"mimeType"`
	Size     string   `json:"size"`
	Parents  []string `json:"parents"`
	Trashed  bool     `json:"trashed"`
}

// googleFileToDriveItem converts f into a driveItem. Folders (MIME type
// "application/vnd.google-apps.folder") carry no download information. For
// other files, a MIME type recognised by googleWorkspaceExport marks the item
// for export and renames it via driveExportFileName; anything else gets a
// direct "alt=media" download URL. Only the first parent, if any, is kept.
func googleFileToDriveItem(f googleDriveFile) driveItem {
	size := int64(0)
	if f.Size != "" {
		// Best effort: a size that does not parse is treated as 0.
		if _, err := fmt.Sscan(f.Size, &size); err != nil {
			size = 0
		}
	}
	item := driveItem{
		ID:       f.ID,
		Name:     f.Name,
		IsFolder: f.MimeType == "application/vnd.google-apps.folder",
		Size:     size,
		MimeType: f.MimeType,
	}
	if len(f.Parents) > 0 {
		item.ParentID = f.Parents[0]
	}
	if item.IsFolder {
		return item
	}
	if exportMime, ext, ok := googleWorkspaceExport(f.MimeType); ok {
		item.Export = true
		item.ExportMime = exportMime
		item.ExportExt = ext
		item.Name = driveExportFileName(f.Name, ext)
	} else {
		item.Download = "https://www.googleapis.com/drive/v3/files/" + url.PathEscape(f.ID) + "?alt=media"
	}
	return item
}

// importMicrosoftDriveDelta processes at most one batch of entries from the
// page at job.CursorJSON["driveDeltaLink"].
//
// Entries with a blank id are ignored. Entries carrying "@removed" or
// "deleted" are deleted from the target; everything else is uploaded.
// Counters in job.StatsJSON ("delta_imported", "delta_deleted", "skipped") are
// updated, and the step ends by calling update with:
//   - "pending" and cursor "listIndex" set, when the page has unprocessed
//     entries left;
//   - "pending" and the link advanced to "@odata.nextLink", when another page
//     follows;
//   - "completed" (link advanced to "@odata.deltaLink" if present) otherwise.
//
// It returns an error if the link is missing, the request or decoding fails,
// or a delete/upload step returns an error; in that case no progress is
// reported through update.
func (d *DriveImporter) importMicrosoftDriveDelta(ctx context.Context, job *Job, accessToken, ncUserID, root string, items *ImportedItemStore, update progressUpdater) error {
	deltaLink, _ := job.CursorJSON["driveDeltaLink"].(string)
	if deltaLink == "" {
		return fmt.Errorf("microsoft drive delta link missing")
	}
	body, err := apiGet(ctx, d.client, deltaLink, accessToken)
	if err != nil {
		return err
	}
	var parsed struct {
		Value     []graphDriveItem `json:"value"`
		NextLink  string           `json:"@odata.nextLink"`
		DeltaLink string           `json:"@odata.deltaLink"`
	}
	if err := json.Unmarshal(body, &parsed); err != nil {
		return err
	}

	// Counters are read as float64; a missing or differently-typed entry
	// yields 0 through the failed type assertion.
	imported, _ := job.StatsJSON["delta_imported"].(float64)
	deleted, _ := job.StatsJSON["delta_deleted"].(float64)
	skipped, _ := job.StatsJSON["skipped"].(float64)

	batch := 0
	listIndex := int(jsonNumber(job.CursorJSON["listIndex"]))
	// The resume position is the loop index itself, not listIndex+batch:
	// blank-id entries are passed over without counting toward the batch, so a
	// count-based cursor would fall behind the real position, re-process
	// entries on the next run, and never advance past a trailing blank entry.
	next := listIndex
	for ; next < len(parsed.Value) && batch < driveImportBatchSize(); next++ {
		entry := parsed.Value[next]
		sourceID := strings.TrimSpace(entry.ID)
		if sourceID == "" {
			continue
		}
		if entry.Removed != nil || entry.Deleted != nil {
			if err := d.deleteDriveItem(ctx, ncUserID, root, items, sourceID); err != nil {
				return err
			}
			deleted++
			batch++
			continue
		}
		converted := graphDriveToItem(d.userUPN, entry)
		relPath := d.resolveDriveRelPath(items, converted)
		// Microsoft items are never exported, hence the nil exported counter.
		if err := d.uploadDriveItem(ctx, accessToken, ncUserID, root, relPath, converted, items, &imported, nil, &skipped, job.StatsJSON); err != nil {
			return err
		}
		batch++
	}

	job.StatsJSON["delta_imported"] = imported
	job.StatsJSON["delta_deleted"] = deleted
	job.StatsJSON["skipped"] = skipped

	if next < len(parsed.Value) {
		job.CursorJSON["listIndex"] = float64(next)
		return update("pending", job.CursorJSON, job.StatsJSON, "")
	}
	delete(job.CursorJSON, "listIndex")
	if parsed.NextLink != "" {
		job.CursorJSON["driveDeltaLink"] = parsed.NextLink
		return update("pending", job.CursorJSON, job.StatsJSON, "")
	}
	if parsed.DeltaLink != "" {
		job.CursorJSON["driveDeltaLink"] = parsed.DeltaLink
	}
	job.StatsJSON["phase"] = "delta"
	return update("completed", job.CursorJSON, job.StatsJSON, "")
}

// graphDriveItem is one entry of the "value" array decoded by
// importMicrosoftDriveDelta. The pointer-typed facets are nil when the
// corresponding JSON key is absent; a non-nil Removed or Deleted marks the
// entry for deletion.
type graphDriveItem struct {
	ID     string `json:"id"`
	Name   string `json:"name"`
	Size   int64  `json:"size"`
	Folder *struct {
		ChildCount int `json:"childCount"`
	} `json:"folder"`
	File *struct {
		MimeType string `json:"mimeType"`
	} `json:"file"`
	ParentReference *struct {
		ID string `json:"id"`
	} `json:"parentReference"`
	Removed *struct {
		Reason string `json:"reason"`
	} `json:"@removed"`
	Deleted *struct{} `json:"deleted"`
}

// graphDriveToItem converts item into a driveItem. Entries with a folder facet
// become folders with no download URL; all others get a "/content" download
// URL built with graphMicrosoftURL for userUPN, and the MIME type of the file
// facet when present.
func graphDriveToItem(userUPN string, item graphDriveItem) driveItem {
	out := driveItem{
		ID:   item.ID,
		Name: item.Name,
		Size: item.Size,
	}
	if item.ParentReference != nil {
		out.ParentID = item.ParentReference.ID
	}
	if item.Folder != nil {
		out.IsFolder = true
		return out
	}
	mime := ""
	if item.File != nil {
		mime = item.File.MimeType
	}
	out.MimeType = mime
	out.Download = graphMicrosoftURL(userUPN, "/drive/items/"+url.PathEscape(item.ID)+"/content")
	return out
}

// resolveDriveRelPath returns the relative path to use for item: the path
// already stored for item.ID if there is one, otherwise the stored path of its
// parent (empty when unknown) joined with the sanitized item name.
func (d *DriveImporter) resolveDriveRelPath(items *ImportedItemStore, item driveItem) string {
	if stored := items.Path(item.ID); stored != "" {
		return stored
	}
	parentRel := ""
	if item.ParentID != "" {
		parentRel = items.Path(item.ParentID)
	}
	return path.Join(parentRel, sanitizeDrivePath(item.Name))
}

// uploadDriveItem copies a single item to path.Join(root, relPath) and records
// the outcome in items.
//
//   - Folders are created and their path recorded.
//   - Export items are downloaded via downloadGoogleExport and uploaded under
//     the returned file name; when googleSlidesPDFExport matches, a PDF copy is
//     additionally attempted and its path recorded under item.ID+"_pdf".
//   - Other files larger than maxDriveFileBytes are marked skipped; the rest
//     are downloaded and uploaded as-is.
//
// Per-item failures (folder creation, download, upload) are recorded with
// MarkFailed plus the "failed" job stat, and a failed export is recorded with
// MarkSkipped; in those cases the function returns nil so the caller moves on,
// and only an error from the store itself is returned. The imported, exported
// and skipped counters may be nil, in which case they are not incremented.
func (d *DriveImporter) uploadDriveItem(ctx context.Context, accessToken, ncUserID, root, relPath string, item driveItem, items *ImportedItemStore, imported, exported, skipped *float64, stats map[string]any) error {
	targetPath := path.Join(root, relPath)

	if item.IsFolder {
		if err := d.nc.CreateFolder(ctx, ncUserID, targetPath); err != nil {
			if markErr := items.MarkFailed(ctx, item.ID, err.Error(), relPath); markErr != nil {
				return markErr
			}
			incJobStat(stats, "failed")
			return nil
		}
		if err := items.MarkPath(ctx, item.ID, relPath); err != nil {
			return err
		}
		if imported != nil {
			*imported++
		}
		return nil
	}

	if item.Export {
		content, contentType, fileName, err := d.downloadGoogleExport(ctx, accessToken, item)
		if err != nil {
			if skipped != nil {
				*skipped++
			}
			return items.MarkSkipped(ctx, item.ID, "export: "+err.Error(), relPath)
		}
		// The export decides the final file name; keep the directory part.
		targetPath = path.Join(path.Dir(targetPath), fileName)
		relPath = path.Join(path.Dir(relPath), fileName)
		if err := d.nc.Upload(ctx, ncUserID, targetPath, content, contentType); err != nil {
			if markErr := items.MarkFailed(ctx, item.ID, err.Error(), relPath); markErr != nil {
				return markErr
			}
			incJobStat(stats, "failed")
			return nil
		}
		if exported != nil {
			*exported++
		}
		if pdfMime, pdfExt, ok := googleSlidesPDFExport(item.MimeType); ok {
			pdfItem := item
			pdfItem.ExportMime = pdfMime
			pdfItem.ExportExt = pdfExt
			// The PDF copy is best effort: download and upload errors are
			// deliberately ignored, only a store error aborts.
			pdfContent, pdfType, pdfName, err := d.downloadGoogleExport(ctx, accessToken, pdfItem)
			if err == nil {
				pdfRel := path.Join(path.Dir(relPath), pdfName)
				pdfTarget := path.Join(root, pdfRel)
				if err := d.nc.Upload(ctx, ncUserID, pdfTarget, pdfContent, pdfType); err == nil {
					if err := items.MarkPath(ctx, item.ID+"_pdf", pdfRel); err != nil {
						return err
					}
				}
			}
		}
	} else {
		if item.Size > maxDriveFileBytes {
			if skipped != nil {
				*skipped++
			}
			reason := fmt.Sprintf("file exceeds %d byte limit", maxDriveFileBytes)
			return items.MarkSkipped(ctx, item.ID, reason, relPath)
		}
		content, contentType, err := d.downloadDriveFile(ctx, accessToken, item)
		if err != nil {
			if markErr := items.MarkFailed(ctx, item.ID, err.Error(), relPath); markErr != nil {
				return markErr
			}
			incJobStat(stats, "failed")
			return nil
		}
		if err := d.nc.Upload(ctx, ncUserID, targetPath, content, contentType); err != nil {
			if markErr := items.MarkFailed(ctx, item.ID, err.Error(), relPath); markErr != nil {
				return markErr
			}
			incJobStat(stats, "failed")
			return nil
		}
	}

	if err := items.MarkImported(ctx, item.ID); err != nil {
		return err
	}
	if err := items.MarkPath(ctx, item.ID, relPath); err != nil {
		return err
	}
	if imported != nil {
		*imported++
	}
	return nil
}

// deleteDriveItem removes the target file recorded for fileID and, if present,
// its "_pdf" companion, then drops both records from items. A blank fileID is
// a no-op. Delete errors that isDeleteNotFound classifies as "not found" are
// tolerated; records without a stored path are skipped for deletion but still
// unmarked.
func (d *DriveImporter) deleteDriveItem(ctx context.Context, ncUserID, root string, items *ImportedItemStore, fileID string) error {
	if fileID == "" {
		return nil
	}
	for _, suffix := range []string{"", "_pdf"} {
		rel := items.Path(fileID + suffix)
		if rel == "" {
			continue
		}
		target := path.Join(root, rel)
		if err := d.nc.Delete(ctx, ncUserID, target); err != nil && !isDeleteNotFound(err) {
			return err
		}
	}
	if err := items.Unmark(ctx, fileID); err != nil {
		return err
	}
	return items.Unmark(ctx, fileID+"_pdf")
}

// isDeleteNotFound reports whether err's message, compared case-insensitively,
// contains "404" or "not found". A nil err yields false.
func isDeleteNotFound(err error) bool {
	if err == nil {
		return false
	}
	msg := strings.ToLower(err.Error())
	return strings.Contains(msg, "404") || strings.Contains(msg, "not found")
}