- Added endpoints for listing and importing migration rosters. - Introduced audit export functionality for migration jobs in CSV and NDJSON formats. - Implemented tenant mismatch validation for Microsoft migration claims. - Enhanced error handling for email claiming and migration processes. - Added integration tests for roster import and claim workflows.
432 lines
13 KiB
Go
432 lines
13 KiB
Go
package migration
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/url"
|
|
"path"
|
|
"strings"
|
|
)
|
|
|
|
func (d *DriveImporter) hasDriveDeltaCursor(job *Job, provider string) bool {
|
|
if provider == "google" {
|
|
token, _ := job.CursorJSON["driveChangeToken"].(string)
|
|
return strings.TrimSpace(token) != ""
|
|
}
|
|
link, _ := job.CursorJSON["driveDeltaLink"].(string)
|
|
return strings.TrimSpace(link) != ""
|
|
}
|
|
|
|
func (d *DriveImporter) bootstrapDriveDelta(ctx context.Context, accessToken, provider string, cursor map[string]any) error {
|
|
switch provider {
|
|
case "google":
|
|
body, err := apiGet(ctx, d.client, "https://www.googleapis.com/drive/v3/changes/startPageToken?spaces=drive", accessToken)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
var parsed struct {
|
|
StartPageToken string `json:"startPageToken"`
|
|
}
|
|
if err := json.Unmarshal(body, &parsed); err != nil {
|
|
return err
|
|
}
|
|
if parsed.StartPageToken != "" {
|
|
cursor["driveChangeToken"] = parsed.StartPageToken
|
|
}
|
|
return nil
|
|
default:
|
|
link, err := d.walkMicrosoftDriveDelta(ctx, accessToken, graphMicrosoftURL(d.userUPN, "/drive/root/delta?$select=id,name,folder,file,size,parentReference,deleted"))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if link != "" {
|
|
cursor["driveDeltaLink"] = link
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func (d *DriveImporter) walkMicrosoftDriveDelta(ctx context.Context, accessToken, listURL string) (string, error) {
|
|
for listURL != "" {
|
|
body, err := apiGet(ctx, d.client, listURL, accessToken)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
var parsed struct {
|
|
NextLink string `json:"@odata.nextLink"`
|
|
DeltaLink string `json:"@odata.deltaLink"`
|
|
}
|
|
if err := json.Unmarshal(body, &parsed); err != nil {
|
|
return "", err
|
|
}
|
|
if parsed.DeltaLink != "" {
|
|
return parsed.DeltaLink, nil
|
|
}
|
|
listURL = parsed.NextLink
|
|
}
|
|
return "", nil
|
|
}
|
|
|
|
func (d *DriveImporter) importDriveDelta(ctx context.Context, job *Job, accessToken, provider, ncUserID, root string, items *ImportedItemStore, update progressUpdater) error {
|
|
switch provider {
|
|
case "google":
|
|
return d.importGoogleDriveDelta(ctx, job, accessToken, ncUserID, root, items, update)
|
|
default:
|
|
return d.importMicrosoftDriveDelta(ctx, job, accessToken, ncUserID, root, items, update)
|
|
}
|
|
}
|
|
|
|
func (d *DriveImporter) importGoogleDriveDelta(ctx context.Context, job *Job, accessToken, ncUserID, root string, items *ImportedItemStore, update progressUpdater) error {
|
|
pageToken, _ := job.CursorJSON["driveChangeToken"].(string)
|
|
if pageToken == "" {
|
|
return fmt.Errorf("google drive delta token missing")
|
|
}
|
|
|
|
listURL := "https://www.googleapis.com/drive/v3/changes?pageSize=100&spaces=drive&includeRemoved=true" +
|
|
"&includeItemsFromAllDrives=true&supportsAllDrives=true&fields=" +
|
|
url.QueryEscape("nextPageToken,newStartPageToken,changes(fileId,removed,file(id,name,mimeType,size,parents,trashed,driveId))") +
|
|
"&pageToken=" + url.QueryEscape(pageToken)
|
|
|
|
body, err := apiGet(ctx, d.client, listURL, accessToken)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
var parsed struct {
|
|
Changes []googleDriveChange `json:"changes"`
|
|
NextPageToken string `json:"nextPageToken"`
|
|
NewStartPageToken string `json:"newStartPageToken"`
|
|
}
|
|
if err := json.Unmarshal(body, &parsed); err != nil {
|
|
return err
|
|
}
|
|
|
|
imported, _ := job.StatsJSON["delta_imported"].(float64)
|
|
deleted, _ := job.StatsJSON["delta_deleted"].(float64)
|
|
exported, _ := job.StatsJSON["exported"].(float64)
|
|
skipped, _ := job.StatsJSON["skipped"].(float64)
|
|
batch := 0
|
|
listIndex := int(jsonNumber(job.CursorJSON["listIndex"]))
|
|
|
|
for i := listIndex; i < len(parsed.Changes) && batch < driveImportBatchSize(); i++ {
|
|
change := parsed.Changes[i]
|
|
if change.Removed || change.File == nil || change.File.Trashed {
|
|
if err := d.deleteDriveItem(ctx, ncUserID, root, items, change.FileID); err != nil {
|
|
return err
|
|
}
|
|
deleted++
|
|
batch++
|
|
continue
|
|
}
|
|
item := googleFileToDriveItem(*change.File)
|
|
relPath := d.resolveDriveRelPath(items, item)
|
|
if err := d.uploadDriveItem(ctx, job, accessToken, ncUserID, root, relPath, item, items, &imported, &exported, &skipped, job.StatsJSON); err != nil {
|
|
return err
|
|
}
|
|
batch++
|
|
}
|
|
|
|
job.StatsJSON["delta_imported"] = imported
|
|
job.StatsJSON["delta_deleted"] = deleted
|
|
job.StatsJSON["exported"] = exported
|
|
job.StatsJSON["skipped"] = skipped
|
|
|
|
if listIndex+batch < len(parsed.Changes) {
|
|
job.CursorJSON["listIndex"] = float64(listIndex + batch)
|
|
return update("pending", job.CursorJSON, job.StatsJSON, "")
|
|
}
|
|
delete(job.CursorJSON, "listIndex")
|
|
|
|
if parsed.NextPageToken != "" {
|
|
job.CursorJSON["driveChangeToken"] = parsed.NextPageToken
|
|
return update("pending", job.CursorJSON, job.StatsJSON, "")
|
|
}
|
|
if parsed.NewStartPageToken != "" {
|
|
job.CursorJSON["driveChangeToken"] = parsed.NewStartPageToken
|
|
}
|
|
job.StatsJSON["phase"] = "delta"
|
|
return update("completed", job.CursorJSON, job.StatsJSON, "")
|
|
}
|
|
|
|
type googleDriveChange struct {
|
|
FileID string `json:"fileId"`
|
|
Removed bool `json:"removed"`
|
|
File *googleDriveFile `json:"file"`
|
|
}
|
|
|
|
// googleDriveFile holds the file metadata fields decoded from a Google Drive
// change entry.
type googleDriveFile struct {
	ID       string `json:"id"`
	Name     string `json:"name"`
	MimeType string `json:"mimeType"`
	// Size is kept as the raw JSON string; googleFileToDriveItem parses it.
	Size string `json:"size"`
	// Parents lists parent IDs; googleFileToDriveItem uses only the first.
	Parents []string `json:"parents"`
	Trashed bool     `json:"trashed"`
	// DriveID is decoded from "driveId"; a non-empty value is treated as a
	// shared-drive file by the code that consumes it.
	DriveID string `json:"driveId"`
}
|
|
|
|
func googleFileToDriveItem(f googleDriveFile) driveItem {
|
|
size := int64(0)
|
|
if f.Size != "" {
|
|
fmt.Sscan(f.Size, &size)
|
|
}
|
|
item := driveItem{
|
|
ID: f.ID,
|
|
Name: f.Name,
|
|
IsFolder: f.MimeType == "application/vnd.google-apps.folder",
|
|
Size: size,
|
|
MimeType: f.MimeType,
|
|
DriveID: f.DriveID,
|
|
}
|
|
if len(f.Parents) > 0 {
|
|
item.ParentID = f.Parents[0]
|
|
}
|
|
if item.IsFolder {
|
|
return item
|
|
}
|
|
if exportMime, ext, ok := googleWorkspaceExport(f.MimeType); ok {
|
|
item.Export = true
|
|
item.ExportMime = exportMime
|
|
item.ExportExt = ext
|
|
item.Name = driveExportFileName(f.Name, ext)
|
|
} else {
|
|
item.Download = googleDriveDownloadURL(f.ID, f.DriveID != "")
|
|
}
|
|
return item
|
|
}
|
|
|
|
func (d *DriveImporter) importMicrosoftDriveDelta(ctx context.Context, job *Job, accessToken, ncUserID, root string, items *ImportedItemStore, update progressUpdater) error {
|
|
deltaLink, _ := job.CursorJSON["driveDeltaLink"].(string)
|
|
if deltaLink == "" {
|
|
return fmt.Errorf("microsoft drive delta link missing")
|
|
}
|
|
|
|
body, err := apiGet(ctx, d.client, deltaLink, accessToken)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
var parsed struct {
|
|
Value []graphDriveItem `json:"value"`
|
|
NextLink string `json:"@odata.nextLink"`
|
|
DeltaLink string `json:"@odata.deltaLink"`
|
|
}
|
|
if err := json.Unmarshal(body, &parsed); err != nil {
|
|
return err
|
|
}
|
|
|
|
imported, _ := job.StatsJSON["delta_imported"].(float64)
|
|
deleted, _ := job.StatsJSON["delta_deleted"].(float64)
|
|
skipped, _ := job.StatsJSON["skipped"].(float64)
|
|
batch := 0
|
|
listIndex := int(jsonNumber(job.CursorJSON["listIndex"]))
|
|
|
|
for i := listIndex; i < len(parsed.Value) && batch < driveImportBatchSize(); i++ {
|
|
item := parsed.Value[i]
|
|
sourceID := strings.TrimSpace(item.ID)
|
|
if sourceID == "" {
|
|
continue
|
|
}
|
|
if item.Removed != nil || item.Deleted != nil {
|
|
if err := d.deleteDriveItem(ctx, ncUserID, root, items, sourceID); err != nil {
|
|
return err
|
|
}
|
|
deleted++
|
|
batch++
|
|
continue
|
|
}
|
|
driveItem := graphDriveToItem(d.userUPN, item)
|
|
relPath := d.resolveDriveRelPath(items, driveItem)
|
|
if err := d.uploadDriveItem(ctx, job, accessToken, ncUserID, root, relPath, driveItem, items, &imported, nil, &skipped, job.StatsJSON); err != nil {
|
|
return err
|
|
}
|
|
batch++
|
|
}
|
|
|
|
job.StatsJSON["delta_imported"] = imported
|
|
job.StatsJSON["delta_deleted"] = deleted
|
|
job.StatsJSON["skipped"] = skipped
|
|
|
|
if listIndex+batch < len(parsed.Value) {
|
|
job.CursorJSON["listIndex"] = float64(listIndex + batch)
|
|
return update("pending", job.CursorJSON, job.StatsJSON, "")
|
|
}
|
|
delete(job.CursorJSON, "listIndex")
|
|
|
|
if parsed.NextLink != "" {
|
|
job.CursorJSON["driveDeltaLink"] = parsed.NextLink
|
|
return update("pending", job.CursorJSON, job.StatsJSON, "")
|
|
}
|
|
if parsed.DeltaLink != "" {
|
|
job.CursorJSON["driveDeltaLink"] = parsed.DeltaLink
|
|
}
|
|
job.StatsJSON["phase"] = "delta"
|
|
return update("completed", job.CursorJSON, job.StatsJSON, "")
|
|
}
|
|
|
|
// graphDriveItem is one entry of the "value" array decoded from a Microsoft
// Graph drive delta response. The pointer-typed facets are nil when the
// corresponding JSON key is absent or null.
type graphDriveItem struct {
	ID   string `json:"id"`
	Name string `json:"name"`
	Size int64  `json:"size"`
	// Folder is non-nil for entries carrying a "folder" facet.
	Folder *struct {
		ChildCount int `json:"childCount"`
	} `json:"folder"`
	// File is non-nil for entries carrying a "file" facet.
	File *struct {
		MimeType string `json:"mimeType"`
	} `json:"file"`
	// ParentReference carries the parent's ID when present.
	ParentReference *struct {
		ID string `json:"id"`
	} `json:"parentReference"`
	// Removed is decoded from the "@removed" annotation.
	Removed *struct {
		Reason string `json:"reason"`
	} `json:"@removed"`
	// Deleted is decoded from the "deleted" facet; only its presence matters.
	Deleted *struct{} `json:"deleted"`
}
|
|
|
|
func graphDriveToItem(userUPN string, item graphDriveItem) driveItem {
|
|
out := driveItem{
|
|
ID: item.ID,
|
|
Name: item.Name,
|
|
Size: item.Size,
|
|
}
|
|
if item.ParentReference != nil {
|
|
out.ParentID = item.ParentReference.ID
|
|
}
|
|
if item.Folder != nil {
|
|
out.IsFolder = true
|
|
return out
|
|
}
|
|
mime := ""
|
|
if item.File != nil {
|
|
mime = item.File.MimeType
|
|
}
|
|
out.MimeType = mime
|
|
out.Download = graphMicrosoftURL(userUPN, "/drive/items/"+url.PathEscape(item.ID)+"/content")
|
|
return out
|
|
}
|
|
|
|
func (d *DriveImporter) resolveDriveRelPath(items *ImportedItemStore, item driveItem) string {
|
|
if stored := items.Path(item.ID); stored != "" {
|
|
return stored
|
|
}
|
|
parentRel := ""
|
|
if item.ParentID != "" {
|
|
parentRel = items.Path(item.ParentID)
|
|
}
|
|
return path.Join(parentRel, sanitizeDrivePath(item.Name))
|
|
}
|
|
|
|
func (d *DriveImporter) uploadDriveItem(ctx context.Context, job *Job, accessToken, ncUserID, root, relPath string, item driveItem, items *ImportedItemStore, imported, exported, skipped *float64, stats map[string]any) error {
|
|
targetPath := path.Join(root, relPath)
|
|
shared := item.DriveID != ""
|
|
if d.alreadyImportedShared(item.DriveID, item.ID, shared) {
|
|
if skipped != nil {
|
|
*skipped++
|
|
}
|
|
return items.MarkSkipped(ctx, item.ID, "dedup: shared drive file already imported by project", relPath)
|
|
}
|
|
if item.IsFolder {
|
|
if err := d.nc.CreateFolder(ctx, ncUserID, targetPath); err != nil {
|
|
if markErr := items.MarkFailed(ctx, item.ID, err.Error(), relPath); markErr != nil {
|
|
return markErr
|
|
}
|
|
incJobStat(stats, "failed")
|
|
return nil
|
|
}
|
|
if err := items.MarkPath(ctx, item.ID, relPath); err != nil {
|
|
return err
|
|
}
|
|
if imported != nil {
|
|
*imported++
|
|
}
|
|
return nil
|
|
}
|
|
if item.Export {
|
|
content, contentType, fileName, err := d.downloadGoogleExport(ctx, accessToken, item)
|
|
if err != nil {
|
|
if skipped != nil {
|
|
*skipped++
|
|
}
|
|
return items.MarkSkipped(ctx, item.ID, "export: "+err.Error(), relPath)
|
|
}
|
|
targetPath = path.Join(path.Dir(targetPath), fileName)
|
|
relPath = path.Join(path.Dir(relPath), fileName)
|
|
if err := d.uploadToNextcloud(ctx, ncUserID, targetPath, content, contentType, 0); err != nil {
|
|
if markErr := items.MarkFailed(ctx, item.ID, err.Error(), relPath); markErr != nil {
|
|
return markErr
|
|
}
|
|
incJobStat(stats, "failed")
|
|
return nil
|
|
}
|
|
if exported != nil {
|
|
*exported++
|
|
}
|
|
if pdfMime, pdfExt, ok := googleSlidesPDFExport(item.MimeType); ok {
|
|
pdfItem := item
|
|
pdfItem.ExportMime = pdfMime
|
|
pdfItem.ExportExt = pdfExt
|
|
pdfContent, pdfType, pdfName, err := d.downloadGoogleExport(ctx, accessToken, pdfItem)
|
|
if err == nil {
|
|
pdfRel := path.Join(path.Dir(relPath), pdfName)
|
|
pdfTarget := path.Join(root, pdfRel)
|
|
if err := d.nc.Upload(ctx, ncUserID, pdfTarget, pdfContent, pdfType); err == nil {
|
|
if err := items.MarkPath(ctx, item.ID+"_pdf", pdfRel); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
content, contentType, err := d.downloadDriveFile(ctx, accessToken, item)
|
|
if err != nil {
|
|
if markErr := items.MarkFailed(ctx, item.ID, err.Error(), relPath); markErr != nil {
|
|
return markErr
|
|
}
|
|
incJobStat(stats, "failed")
|
|
return nil
|
|
}
|
|
if err := d.uploadToNextcloud(ctx, ncUserID, targetPath, content, contentType, item.Size); err != nil {
|
|
if markErr := items.MarkFailed(ctx, item.ID, err.Error(), relPath); markErr != nil {
|
|
return markErr
|
|
}
|
|
incJobStat(stats, "failed")
|
|
return nil
|
|
}
|
|
}
|
|
if err := items.MarkImported(ctx, item.ID); err != nil {
|
|
return err
|
|
}
|
|
if err := items.MarkPath(ctx, item.ID, relPath); err != nil {
|
|
return err
|
|
}
|
|
if err := d.markSharedImported(ctx, item.DriveID, item.ID, relPath, job.ID, shared); err != nil {
|
|
return err
|
|
}
|
|
if imported != nil {
|
|
*imported++
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (d *DriveImporter) deleteDriveItem(ctx context.Context, ncUserID, root string, items *ImportedItemStore, fileID string) error {
|
|
if fileID == "" {
|
|
return nil
|
|
}
|
|
for _, suffix := range []string{"", "_pdf"} {
|
|
rel := items.Path(fileID + suffix)
|
|
if rel == "" {
|
|
continue
|
|
}
|
|
target := path.Join(root, rel)
|
|
if err := d.nc.Delete(ctx, ncUserID, target); err != nil && !isDeleteNotFound(err) {
|
|
return err
|
|
}
|
|
}
|
|
if err := items.Unmark(ctx, fileID); err != nil {
|
|
return err
|
|
}
|
|
return items.Unmark(ctx, fileID+"_pdf")
|
|
}
|
|
|
|
// isDeleteNotFound reports whether err's message, compared
// case-insensitively, contains "404" or "not found". A nil error yields
// false. The check is a plain substring match on the message text.
func isDeleteNotFound(err error) bool {
	if err == nil {
		return false
	}
	lowered := strings.ToLower(err.Error())
	for _, marker := range []string{"404", "not found"} {
		if strings.Contains(lowered, marker) {
			return true
		}
	}
	return false
}
|