ultisuite-backend/internal/migration/drive_import.go
R3D347HR4Y 7143a36c19
Some checks are pending
CI / Go tests (push) Waiting to run
CI / Integration tests (push) Waiting to run
CI / DB migrations (push) Waiting to run
feat(mail): integrate Stalwart hosted mail and migration features
- Added configuration options for Stalwart hosted mail in .env.example.
- Updated Docker Compose to include Stalwart service with health checks.
- Introduced new API endpoints for managing mail domains and migration projects.
- Enhanced Authentik blueprints for user enrollment and post-migration security.
- Updated OAuth handling for Google and Microsoft migration processes.
- Improved error handling and response structures in the mail API.
- Added integration tests for email claiming and migration workflows.
2026-06-13 12:47:08 +02:00

406 lines
12 KiB
Go

package migration
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"path"
"strings"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/ultisuite/ulti-backend/internal/nextcloud"
)
const maxDriveFileBytes = 25 * 1024 * 1024
type DriveImporter struct {
db *pgxpool.Pool
nc *nextcloud.Client
client *http.Client
userUPN string
}
func NewDriveImporter(db *pgxpool.Pool, nc *nextcloud.Client) *DriveImporter {
return &DriveImporter{db: db, nc: nc, client: migrationHTTPClient()}
}
func (d *DriveImporter) WithUserPrincipal(upn string) *DriveImporter {
d.userUPN = strings.TrimSpace(upn)
return d
}
func (d *DriveImporter) WithHTTPClient(c *http.Client) *DriveImporter {
if c != nil {
d.client = c
}
return d
}
func (d *DriveImporter) ImportBatch(ctx context.Context, job *Job, accessToken, provider string, delta bool, update progressUpdater) error {
if d.nc == nil {
return fmt.Errorf("nextcloud required for drive migration")
}
user, err := resolveMigrationUser(ctx, d.db, job.UserID)
if err != nil {
return err
}
ncUserID := nextcloud.UserIDFromClaims(user.Email, user.ExternalID)
if _, err := d.nc.EnsurePrincipal(ctx, user.Email, user.ExternalID, user.Name); err != nil {
return fmt.Errorf("nextcloud user: %w", err)
}
root := fmt.Sprintf("/Migration/%s", provider)
_ = d.nc.CreateFolder(ctx, ncUserID, root)
store, err := LoadImportedItemStore(ctx, d.db, job.ID, job.CursorJSON)
if err != nil {
return err
}
if delta && d.hasDriveDeltaCursor(job, provider) {
return d.importDriveDelta(ctx, job, accessToken, provider, ncUserID, root, store, update)
}
imported, _ := job.StatsJSON["imported"].(float64)
skipped, _ := job.StatsJSON["skipped"].(float64)
exported, _ := job.StatsJSON["exported"].(float64)
batch := 0
queue := readDriveFolderQueue(job.CursorJSON, provider)
folderIndex := int(jsonNumber(job.CursorJSON["folderIndex"]))
if folderIndex >= len(queue) {
if delta && !d.hasDriveDeltaCursor(job, provider) {
if err := d.bootstrapDriveDelta(ctx, accessToken, provider, job.CursorJSON); err != nil {
return err
}
}
job.StatsJSON["imported"] = imported
job.StatsJSON["skipped"] = skipped
job.StatsJSON["exported"] = exported
if delta && d.hasDriveDeltaCursor(job, provider) {
job.StatsJSON["phase"] = "delta_ready"
} else {
job.StatsJSON["phase"] = "imported"
}
return update("completed", job.CursorJSON, job.StatsJSON, "")
}
current := queue[folderIndex]
folderItems, nextCursor, subfolders, err := d.listDriveFolderItems(ctx, accessToken, provider, current, job.CursorJSON)
if err != nil {
return err
}
listIndex := int(jsonNumber(job.CursorJSON["listIndex"]))
for i := listIndex; i < len(folderItems) && batch < driveImportBatchSize(); i++ {
item := folderItems[i]
if alreadyImported(store, item.ID) {
continue
}
relPath := path.Join(current.Path, sanitizeDrivePath(item.Name))
targetPath := path.Join(root, relPath)
if item.IsFolder {
if err := d.nc.CreateFolder(ctx, ncUserID, targetPath); err != nil {
if markErr := store.MarkFailed(ctx, item.ID, err.Error(), relPath); markErr != nil {
return markErr
}
incJobStat(job.StatsJSON, "failed")
batch++
continue
}
if err := store.MarkPath(ctx, item.ID, relPath); err != nil {
return err
}
queue = enqueueDriveFolder(queue, driveFolderRef{ID: item.ID, Path: relPath})
} else {
if item.Export {
content, contentType, fileName, err := d.downloadGoogleExport(ctx, accessToken, item)
if err != nil {
skipped++
if err := store.MarkSkipped(ctx, item.ID, "export: "+err.Error(), relPath); err != nil {
return err
}
batch++
continue
}
targetPath = path.Join(path.Dir(targetPath), fileName)
relPath = path.Join(path.Dir(relPath), fileName)
if err := d.nc.Upload(ctx, ncUserID, targetPath, content, contentType); err != nil {
if markErr := store.MarkFailed(ctx, item.ID, err.Error(), relPath); markErr != nil {
return markErr
}
incJobStat(job.StatsJSON, "failed")
batch++
continue
}
exported++
if pdfMime, pdfExt, ok := googleSlidesPDFExport(item.MimeType); ok {
pdfItem := item
pdfItem.ExportMime = pdfMime
pdfItem.ExportExt = pdfExt
if pdfContent, pdfType, pdfName, err := d.downloadGoogleExport(ctx, accessToken, pdfItem); err == nil {
pdfRel := path.Join(path.Dir(relPath), pdfName)
pdfTarget := path.Join(root, pdfRel)
if err := d.nc.Upload(ctx, ncUserID, pdfTarget, pdfContent, pdfType); err == nil {
if err := store.MarkPath(ctx, item.ID+"_pdf", pdfRel); err != nil {
return err
}
}
}
}
} else {
if item.Size > maxDriveFileBytes {
skipped++
reason := fmt.Sprintf("file exceeds %d byte limit", maxDriveFileBytes)
if err := store.MarkSkipped(ctx, item.ID, reason, relPath); err != nil {
return err
}
batch++
continue
}
content, contentType, err := d.downloadDriveFile(ctx, accessToken, item)
if err != nil {
if markErr := store.MarkFailed(ctx, item.ID, err.Error(), relPath); markErr != nil {
return markErr
}
incJobStat(job.StatsJSON, "failed")
batch++
continue
}
if err := d.nc.Upload(ctx, ncUserID, targetPath, content, contentType); err != nil {
if markErr := store.MarkFailed(ctx, item.ID, err.Error(), relPath); markErr != nil {
return markErr
}
incJobStat(job.StatsJSON, "failed")
batch++
continue
}
}
}
if err := store.MarkImported(ctx, item.ID); err != nil {
return err
}
if !item.IsFolder {
if err := store.MarkPath(ctx, item.ID, relPath); err != nil {
return err
}
}
imported++
batch++
}
for _, sub := range subfolders {
relPath := path.Join(current.Path, sanitizeDrivePath(sub.Name))
queue = enqueueDriveFolder(queue, driveFolderRef{ID: sub.ID, Path: relPath})
}
writeDriveFolderQueue(job.CursorJSON, queue)
job.StatsJSON["imported"] = imported
job.StatsJSON["skipped"] = skipped
job.StatsJSON["exported"] = exported
if listIndex+batch < len(folderItems) {
job.CursorJSON["listIndex"] = float64(listIndex + batch)
return update("pending", job.CursorJSON, job.StatsJSON, "")
}
delete(job.CursorJSON, "listIndex")
if nextCursor != "" {
if provider == "google" {
job.CursorJSON["pageToken"] = nextCursor
} else {
job.CursorJSON["nextLink"] = nextCursor
}
return update("pending", job.CursorJSON, job.StatsJSON, "")
}
delete(job.CursorJSON, "pageToken")
delete(job.CursorJSON, "nextLink")
job.CursorJSON["folderIndex"] = float64(folderIndex + 1)
delete(job.CursorJSON, "listIndex")
return update("pending", job.CursorJSON, job.StatsJSON, "")
}
type driveItem struct {
ID string
Name string
ParentID string
IsFolder bool
Size int64
MimeType string
Download string
Export bool
ExportMime string
ExportExt string
}
type driveSubfolder struct {
ID string
Name string
}
func (d *DriveImporter) listDriveFolderItems(ctx context.Context, accessToken, provider string, folder driveFolderRef, cursor map[string]any) ([]driveItem, string, []driveSubfolder, error) {
switch provider {
case "google":
pageToken, _ := cursor["pageToken"].(string)
q := url.QueryEscape("'" + folder.ID + "' in parents and trashed=false")
listURL := "https://www.googleapis.com/drive/v3/files?pageSize=100&fields=nextPageToken,files(id,name,mimeType,size)&q=" + q
if pageToken != "" {
listURL += "&pageToken=" + url.QueryEscape(pageToken)
}
body, err := apiGet(ctx, d.client, listURL, accessToken)
if err != nil {
return nil, "", nil, err
}
var parsed struct {
Files []struct {
ID string `json:"id"`
Name string `json:"name"`
MimeType string `json:"mimeType"`
Size string `json:"size"`
} `json:"files"`
NextPageToken string `json:"nextPageToken"`
}
if err := json.Unmarshal(body, &parsed); err != nil {
return nil, "", nil, err
}
out := make([]driveItem, 0, len(parsed.Files))
for _, f := range parsed.Files {
size := int64(0)
if f.Size != "" {
fmt.Sscan(f.Size, &size)
}
item := driveItem{
ID: f.ID,
Name: f.Name,
IsFolder: f.MimeType == "application/vnd.google-apps.folder",
Size: size,
MimeType: f.MimeType,
}
if item.IsFolder {
out = append(out, item)
continue
}
if exportMime, ext, ok := googleWorkspaceExport(f.MimeType); ok {
item.Export = true
item.ExportMime = exportMime
item.ExportExt = ext
item.Name = driveExportFileName(f.Name, ext)
} else {
item.Download = "https://www.googleapis.com/drive/v3/files/" + url.PathEscape(f.ID) + "?alt=media"
}
out = append(out, item)
}
return out, parsed.NextPageToken, nil, nil
default:
nextLink, _ := cursor["nextLink"].(string)
var listURL string
if folder.ID == "root" {
listURL = graphMicrosoftURL(d.userUPN, "/drive/root/children?$top=100&$select=id,name,folder,file,size")
} else {
listURL = graphMicrosoftURL(d.userUPN, "/drive/items/"+url.PathEscape(folder.ID)+"/children?$top=100&$select=id,name,folder,file,size")
}
if nextLink != "" {
listURL = nextLink
}
body, err := apiGet(ctx, d.client, listURL, accessToken)
if err != nil {
return nil, "", nil, err
}
var parsed struct {
Value []struct {
ID string `json:"id"`
Name string `json:"name"`
Folder *struct {
ChildCount int `json:"childCount"`
} `json:"folder"`
File *struct {
MimeType string `json:"mimeType"`
} `json:"file"`
Size int64 `json:"size"`
} `json:"value"`
NextLink string `json:"@odata.nextLink"`
}
if err := json.Unmarshal(body, &parsed); err != nil {
return nil, "", nil, err
}
out := make([]driveItem, 0, len(parsed.Value))
var subs []driveSubfolder
for _, f := range parsed.Value {
if f.Folder != nil {
out = append(out, driveItem{ID: f.ID, Name: f.Name, IsFolder: true})
if f.Folder.ChildCount > 0 {
subs = append(subs, driveSubfolder{ID: f.ID, Name: f.Name})
}
continue
}
mime := ""
if f.File != nil {
mime = f.File.MimeType
}
out = append(out, driveItem{
ID: f.ID,
Name: f.Name,
Size: f.Size,
MimeType: mime,
Download: graphMicrosoftURL(d.userUPN, "/drive/items/"+url.PathEscape(f.ID)+"/content"),
})
}
return out, parsed.NextLink, subs, nil
}
}
func (d *DriveImporter) downloadDriveFile(ctx context.Context, accessToken string, item driveItem) (io.ReadCloser, string, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, item.Download, nil)
if err != nil {
return nil, "", err
}
req.Header.Set("Authorization", "Bearer "+accessToken)
resp, err := migrationDo(ctx, d.client, req)
if err != nil {
return nil, "", fmt.Errorf("download %s: %w", item.Name, err)
}
contentType := resp.Header.Get("Content-Type")
if contentType == "" {
contentType = item.MimeType
}
if contentType == "" {
contentType = "application/octet-stream"
}
return resp.Body, contentType, nil
}
func (d *DriveImporter) downloadGoogleExport(ctx context.Context, accessToken string, item driveItem) (io.ReadCloser, string, string, error) {
exportURL := fmt.Sprintf(
"https://www.googleapis.com/drive/v3/files/%s/export?mimeType=%s",
url.PathEscape(item.ID),
url.QueryEscape(item.ExportMime),
)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, exportURL, nil)
if err != nil {
return nil, "", "", err
}
req.Header.Set("Authorization", "Bearer "+accessToken)
resp, err := migrationDo(ctx, d.client, req)
if err != nil {
return nil, "", "", fmt.Errorf("export %s: %w", item.Name, err)
}
contentType := resp.Header.Get("Content-Type")
if contentType == "" {
contentType = item.ExportMime
}
return resp.Body, contentType, driveExportFileName(item.Name, item.ExportExt), nil
}
func sanitizeDrivePath(name string) string {
name = strings.TrimSpace(name)
name = strings.ReplaceAll(name, "/", "-")
name = strings.ReplaceAll(name, "\\", "-")
if name == "" {
return "untitled"
}
return name
}