import type { TipTapJSON } from "@/lib/drive/richtext-import"
import type { DocPageHeaderFooter, DocPageSetup } from "@/lib/drive/doc-page-setup"

/** Unzipped DOCX package: archive entry path mapped to the raw bytes of that entry. */
type DocxArchive = Record<string, Uint8Array>

/** Height assigned to every imported header/footer; no height is read from the part itself. */
const DEFAULT_HEADER_FOOTER_HEIGHT_MM = 15

/**
 * A `<w:p>` element with its content. The tag name must end right after `p`
 * (so `<w:pPr>`, `<w:pStyle>`, ... are not mistaken for a paragraph) and
 * self-closing paragraphs are skipped because they have no closing tag to pair with.
 */
const PARAGRAPH_BLOCK = /<w:p(?:\s[^>]*)?(?<!\/)>[\s\S]*?<\/w:p>/gi

/** A `<w:t>` text element (optionally with attributes such as `xml:space`); group 1 is its text. */
const TEXT_RUN = /<w:t(?:\s[^>]*)?>([^<]*)<\/w:t>/gi

// Formatting toggles. The tag name must end right after the letter so that
// look-alikes (`<w:bCs>`, `<w:bookmarkStart>`, `<w:ind>`, `<w:iCs>`) never match.
const BOLD_TAG = /<w:b(?:\s[^>]*)?\/?>/g
const ITALIC_TAG = /<w:i(?:\s[^>]*)?\/?>/g
const UNDERLINE_TAG = /<w:u(?:\s[^>]*)?\/?>/g

/** `w:val` values that switch a toggle off (`none` is the "no underline" value of `<w:u>`). */
const TOGGLE_OFF_VALUE = /\bw:val\s*=\s*["'](?:0|false|off|none)["']/

/** Predefined XML entities plus decimal / hexadecimal character references. */
const XML_ENTITY = /&(#x[0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g

const XML_NAMED_ENTITIES: Record<string, string> = {
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
}

/**
 * Decodes an archive entry to a string using `TextDecoder`'s default encoding.
 *
 * @param bytes Raw entry bytes, or `undefined` when the entry is absent.
 * @returns The decoded text, or `""` when `bytes` is missing.
 */
function decodeXml(bytes: Uint8Array | undefined): string {
  if (!bytes) return ""
  return new TextDecoder().decode(bytes)
}

/**
 * Replaces XML entity / character references with the characters they stand for.
 * Runs as a single pass, so `&amp;lt;` becomes `&lt;` and is not decoded twice.
 * Unknown or out-of-range references are left untouched.
 */
function decodeXmlEntities(text: string): string {
  return text.replace(XML_ENTITY, (whole: string, ref: string) => {
    if (!ref.startsWith("#")) return XML_NAMED_ENTITIES[ref] ?? whole
    const isHex = ref[1] === "x"
    const codePoint = Number.parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10)
    // String.fromCodePoint throws above U+10FFFF; keep such references verbatim.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole
  })
}

/** True when `pXml` contains at least one occurrence of the toggle tag that is not switched off. */
function hasEnabledToggle(pXml: string, toggleTag: RegExp): boolean {
  for (const match of pXml.matchAll(toggleTag)) {
    if (!TOGGLE_OFF_VALUE.test(match[0])) return true
  }
  return false
}

/**
 * Converts WordprocessingML markup into TipTap paragraph nodes.
 *
 * Each `<w:p>` becomes one `paragraph` node holding at most one `text` node:
 * the concatenated, entity-decoded text of all its `<w:t>` elements. Marks are
 * detected per paragraph, not per run: if any enabled `<w:b>` / `<w:i>` / `<w:u>`
 * occurs inside the paragraph, the mark is applied to the whole text.
 * A paragraph without text yields a node with empty `content`.
 *
 * When the markup has no `<w:p>` at all, every tag is stripped and the remaining
 * text (whitespace collapsed) is returned as a single paragraph, or nothing if
 * no text remains.
 *
 * @param xml Markup of a document part.
 * @returns Paragraph nodes in document order; empty when there is nothing to import.
 */
function xmlTextToParagraphs(xml: string): TipTapJSON[] {
  const paragraphs: TipTapJSON[] = []
  const pBlocks = [...xml.matchAll(PARAGRAPH_BLOCK)]

  if (pBlocks.length === 0) {
    const text = decodeXmlEntities(xml.replace(/<[^>]+>/g, " "))
      .replace(/\s+/g, " ")
      .trim()
    if (text) {
      paragraphs.push({ type: "paragraph", content: [{ type: "text", text }] })
    }
    return paragraphs
  }

  for (const pMatch of pBlocks) {
    const pXml = pMatch[0]
    const runs = [...pXml.matchAll(TEXT_RUN)]
    const text = decodeXmlEntities(runs.map((r) => r[1] ?? "").join(""))
    const content: TipTapJSON[] = []

    if (text) {
      const marks: TipTapJSON[] = []
      if (hasEnabledToggle(pXml, BOLD_TAG)) marks.push({ type: "bold" })
      if (hasEnabledToggle(pXml, ITALIC_TAG)) marks.push({ type: "italic" })
      if (hasEnabledToggle(pXml, UNDERLINE_TAG)) marks.push({ type: "underline" })
      content.push({ type: "text", text, ...(marks.length ? { marks } : {}) })
    }

    paragraphs.push({ type: "paragraph", content })
  }

  return paragraphs
}

/**
 * Reads `word/<partName>.xml` from the archive and converts it to header/footer content.
 *
 * @param archive Unzipped DOCX package.
 * @param partName Part file name without directory or extension, e.g. `"header1"`.
 * @returns The converted part, or `null` when the entry is missing/empty or
 *   produces no paragraphs.
 */
function parseHeaderFooterPart(
  archive: DocxArchive,
  partName: string
): DocPageHeaderFooter | null {
  const xml = decodeXml(archive[`word/${partName}.xml`])
  if (!xml) return null

  const contentBlocks = xmlTextToParagraphs(xml)
  if (contentBlocks.length === 0) return null

  return {
    content: { type: "doc", content: contentBlocks },
    heightMm: DEFAULT_HEADER_FOOTER_HEIGHT_MM,
  }
}

export type DocxHeaderFooterResult = Pick<
  DocPageSetup,
  "header" | "footer" | "headerFooterDifferentFirstPage"
>

/** Extract header/footer body content from DOCX archive. */
export async function extractDocxHeaderFooter(
  buffer: ArrayBuffer
): Promise<DocxHeaderFooterResult> {
  try {
    const { unzipSync } = await import("fflate")
    const archive = unzipSync(new Uint8Array(buffer)) as DocxArchive
    const header =
      parseHeaderFooterPart(archive, "header1") ??
parseHeaderFooterPart(archive, "header2") const footer = parseHeaderFooterPart(archive, "footer1") ?? parseHeaderFooterPart(archive, "footer2") const documentXml = decodeXml(archive["word/document.xml"]) const differentFirst = documentXml != null && /