97 lines
2.8 KiB
TypeScript
97 lines
2.8 KiB
TypeScript
import type { TipTapJSON } from "@/lib/drive/richtext-import"
|
|
import type { DocPageHeaderFooter, DocPageSetup } from "@/lib/drive/doc-page-setup"
|
|
|
|
/** Unzipped DOCX package: maps each entry path (e.g. `word/document.xml`) to that entry's raw bytes. */
type DocxArchive = Record<string, Uint8Array>
|
|
|
|
/**
 * Decodes the raw bytes of an XML part into a string.
 *
 * The encoding is chosen from the byte-order mark: a UTF-16 LE/BE BOM selects
 * the matching UTF-16 decoder, anything else is decoded as UTF-8. The BOM
 * itself is not included in the returned string.
 *
 * @param bytes Raw part content, or `undefined` when the part is missing.
 * @returns The decoded XML text, or `""` when `bytes` is missing.
 */
function decodeXml(bytes: Uint8Array | undefined): string {
  if (!bytes) return ""

  const first = bytes[0]
  const second = bytes[1]

  // XML parts are normally UTF-8, but UTF-16 (always BOM-prefixed) is also legal.
  if (first === 0xff && second === 0xfe) return new TextDecoder("utf-16le").decode(bytes)
  if (first === 0xfe && second === 0xff) return new TextDecoder("utf-16be").decode(bytes)

  return new TextDecoder().decode(bytes)
}
|
|
|
|
/** The five named entities predefined by XML. */
const XML_NAMED_ENTITIES: Record<string, string> = {
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
}

/** Matches one paragraph: either a self-closing `<w:p/>` or an open/close pair. */
const PARAGRAPH_RE = /<w:p(?:\s[^>]*)?\/>|<w:p(?:\s[^>]*)?>[\s\S]*?<\/w:p>/gi

/** Matches one run (`<w:r>…</w:r>`); `<w:rPr>` and similar tags do not match. */
const RUN_RE = /<w:r(?:\s[^>]*)?>[\s\S]*?<\/w:r>/gi

/** Matches one text element and captures its (still entity-encoded) content. */
const TEXT_RE = /<w:t(?:\s[^>]*)?>([^<]*)<\/w:t>/gi

/** Run-level toggle properties and the mark each one maps to; group 1 is the tag's attribute text. */
const RUN_MARK_TAGS = [
  { type: "bold", tag: /<w:b(?=[\s/>])([^>]*)>/ },
  { type: "italic", tag: /<w:i(?=[\s/>])([^>]*)>/ },
  { type: "underline", tag: /<w:u(?=[\s/>])([^>]*)>/ },
] as const

/**
 * Replaces predefined (`&amp;` …) and numeric (`&#169;`, `&#xA9;`) character
 * references with the characters they denote. Done in a single pass so that
 * `&amp;lt;` becomes `&lt;`, not `<`. Unknown or out-of-range references are
 * left untouched.
 */
function decodeXmlEntities(raw: string): string {
  return raw.replace(
    /&(#[xX][0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g,
    (whole: string, body: string) => {
      if (body.charAt(0) !== "#") return XML_NAMED_ENTITIES[body] ?? whole
      const isHex = body.charAt(1) === "x" || body.charAt(1) === "X"
      const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10)
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole
    }
  )
}

/**
 * Tells whether the toggle property matched by `tag` is switched on in `xml`.
 * The element being present counts as "on" unless its `w:val` attribute
 * explicitly disables it (`0`, `false`, `off`, or `none` for underline).
 */
function isToggleEnabled(xml: string, tag: RegExp): boolean {
  const found = tag.exec(xml)
  if (!found) return false
  const val = /\bw:val\s*=\s*(?:"([^"]*)"|'([^']*)')/.exec(found[1] ?? "")
  if (!val) return true
  const value = (val[1] ?? val[2] ?? "").toLowerCase()
  return !(value === "0" || value === "false" || value === "off" || value === "none")
}

/**
 * Converts WordprocessingML markup into TipTap paragraph nodes.
 *
 * Every `<w:p>` becomes one `paragraph` node (empty paragraphs are kept with
 * empty `content`). Inside a paragraph each run's text is entity-decoded and
 * gets bold/italic/underline marks from that run's own properties; adjacent
 * runs with the same marks are merged into a single text node. When the input
 * contains no `<w:p>` at all, the tags are stripped and any remaining text is
 * returned as one plain paragraph.
 *
 * @param xml Decoded XML of a header/footer (or similar) part.
 * @returns The paragraph nodes in document order; `[]` when there is no content.
 */
function xmlTextToParagraphs(xml: string): TipTapJSON[] {
  const paragraphs: TipTapJSON[] = []
  const pBlocks = [...xml.matchAll(PARAGRAPH_RE)]

  if (pBlocks.length === 0) {
    // Strip tags before decoding so that a decoded "<" cannot be mistaken for markup.
    const text = decodeXmlEntities(xml.replace(/<[^>]+>/g, " "))
      .replace(/\s+/g, " ")
      .trim()
    if (text) {
      paragraphs.push({ type: "paragraph", content: [{ type: "text", text }] })
    }
    return paragraphs
  }

  for (const pMatch of pBlocks) {
    const pXml = pMatch[0]
    const runMatches = [...pXml.matchAll(RUN_RE)]
    // Text that is not wrapped in any run is still picked up by treating the
    // whole paragraph as one run.
    const runXmls = runMatches.length > 0 ? runMatches.map((m) => m[0]) : [pXml]

    const segments: { text: string; markKey: string; marks: TipTapJSON[] }[] = []
    for (const runXml of runXmls) {
      const text = [...runXml.matchAll(TEXT_RE)]
        .map((t) => decodeXmlEntities(t[1] ?? ""))
        .join("")
      if (!text) continue

      const markTypes = RUN_MARK_TAGS.filter(({ tag }) => isToggleEnabled(runXml, tag)).map(
        ({ type }) => type
      )
      const markKey = markTypes.join(",")

      // Word splits text into many runs; merge neighbours that are formatted alike.
      const previous = segments[segments.length - 1]
      if (previous && previous.markKey === markKey) {
        previous.text += text
      } else {
        segments.push({ text, markKey, marks: markTypes.map((type) => ({ type })) })
      }
    }

    const content: TipTapJSON[] = segments.map(({ text, marks }) => ({
      type: "text",
      text,
      ...(marks.length ? { marks } : {}),
    }))
    paragraphs.push({ type: "paragraph", content })
  }

  return paragraphs
}
|
|
|
|
/**
 * Loads a single header/footer part from the archive and wraps its paragraphs
 * in a TipTap `doc` node.
 *
 * @param archive  Unzipped DOCX entries keyed by path.
 * @param partName Part name without folder or extension; `header1` is read
 *                 from `word/header1.xml`.
 * @returns The converted part with a fixed `heightMm` of 15, or `null` when
 *          the part is missing/empty or converts to no blocks.
 */
function parseHeaderFooterPart(
  archive: DocxArchive,
  partName: string
): DocPageHeaderFooter | null {
  const partXml = decodeXml(archive[`word/${partName}.xml`])
  // Skip conversion entirely when there is nothing to convert.
  const blocks = partXml ? xmlTextToParagraphs(partXml) : []
  if (blocks.length === 0) return null

  const result: DocPageHeaderFooter = {
    content: { type: "doc", content: blocks },
    heightMm: 15,
  }
  return result
}
|
|
|
|
/** Names of the `DocPageSetup` fields that the DOCX header/footer import fills in. */
type DocxHeaderFooterKey = "header" | "footer" | "headerFooterDifferentFirstPage"

/** Header/footer subset of `DocPageSetup`, as returned by `extractDocxHeaderFooter`. */
export type DocxHeaderFooterResult = Pick<DocPageSetup, DocxHeaderFooterKey>
|
|
|
|
/**
 * Returns the value of attribute `name` inside a single XML tag, or
 * `undefined` when the attribute is absent. `name` must be a plain attribute
 * name (it is inserted into a regular expression unescaped).
 */
function readXmlAttribute(tag: string, name: string): string | undefined {
  const found = new RegExp(`\\s${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`).exec(tag)
  return found ? (found[1] ?? found[2]) : undefined
}

/**
 * Resolves the part that the document declares as its *default* header or
 * footer: finds the first `<w:headerReference>`/`<w:footerReference>` with
 * `w:type="default"` and looks its `r:id` up in the relationships part.
 *
 * @returns The part name relative to `word/` without the `.xml` extension
 *          (e.g. `header2`), or `null` when it cannot be resolved.
 */
function resolveDefaultPartName(
  documentXml: string,
  relsXml: string,
  kind: "header" | "footer"
): string | null {
  const referenceRe = new RegExp(`<w:${kind}Reference\\b[^>]*>`, "g")
  const reference = [...documentXml.matchAll(referenceRe)]
    .map((m) => m[0])
    .find((tag) => readXmlAttribute(tag, "w:type") === "default")
  const relationshipId = reference ? readXmlAttribute(reference, "r:id") : undefined
  if (!relationshipId) return null

  for (const relationship of relsXml.matchAll(/<Relationship\b[^>]*>/g)) {
    const tag = relationship[0]
    if (readXmlAttribute(tag, "Id") !== relationshipId) continue
    // Targets are relative to `word/` ("header2.xml") or package-absolute ("/word/header2.xml").
    const target = readXmlAttribute(tag, "Target") ?? ""
    const parsed = /^(?:\/word\/|\.\/)?(.+)\.xml$/.exec(target)
    return parsed?.[1] ?? null
  }
  return null
}

/**
 * Extract header/footer body content from DOCX archive.
 *
 * The default header and footer are located through the section's
 * `w:headerReference`/`w:footerReference` (type `default`) and the document
 * relationships, because the numbering of `headerN.xml`/`footerN.xml` parts
 * does not say which one is the default, first-page or even-page variant.
 * When that lookup fails, the previous heuristic (`…1.xml`, then `…2.xml`) is
 * used. `headerFooterDifferentFirstPage` is true when the document contains an
 * enabled `<w:titlePg>`.
 *
 * This is best-effort: any failure (invalid ZIP, missing `fflate`, decoding
 * error) yields empty header/footer and `false` rather than throwing.
 *
 * @param buffer Raw bytes of the `.docx` file.
 */
export async function extractDocxHeaderFooter(
  buffer: ArrayBuffer
): Promise<DocxHeaderFooterResult> {
  try {
    const { unzipSync } = await import("fflate")
    const archive = unzipSync(new Uint8Array(buffer)) as DocxArchive

    const documentXml = decodeXml(archive["word/document.xml"])
    const relsXml = decodeXml(archive["word/_rels/document.xml.rels"])

    const loadDefault = (kind: "header" | "footer"): DocPageHeaderFooter | null => {
      const resolved = resolveDefaultPartName(documentXml, relsXml, kind)
      if (resolved !== null && archive[`word/${resolved}.xml`]) {
        return parseHeaderFooterPart(archive, resolved)
      }
      // No usable reference: fall back to guessing by conventional part names.
      return (
        parseHeaderFooterPart(archive, `${kind}1`) ??
        parseHeaderFooterPart(archive, `${kind}2`)
      )
    }

    const header = loadDefault("header")
    const footer = loadDefault("footer")

    // <w:titlePg/> is a toggle: present means on unless w:val switches it off.
    const titlePg = /<w:titlePg\b[^>]*>/i.exec(documentXml)
    const titlePgVal = titlePg ? readXmlAttribute(titlePg[0], "w:val")?.toLowerCase() : undefined
    const differentFirst =
      titlePg !== null && !(titlePgVal === "0" || titlePgVal === "false" || titlePgVal === "off")

    return {
      header,
      footer,
      headerFooterDifferentFirstPage: differentFirst,
    }
  } catch {
    // Deliberately swallow: header/footer import must never block the document import.
    return {
      header: null,
      footer: null,
      headerFooterDifferentFirstPage: false,
    }
  }
}
|