// NOTE(review): generic arguments and the opening `<tag` part of each element
// regex in this module were restored from context (variable names and the
// surviving closing-tag halves). Confirm the element names against real DOCX
// fixtures before relying on them.

/** In-memory DOCX package: archive entry path mapped to the entry's raw bytes. */
type DocxArchive = Record<string, Uint8Array>

/** Fallbacks applied when a watermark omits (or has an unparsable) attribute. */
const DEFAULT_WATERMARK_COLOR = "#b4b4b4"
const DEFAULT_WATERMARK_OPACITY = 0.35
const DEFAULT_WATERMARK_ROTATION_DEG = -35

/** VML stores some fractions/angles as 16.16 fixed point ("32768f", "…fd"). */
const VML_FIXED_POINT_SCALE = 65536

/** The colour keywords VML accepts by name, lower-cased, mapped to `#rrggbb`. */
const VML_NAMED_COLORS: ReadonlyMap<string, string> = new Map([
  ["black", "#000000"],
  ["silver", "#c0c0c0"],
  ["gray", "#808080"],
  ["white", "#ffffff"],
  ["maroon", "#800000"],
  ["red", "#ff0000"],
  ["purple", "#800080"],
  ["fuchsia", "#ff00ff"],
  ["green", "#008000"],
  ["lime", "#00ff00"],
  ["olive", "#808000"],
  ["yellow", "#ffff00"],
  ["navy", "#000080"],
  ["blue", "#0000ff"],
  ["teal", "#008080"],
  ["aqua", "#00ffff"],
])

/** The five predefined XML entities. A Map avoids prototype-key lookups. */
const XML_NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
])

/**
 * Replaces predefined and numeric XML character references in an attribute
 * value with the characters they denote. Unknown references are left as-is.
 */
function decodeXmlEntities(value: string): string {
  return value.replace(/&(#x[0-9a-fA-F]+|#[0-9]+|[a-z]+);/g, (whole: string, body: string) => {
    if (body.startsWith("#")) {
      const code =
        body[1] === "x" ? Number.parseInt(body.slice(2), 16) : Number.parseInt(body.slice(1), 10)
      // String.fromCodePoint throws above U+10FFFF, so keep such input verbatim.
      return Number.isFinite(code) && code <= 0x10ffff ? String.fromCodePoint(code) : whole
    }
    return XML_NAMED_ENTITIES.get(body) ?? whole
  })
}

/**
 * Normalises a WordprocessingML colour attribute to a `#`-prefixed value.
 *
 * @param raw Attribute value: "auto", a bare six-digit hex, or a `#` value.
 * @returns The colour; missing, "auto" and unrecognised input map to black.
 */
function parseWordColor(raw: string | undefined): string {
  if (!raw || raw.toLowerCase() === "auto") return "#000000"
  if (raw.startsWith("#")) return raw
  if (/^[0-9a-f]{6}$/i.test(raw)) return `#${raw}`
  return "#000000"
}

/** Picture used as the page fill and how it is laid out on the page. */
export type DocPageFillImage = {
  src: string
  mode: "tile" | "frame" | "cover"
}

/** Text or picture watermark extracted from a header part. */
export type DocPageWatermark = {
  kind: "text" | "image"
  text?: string
  src?: string
  color?: string
  opacity?: number
  rotationDeg?: number
}

/** Everything that decorates the page behind the document content. */
export type DocPageBackground = {
  fillImage?: DocPageFillImage | null
  watermark?: DocPageWatermark | null
  gradientCss?: string | null
}

/** CSS-oriented view of a {@link DocPageBackground}. */
export type DocPageBackgroundLayers = {
  fillImageStyle?: {
    backgroundImage: string
    backgroundSize: string
    backgroundRepeat: string
    backgroundPosition: string
  }
  watermarkStyle?: {
    text?: string
    imageSrc?: string
    color: string
    opacity: number
    rotationDeg: number
  }
  gradientCss?: string
}

/** Decodes an archive entry as text; a missing entry yields an empty string. */
function decodeXml(bytes: Uint8Array | undefined): string {
  if (!bytes) return ""
  return new TextDecoder().decode(bytes)
}

/**
 * Extracts the `Id` to `Target` mapping from a relationships (`.rels`) part.
 *
 * @param relsXml Text of the relationships part.
 * @returns Map keyed by relationship id; targets have XML entities decoded.
 */
export function parseDocxRelationships(relsXml: string): Map<string, string> {
  const map = new Map<string, string>()
  // `\b` keeps the pattern from matching the enclosing <Relationships> root.
  for (const match of relsXml.matchAll(/<Relationship\b[^>]*\/?>/gi)) {
    const id = match[0].match(/\bId="([^"]+)"/i)?.[1]
    const target = match[0].match(/\bTarget="([^"]+)"/i)?.[1]
    if (id && target) map.set(id, decodeXmlEntities(target))
  }
  return map
}

/** Picks a MIME type from a media path's file extension. */
function mimeFromMediaPath(path: string): string {
  const ext = path.split(".").pop()?.toLowerCase()
  switch (ext) {
    case "png":
      return "image/png"
    case "jpg":
    case "jpeg":
      return "image/jpeg"
    case "gif":
      return "image/gif"
    case "webp":
      return "image/webp"
    case "bmp":
      return "image/bmp"
    default:
      return "application/octet-stream"
  }
}

/**
 * Encodes bytes as a base64 `data:` URL.
 *
 * @param bytes Payload to embed.
 * @param mime MIME type written into the URL header.
 */
export function uint8ArrayToDataUrl(bytes: Uint8Array, mime: string): string {
  let binary = ""
  for (const byte of bytes) {
    binary += String.fromCharCode(byte)
  }
  // btoa is used when the runtime provides it; otherwise fall back to Buffer.
  const base64 =
    typeof btoa === "function" ? btoa(binary) : Buffer.from(bytes).toString("base64")
  return `data:${mime};base64,${base64}`
}

/**
 * Resolves a relationship target against the directory of the owning part.
 * Targets starting with "/" are package-absolute; "." / ".." are collapsed.
 */
function resolveRelationshipTarget(baseDir: string, target: string): string {
  if (target.startsWith("/")) return target.slice(1)
  const baseParts = baseDir.split("/").filter(Boolean)
  for (const part of target.split("/")) {
    if (part === "..") baseParts.pop()
    // Skip "." and the empty segments produced by doubled slashes.
    else if (part !== "." && part !== "") baseParts.push(part)
  }
  return baseParts.join("/")
}

/**
 * Looks up a relationship id and returns the referenced archive entry as a
 * data URL.
 *
 * @param archive Package contents.
 * @param relsPath Path of the `.rels` part to consult.
 * @param rId Relationship id to resolve.
 * @returns The data URL, or null when the rels part, the id, or the target
 *   entry is missing.
 */
export function resolveDocxMediaDataUrl(
  archive: DocxArchive,
  relsPath: string,
  rId: string
): string | null {
  const relsXml = decodeXml(archive[relsPath])
  if (!relsXml) return null
  const target = parseDocxRelationships(relsXml).get(rId)
  if (!target) return null
  // "word/_rels/document.xml.rels" -> "word": targets are relative to that dir.
  const baseDir = relsPath.replace(/\/_rels\/[^/]+\.rels$/, "")
  const mediaPath = resolveRelationshipTarget(baseDir, target)
  const bytes = archive[mediaPath]
  if (!bytes) return null
  return uint8ArrayToDataUrl(bytes, mimeFromMediaPath(mediaPath))
}

/**
 * Parses a VML colour attribute into `#rrggbb`.
 *
 * Accepts `#rrggbb`, `#rgb`, bare six-digit hex, the named colours in
 * {@link VML_NAMED_COLORS}, and "auto" (black). A trailing palette index such
 * as the " [3204]" in "#4f81bd [3204]" is ignored.
 *
 * @returns The colour, or undefined when the value is missing or not
 *   recognised, so callers can apply their own fallback.
 */
function parseVmlColor(raw: string | undefined): string | undefined {
  if (!raw) return undefined
  const token = raw
    .trim()
    .replace(/\s*\[[^\]]*\]$/, "")
    .trim()
  if (!token) return undefined
  const lower = token.toLowerCase()
  if (lower === "auto") return "#000000"
  const shortHex = /^#([0-9a-f])([0-9a-f])([0-9a-f])$/.exec(lower)
  if (shortHex) {
    const [, r, g, b] = shortHex
    return `#${r}${r}${g}${g}${b}${b}`
  }
  if (/^#[0-9a-f]{6}$/.test(lower)) return token
  if (/^[0-9a-f]{6}$/.test(lower)) return `#${token}`
  // Expressions such as "fill darken(118)" are not resolvable here.
  return VML_NAMED_COLORS.get(lower)
}

/** Maps the fill element's `type` attribute to a layout mode (default "cover"). */
function parseFillImageMode(fillTag: string): DocPageFillImage["mode"] {
  const type = fillTag.match(/\btype="([^"]+)"/i)?.[1]?.toLowerCase()
  if (type === "tile") return "tile"
  if (type === "frame") return "frame"
  return "cover"
}

/**
 * Builds a CSS gradient from a fill element.
 *
 * @param fillTag Text of the fill element's start tag.
 * @param fallbackColor Start colour to use when the tag has no usable `color`
 *   attribute (for example the enclosing shape's `fillcolor`).
 * @returns CSS gradient text, or null when the type is not a gradient or
 *   either colour is unavailable. The `angle` attribute is copied verbatim
 *   into the CSS `deg` value without any conversion.
 */
function parseGradientCss(fillTag: string, fallbackColor?: string): string | null {
  const type = fillTag.match(/\btype="([^"]+)"/i)?.[1]?.toLowerCase()
  const color = parseVmlColor(fillTag.match(/\bcolor="([^"]+)"/i)?.[1]) ?? fallbackColor
  const color2 = parseVmlColor(fillTag.match(/\bcolor2="([^"]+)"/i)?.[1])
  if (!color || !color2) return null
  const angleRaw = fillTag.match(/\bangle="([^"]+)"/i)?.[1]
  const angle = angleRaw ? Number.parseFloat(angleRaw) : 0
  if (type === "gradientradial") {
    return `radial-gradient(circle, ${color}, ${color2})`
  }
  if (type === "gradient") {
    return `linear-gradient(${Number.isFinite(angle) ? angle : 0}deg, ${color}, ${color2})`
  }
  return null
}

/**
 * Reads the page colour and fill (gradient / picture) from the document
 * part's background element.
 *
 * @param documentXml Text of the main document part.
 * @param archive Package contents, used to resolve a picture fill.
 * @returns An empty object when there is no background element. Otherwise
 *   `pageColor` is the explicit `w:color`, else the VML `fillcolor`, else
 *   undefined; `background` is null when no gradient or picture was found.
 */
export function parseDocumentBackgroundXml(
  documentXml: string,
  archive: DocxArchive
): { pageColor?: string; background?: DocPageBackground | null } {
  const block =
    documentXml.match(/<w:background\b[^>]*>[\s\S]*?<\/w:background>/i)?.[0] ??
    documentXml.match(/<w:background\b[^>]*\/>/i)?.[0]
  if (!block) return {}

  const rawPageColor = block.match(/\bw:color="([^"]+)"/i)?.[1]
  // "auto" leaves the choice to the consumer, so it must not be painted black.
  const hasExplicitColor = rawPageColor !== undefined && rawPageColor.toLowerCase() !== "auto"
  const fillColor = parseVmlColor(block.match(/\bfillcolor="([^"]+)"/i)?.[1])
  const pageColor = hasExplicitColor ? parseWordColor(rawPageColor) : fillColor

  const fillTag = block.match(/<v:fill\b[^>]*\/?>/i)?.[0]
  let background: DocPageBackground | null = null
  if (fillTag) {
    // A gradient fill may carry only color2; the start colour is then the
    // background's fillcolor.
    const gradientCss = parseGradientCss(fillTag, fillColor)
    if (gradientCss) {
      background = { gradientCss }
    }
    const rId = fillTag.match(/\br:id="([^"]+)"/i)?.[1]
    if (rId) {
      const src = resolveDocxMediaDataUrl(archive, "word/_rels/document.xml.rels", rId)
      if (src) {
        background = {
          ...(background ?? {}),
          fillImage: { src, mode: parseFillImageMode(fillTag) },
        }
      }
    }
  }

  return { pageColor, background }
}

/**
 * Extracts the rotation from a VML `style` attribute.
 *
 * @returns Degrees; values with an `fd` suffix are 16.16 fixed point and are
 *   scaled down. Falls back to the default rotation when absent or invalid.
 */
function parseRotationDeg(style: string | undefined): number {
  if (!style) return DEFAULT_WATERMARK_ROTATION_DEG
  const match = style.match(/(?:rotation|rotate):\s*(-?\d+(?:\.\d+)?)(fd)?/i)
  const rotation = match?.[1]
  if (!rotation) return DEFAULT_WATERMARK_ROTATION_DEG
  const value = Number.parseFloat(rotation)
  if (!Number.isFinite(value)) return DEFAULT_WATERMARK_ROTATION_DEG
  return match?.[2] ? value / VML_FIXED_POINT_SCALE : value
}

/**
 * Parses a VML opacity attribute into the range 0..1.
 *
 * Accepts plain fractions (".5"), 16.16 fixed point ("32768f"), percentages
 * ("50%"), and bare numbers above 1, which are read as percentages.
 *
 * @returns The clamped opacity, or the default when absent or invalid.
 */
function parseOpacity(raw: string | undefined): number {
  if (!raw) return DEFAULT_WATERMARK_OPACITY
  const trimmed = raw.trim()
  const value = Number.parseFloat(trimmed)
  if (!Number.isFinite(value)) return DEFAULT_WATERMARK_OPACITY
  let fraction = value
  if (/f$/i.test(trimmed)) {
    fraction = value / VML_FIXED_POINT_SCALE
  } else if (trimmed.endsWith("%") || value > 1) {
    fraction = value / 100
  }
  return Math.min(1, Math.max(0, fraction))
}

/**
 * Extracts a watermark from the first VML shape of a header part.
 *
 * @param headerXml Text of the header part.
 * @param archive Package contents, used to resolve a picture watermark.
 * @param relsPath Path of the header's `.rels` part.
 * @returns A text watermark when the shape carries a non-blank text-path
 *   string, otherwise a picture watermark when its image data resolves,
 *   otherwise null.
 */
export function parseWatermarkFromHeaderXml(
  headerXml: string,
  archive: DocxArchive,
  relsPath: string
): DocPageWatermark | null {
  const shape = headerXml.match(/<v:shape\b[^>]*>[\s\S]*?<\/v:shape>/i)?.[0]
  if (!shape) return null

  const rawText =
    shape.match(/<v:textpath\b[^>]*\bw:string="([^"]+)"/i)?.[1] ??
    shape.match(/<v:textpath\b[^>]*\bstring="([^"]+)"/i)?.[1]
  // Attribute text is XML-escaped ("R&amp;D"); decode before exposing it.
  const text = rawText ? decodeXmlEntities(rawText).trim() : ""
  const style = shape.match(/\bstyle="([^"]+)"/i)?.[1]

  if (text) {
    const fill = shape.match(/<v:fill\b[^>]*\/?>/i)?.[0]
    const color =
      parseVmlColor(shape.match(/\bfillcolor="([^"]+)"/i)?.[1]) ??
      parseVmlColor(fill?.match(/\bcolor="([^"]+)"/i)?.[1]) ??
      DEFAULT_WATERMARK_COLOR
    return {
      kind: "text",
      text,
      color,
      opacity: parseOpacity(fill?.match(/\bopacity="([^"]+)"/i)?.[1]),
      rotationDeg: parseRotationDeg(style),
    }
  }

  const imagedata = shape.match(/<v:imagedata\b[^>]*\/?>/i)?.[0]
  const rId = imagedata?.match(/\br:id="([^"]+)"/i)?.[1]
  if (rId) {
    const src = resolveDocxMediaDataUrl(archive, relsPath, rId)
    if (src) {
      return {
        kind: "image",
        src,
        // NOTE(review): element assumed to be <v:fill>; confirm.
        opacity: parseOpacity(shape.match(/<v:fill\b[^>]*\bopacity="([^"]+)"/i)?.[1]),
        rotationDeg: parseRotationDeg(style),
      }
    }
  }
  return null
}

/**
 * Combines the document background with the first watermark found in any
 * `word/headerN.xml` part.
 *
 * @param archive Package contents.
 * @param documentXml Text of the main document part.
 * @returns The page colour (if any) and the merged background, or a null
 *   background when neither a fill nor a watermark exists.
 */
export function extractDocxPageBackground(
  archive: DocxArchive,
  documentXml: string
): { pageColor?: string; background?: DocPageBackground | null } {
  const fromDocument = parseDocumentBackgroundXml(documentXml, archive)

  let watermark: DocPageWatermark | null = null
  for (const key of Object.keys(archive)) {
    const headerName = key.match(/^word\/(header\d+)\.xml$/)?.[1]
    if (!headerName) continue
    const relsPath = `word/_rels/${headerName}.xml.rels`
    const parsed = parseWatermarkFromHeaderXml(decodeXml(archive[key]), archive, relsPath)
    if (parsed) {
      watermark = parsed
      break
    }
  }

  const background: DocPageBackground | null =
    fromDocument.background || watermark
      ? {
          ...(fromDocument.background ?? {}),
          ...(watermark ? { watermark } : {}),
        }
      : null

  return {
    pageColor: fromDocument.pageColor,
    background,
  }
}

/**
 * Converts a parsed background into style fragments for rendering.
 *
 * @param background Parsed background; null/undefined yields an empty object.
 * @returns Only the layers that are present; watermark fields missing from
 *   the input are filled with the module defaults.
 */
export function resolvePageBackgroundLayers(
  background: DocPageBackground | null | undefined
): DocPageBackgroundLayers {
  if (!background) return {}
  const layers: DocPageBackgroundLayers = {}

  if (background.gradientCss) {
    layers.gradientCss = background.gradientCss
  }

  if (background.fillImage?.src) {
    const mode = background.fillImage.mode
    layers.fillImageStyle = {
      backgroundImage: `url("${background.fillImage.src}")`,
      backgroundRepeat: mode === "tile" ? "repeat" : "no-repeat",
      backgroundSize: mode === "tile" ? "auto" : mode === "frame" ? "100% 100%" : "cover",
      backgroundPosition: "center",
    }
  }

  if (background.watermark) {
    const watermark = background.watermark
    layers.watermarkStyle = {
      text: watermark.kind === "text" ? watermark.text : undefined,
      imageSrc: watermark.kind === "image" ? watermark.src : undefined,
      color: watermark.color ?? DEFAULT_WATERMARK_COLOR,
      opacity: watermark.opacity ?? DEFAULT_WATERMARK_OPACITY,
      rotationDeg: watermark.rotationDeg ?? DEFAULT_WATERMARK_ROTATION_DEG,
    }
  }

  return layers
}