// TypeScript source · 315 lines · 9.4 KiB
/**
 * In-memory view of a DOCX package: each archive entry path
 * (for example "word/_rels/document.xml.rels") maps to that entry's raw bytes.
 */
type DocxArchive = { [entryPath: string]: Uint8Array }
|
|
|
|
/**
 * Normalizes a color attribute value into a CSS color string.
 *
 * - A missing value, an empty value, or "auto" (any letter case) yields black.
 * - A value that already starts with "#" is returned untouched.
 * - A bare six-digit hex value gets a "#" prefix.
 * - Anything else yields black.
 *
 * @param raw Attribute value as found in the XML, if any.
 * @returns A "#"-prefixed color string; "#000000" is the fallback.
 */
function parseWordColor(raw: string | undefined): string {
  const fallback = "#000000"
  if (!raw) return fallback
  if (raw.toLowerCase() === "auto") return fallback
  if (raw[0] === "#") return raw
  const isBareHex = /^[0-9a-f]{6}$/i.test(raw)
  return isBareHex ? `#${raw}` : fallback
}
|
|
|
|
/** Image used as a page background fill. */
export type DocPageFillImage = {
  /** Image source placed inside a CSS `url("...")` when layers are resolved. */
  src: string
  /** How the image is laid out over the page: repeated, stretched to the frame, or covering it. */
  mode: "tile" | "frame" | "cover"
}
|
|
|
|
/** Watermark drawn on a page, either a text label or an image. */
export type DocPageWatermark = {
  /** Selects which of `text` / `src` carries the watermark content. */
  kind: "text" | "image"
  /** Label for a text watermark. */
  text?: string
  /** Image source for an image watermark. */
  src?: string
  /** Color of the watermark. */
  color?: string
  /** Opacity of the watermark. */
  opacity?: number
  /** Rotation of the watermark, in degrees. */
  rotationDeg?: number
}
|
|
|
|
/** Everything that decorates a page behind its content; every part is optional. */
export type DocPageBackground = {
  /** Background image fill, when one is present. */
  fillImage?: DocPageFillImage | null
  /** Watermark, when one is present. */
  watermark?: DocPageWatermark | null
  /** CSS gradient expression, when the fill is a gradient. */
  gradientCss?: string | null
}
|
|
|
|
/** Render-ready description of a page background, split into independent layers. */
export type DocPageBackgroundLayers = {
  /** CSS background properties for the fill image layer. */
  fillImageStyle?: {
    backgroundImage: string
    backgroundSize: string
    backgroundRepeat: string
    backgroundPosition: string
  }
  /** Watermark layer with color, opacity and rotation always filled in. */
  watermarkStyle?: {
    /** Label of a text watermark. */
    text?: string
    /** Image source of an image watermark. */
    imageSrc?: string
    color: string
    opacity: number
    rotationDeg: number
  }
  /** CSS gradient expression for the gradient layer. */
  gradientCss?: string
}
|
|
|
|
/**
 * Decodes an archive entry into text using a default `TextDecoder`.
 *
 * @param bytes Raw entry bytes, or `undefined` when the entry is missing.
 * @returns The decoded text, or an empty string when there are no bytes to decode.
 */
function decodeXml(bytes: Uint8Array | undefined): string {
  return bytes ? new TextDecoder().decode(bytes) : ""
}
|
|
|
|
/** Replaces the predefined XML entities and numeric character references in an attribute value. */
function decodeXmlAttributeValue(value: string): string {
  // Single pass so that "&amp;lt;" decodes to "&lt;" and is not decoded twice.
  return value.replace(/&(#x[0-9a-fA-F]+|#[0-9]+|lt|gt|amp|quot|apos);/g, (entity: string, body: string) => {
    switch (body) {
      case "lt":
        return "<"
      case "gt":
        return ">"
      case "amp":
        return "&"
      case "quot":
        return '"'
      case "apos":
        return "'"
      default: {
        const isHex = body[1] === "x"
        const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10)
        // Leave out-of-range references untouched instead of throwing from fromCodePoint.
        return Number.isFinite(codePoint) && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity
      }
    }
  })
}

/** Reads one attribute (single- or double-quoted, case-insensitive name) from a tag's source text. */
function readRelationshipAttribute(tag: string, name: string): string | undefined {
  const found = tag.match(new RegExp(`\\b${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`, "i"))
  const value = found?.[1] ?? found?.[2]
  return value === undefined ? undefined : decodeXmlAttributeValue(value)
}

/**
 * Builds a relationship-id → target lookup from the text of a `.rels` part.
 *
 * Every `<Relationship ...>` tag that carries both a non-empty `Id` and a
 * non-empty `Target` contributes one entry; tags missing either are skipped.
 * Attribute values may use either quote style, may have whitespace around `=`,
 * and have their XML entities decoded (so `a&amp;b.png` becomes `a&b.png`).
 * When an id occurs more than once, the last occurrence wins.
 *
 * @param relsXml Text of the relationships part.
 * @returns Map from relationship id to its target string.
 */
export function parseDocxRelationships(relsXml: string): Map<string, string> {
  const relationships = new Map<string, string>()
  for (const match of relsXml.matchAll(/<Relationship\b[^>]*>/gi)) {
    const tag = match[0]
    const id = readRelationshipAttribute(tag, "Id")
    const target = readRelationshipAttribute(tag, "Target")
    if (id && target) relationships.set(id, target)
  }
  return relationships
}
|
|
|
|
/** MIME types keyed by lower-case file extension (a Map, so names like "constructor" cannot hit the prototype). */
const MEDIA_MIME_BY_EXTENSION: ReadonlyMap<string, string> = new Map([
  ["png", "image/png"],
  ["jpg", "image/jpeg"],
  ["jpeg", "image/jpeg"],
  ["gif", "image/gif"],
  ["webp", "image/webp"],
  ["bmp", "image/bmp"],
  ["svg", "image/svg+xml"],
  ["tif", "image/tiff"],
  ["tiff", "image/tiff"],
  ["emf", "image/emf"],
  ["wmf", "image/wmf"],
])

/**
 * Picks a MIME type from a media path's file extension.
 *
 * The extension is whatever follows the last "." in the path, compared
 * case-insensitively. In addition to the common raster formats, SVG, TIFF,
 * EMF and WMF are recognised so those images are not labelled as opaque binary data.
 *
 * @param path Archive path of the media entry.
 * @returns The matching MIME type, or "application/octet-stream" when the extension is unknown.
 */
function mimeFromMediaPath(path: string): string {
  const extension = path.split(".").pop()?.toLowerCase() ?? ""
  return MEDIA_MIME_BY_EXTENSION.get(extension) ?? "application/octet-stream"
}
|
|
|
|
/**
 * Encodes bytes as a base64 `data:` URL.
 *
 * Uses `btoa` when the runtime provides it and falls back to `Buffer`
 * otherwise. The intermediate binary string needed by `btoa` is assembled in
 * fixed-size chunks rather than one character at a time, and is not built at
 * all on the `Buffer` path.
 *
 * @param bytes Raw content to embed.
 * @param mime MIME type written into the URL header.
 * @returns A string of the form `data:<mime>;base64,<payload>`.
 */
export function uint8ArrayToDataUrl(bytes: Uint8Array, mime: string): string {
  let base64: string
  if (typeof btoa === "function") {
    // Chunk size keeps the spread argument list well below engine call-argument limits.
    const chunkSize = 0x8000
    const pieces: string[] = []
    for (let offset = 0; offset < bytes.length; offset += chunkSize) {
      pieces.push(String.fromCharCode(...bytes.subarray(offset, offset + chunkSize)))
    }
    base64 = btoa(pieces.join(""))
  } else {
    base64 = Buffer.from(bytes).toString("base64")
  }
  return `data:${mime};base64,${base64}`
}
|
|
|
|
/**
 * Resolves a relationship target to an archive path without a leading slash.
 *
 * A target starting with "/" is resolved from the archive root; any other
 * target is resolved against `baseDir`. In both cases "." segments and empty
 * segments (from doubled slashes) are dropped and ".." removes the previous
 * segment; a ".." with nothing left to remove is ignored.
 *
 * @param baseDir Directory the relative target is resolved against.
 * @param target Target string taken from the relationship.
 * @returns The normalised archive path.
 */
function resolveRelationshipTarget(baseDir: string, target: string): string {
  const segments = target.startsWith("/") ? [] : baseDir.split("/").filter(Boolean)
  for (const part of target.split("/")) {
    if (part === "" || part === ".") continue
    if (part === "..") segments.pop()
    else segments.push(part)
  }
  return segments.join("/")
}
|
|
|
|
/**
 * Turns a relationship id into a `data:` URL for the media entry it points at.
 *
 * The relationships part at `relsPath` is decoded and searched for `rId`; its
 * target is resolved against the directory that owns the `_rels` folder, and
 * the bytes stored under the resulting path are encoded with a MIME type
 * derived from that path.
 *
 * @param archive DOCX entries keyed by path.
 * @param relsPath Path of the relationships part to consult.
 * @param rId Relationship id to look up.
 * @returns The data URL, or `null` when the relationships part is missing or
 *   empty, the id has no target, or the archive has no entry at the resolved path.
 */
export function resolveDocxMediaDataUrl(
  archive: DocxArchive,
  relsPath: string,
  rId: string
): string | null {
  const relationshipsXml = decodeXml(archive[relsPath])
  if (relationshipsXml === "") return null

  const relationshipTarget = parseDocxRelationships(relationshipsXml).get(rId)
  if (!relationshipTarget) return null

  // "word/_rels/document.xml.rels" → "word": strip the trailing "/_rels/<name>.rels".
  const ownerDir = relsPath.replace(/\/_rels\/[^/]+\.rels$/, "")
  const entryPath = resolveRelationshipTarget(ownerDir, relationshipTarget)

  const entryBytes = archive[entryPath]
  return entryBytes ? uint8ArrayToDataUrl(entryBytes, mimeFromMediaPath(entryPath)) : null
}
|
|
|
|
/** The sixteen basic named colors accepted in VML color attributes, keyed by lower-case name. */
const VML_NAMED_COLORS: ReadonlyMap<string, string> = new Map([
  ["black", "#000000"],
  ["silver", "#c0c0c0"],
  ["gray", "#808080"],
  ["white", "#ffffff"],
  ["maroon", "#800000"],
  ["red", "#ff0000"],
  ["purple", "#800080"],
  ["fuchsia", "#ff00ff"],
  ["green", "#008000"],
  ["lime", "#00ff00"],
  ["olive", "#808000"],
  ["yellow", "#ffff00"],
  ["navy", "#000080"],
  ["blue", "#0000ff"],
  ["teal", "#008080"],
  ["aqua", "#00ffff"],
])

/**
 * Normalizes a VML color attribute value to a CSS hex color.
 *
 * Handled forms:
 * - `#rrggbb` is returned as-is, and `#rgb` is expanded to `#rrggbb`;
 * - a bare six-digit hex value gets a "#" prefix;
 * - a basic named color ("silver", "red", ...) maps to its hex value;
 * - a trailing bracketed annotation such as the palette index in
 *   `#4f81bd [3204]` is ignored.
 *
 * @param raw Attribute value, if the attribute was present.
 * @returns The normalized color; `undefined` when the value is missing, blank,
 *   or a malformed "#" value; "#000000" for any other unrecognised text.
 */
function parseVmlColor(raw: string | undefined): string | undefined {
  if (!raw) return undefined
  // Drop a trailing "[index]" annotation before interpreting the color itself.
  const value = raw.replace(/\s*\[[^\]]*\]\s*$/, "").trim()
  if (!value) return undefined

  if (value.startsWith("#")) {
    if (/^#[0-9a-f]{6}$/i.test(value)) return value
    const short = value.match(/^#([0-9a-f])([0-9a-f])([0-9a-f])$/i)
    return short ? `#${short[1]}${short[1]}${short[2]}${short[2]}${short[3]}${short[3]}` : undefined
  }
  if (/^[0-9a-f]{6}$/i.test(value)) return `#${value}`

  // Unrecognised text (including "auto") keeps the black fallback callers already rely on.
  return VML_NAMED_COLORS.get(value.toLowerCase()) ?? "#000000"
}
|
|
|
|
/**
 * Derives the image layout mode from a `<v:fill>` tag's `type` attribute.
 *
 * @param fillTag Source text of the fill tag.
 * @returns "tile" or "frame" when the attribute says so (case-insensitive);
 *   "cover" for any other value or when the attribute is absent.
 */
function parseFillImageMode(fillTag: string): DocPageFillImage["mode"] {
  const fillType = /\btype="([^"]+)"/i.exec(fillTag)?.[1]?.toLowerCase()
  switch (fillType) {
    case "tile":
    case "frame":
      return fillType
    default:
      return "cover"
  }
}
|
|
|
|
/**
 * Converts a gradient `<v:fill>` tag into a CSS gradient expression.
 *
 * Both `color` and `color2` must resolve to a color. Type "gradientradial"
 * produces a circular radial gradient; type "gradient" produces a linear
 * gradient using the `angle` attribute (0 when absent or not a finite number).
 *
 * @param fillTag Source text of the fill tag.
 * @returns The CSS expression, or `null` when a color is missing or the fill
 *   type is not a gradient.
 */
function parseGradientCss(fillTag: string): string | null {
  const attribute = (pattern: RegExp): string | undefined => fillTag.match(pattern)?.[1]

  const startColor = parseVmlColor(attribute(/\bcolor="([^"]+)"/i))
  const endColor = parseVmlColor(attribute(/\bcolor2="([^"]+)"/i))
  if (!(startColor && endColor)) return null

  switch (attribute(/\btype="([^"]+)"/i)?.toLowerCase()) {
    case "gradientradial":
      return `radial-gradient(circle, ${startColor}, ${endColor})`
    case "gradient": {
      const angleText = attribute(/\bangle="([^"]+)"/i)
      const parsedAngle = angleText ? Number.parseFloat(angleText) : 0
      const angle = Number.isFinite(parsedAngle) ? parsedAngle : 0
      return `linear-gradient(${angle}deg, ${startColor}, ${endColor})`
    }
    default:
      return null
  }
}
|
|
|
|
/**
 * Reads the page color and background fill from a document's `<w:background>` element.
 *
 * The first paired `<w:background>...</w:background>` element is used, falling
 * back to the first self-closing one. Within it:
 * - the page color comes from `w:color`; the VML `fillcolor` is used only when
 *   no `w:color="` attribute is present and the parsed color is the black default;
 * - a `<v:fill>` tag may contribute a gradient and, through its `r:id`, an image
 *   resolved via "word/_rels/document.xml.rels".
 *
 * @param documentXml Text of the main document part.
 * @param archive DOCX entries keyed by path, used to load a fill image.
 * @returns An empty object when there is no background element. Otherwise
 *   `pageColor` (left `undefined` when it would be the black default and no
 *   `w:color="` attribute exists) and `background` (`null` when the fill yields
 *   neither a gradient nor an image).
 */
export function parseDocumentBackgroundXml(
  documentXml: string,
  archive: DocxArchive
): { pageColor?: string; background?: DocPageBackground | null } {
  const pairedElement = documentXml.match(/<w:background\b[^>]*>[\s\S]*?<\/w:background>/i)?.[0]
  const element = pairedElement ?? documentXml.match(/<w:background\b[^>]*\/>/i)?.[0]
  if (!element) return {}

  // An explicit w:color attribute is honoured even when it parses to the black default.
  const hasColorAttribute = element.includes('w:color="')
  const wordColor = parseWordColor(element.match(/\bw:color="([^"]+)"/i)?.[1])
  const vmlFillColor = parseVmlColor(element.match(/\bfillcolor="([^"]+)"/i)?.[1])
  const useWordColor = wordColor !== "#000000" || hasColorAttribute
  const effectiveColor = useWordColor ? wordColor : (vmlFillColor ?? wordColor)

  let background: DocPageBackground | null = null
  const fillTag = element.match(/<v:fill\b[^>]*\/?>/i)?.[0]
  if (fillTag) {
    const gradientCss = parseGradientCss(fillTag)
    if (gradientCss) background = { gradientCss }

    const relationshipId = fillTag.match(/\br:id="([^"]+)"/i)?.[1]
    const imageSrc = relationshipId
      ? resolveDocxMediaDataUrl(archive, "word/_rels/document.xml.rels", relationshipId)
      : null
    if (imageSrc) {
      // Keep a gradient found above and add the image next to it.
      background = {
        ...(background ?? {}),
        fillImage: { src: imageSrc, mode: parseFillImageMode(fillTag) },
      }
    }
  }

  const reportColor = effectiveColor !== "#000000" || hasColorAttribute
  return {
    pageColor: reportColor ? effectiveColor : undefined,
    background,
  }
}
|
|
|
|
/**
 * Extracts a rotation in degrees from an inline style string.
 *
 * Looks for the first `rotation:` or `rotate:` declaration (case-insensitive)
 * followed by an optionally negative decimal number.
 *
 * @param style Value of a `style` attribute, if present.
 * @returns The declared rotation, or -35 when the style is missing or declares none.
 */
function parseRotationDeg(style: string | undefined): number {
  const fallbackDeg = -35
  const declared = style ? /(?:rotation|rotate):\s*(-?\d+(?:\.\d+)?)/i.exec(style)?.[1] : undefined
  if (declared === undefined) return fallbackDeg

  const degrees = Number.parseFloat(declared)
  return Number.isFinite(degrees) ? degrees : fallbackDeg
}
|
|
|
|
/**
 * Converts an opacity attribute value into a fraction between 0 and 1.
 *
 * Accepted notations:
 * - a trailing "f" marks a fixed-point value in 1/65536 units ("32768f" → 0.5);
 * - a trailing "%" marks a percentage ("50%" → 0.5);
 * - a plain number is taken as a fraction when it is at most 1, and as a
 *   percentage when it is larger ("50" → 0.5).
 *
 * @param raw Attribute value, if the attribute was present.
 * @returns The opacity clamped to [0, 1], or 0.35 when the value is missing or not numeric.
 */
function parseOpacity(raw: string | undefined): number {
  const defaultOpacity = 0.35
  const text = raw?.trim()
  if (!text) return defaultOpacity

  const value = Number.parseFloat(text)
  if (!Number.isFinite(value)) return defaultOpacity

  let fraction: number
  if (/f$/i.test(text)) {
    fraction = value / 65536
  } else if (text.endsWith("%")) {
    fraction = value / 100
  } else {
    fraction = value > 1 ? value / 100 : value
  }
  return Math.min(1, Math.max(0, fraction))
}
|
|
|
|
/**
 * Looks for a watermark in a header part.
 *
 * Only the first `<v:shape>...</v:shape>` element of the header is examined.
 * A `<v:textpath>` with a non-blank `w:string` / `string` attribute yields a
 * text watermark; otherwise a `<v:imagedata>` whose `r:id` resolves to a media
 * entry yields an image watermark.
 *
 * @param headerXml Text of the header part.
 * @param archive DOCX entries keyed by path, used to load a watermark image.
 * @param relsPath Path of the header's relationships part.
 * @returns The watermark, or `null` when the header has no shape or the shape
 *   carries neither usable text nor a resolvable image.
 */
export function parseWatermarkFromHeaderXml(
  headerXml: string,
  archive: DocxArchive,
  relsPath: string
): DocPageWatermark | null {
  const shape = headerXml.match(/<v:shape\b[^>]*>[\s\S]*?<\/v:shape>/i)?.[0]
  if (!shape) return null

  // Both watermark kinds read their rotation from the shape's inline style.
  const shapeStyle = shape.match(/\bstyle="([^"]+)"/i)?.[1]

  const rawLabel =
    shape.match(/<v:textpath\b[^>]*\bw:string="([^"]+)"/i)?.[1] ??
    shape.match(/<v:textpath\b[^>]*\bstring="([^"]+)"/i)?.[1]
  const label = rawLabel?.trim()
  if (label) {
    const fillTag = shape.match(/<v:fill\b[^>]*\/?>/i)?.[0]
    // Color preference: the shape's fillcolor, then the fill tag's color, then a light gray.
    const shapeColor = parseVmlColor(shape.match(/\bfillcolor="([^"]+)"/i)?.[1])
    const color = shapeColor ?? parseVmlColor(fillTag?.match(/\bcolor="([^"]+)"/i)?.[1]) ?? "#b4b4b4"
    return {
      kind: "text",
      text: label,
      color,
      opacity: parseOpacity(fillTag?.match(/\bopacity="([^"]+)"/i)?.[1]),
      rotationDeg: parseRotationDeg(shapeStyle),
    }
  }

  const imageTag = shape.match(/<v:imagedata\b[^>]*\/?>/i)?.[0]
  const relationshipId = imageTag?.match(/\br:id="([^"]+)"/i)?.[1]
  if (!relationshipId) return null

  const src = resolveDocxMediaDataUrl(archive, relsPath, relationshipId)
  if (!src) return null

  return {
    kind: "image",
    src,
    opacity: parseOpacity(shape.match(/<v:fill\b[^>]*\bopacity="([^"]+)"/i)?.[1]),
    rotationDeg: parseRotationDeg(shapeStyle),
  }
}
|
|
|
|
/**
 * Collects the full page background of a DOCX: page color, fill, and watermark.
 *
 * Color and fill come from the main document XML. The watermark comes from the
 * first `word/header<N>.xml` entry, in the archive's key order, that contains
 * one; its relationships are read from `word/_rels/header<N>.xml.rels`.
 *
 * @param archive DOCX entries keyed by path.
 * @param documentXml Text of the main document part.
 * @returns `pageColor` as reported for the document, and `background` merging
 *   the document fill with the watermark (`null` when there is neither).
 */
export function extractDocxPageBackground(
  archive: DocxArchive,
  documentXml: string
): { pageColor?: string; background?: DocPageBackground | null } {
  const documentResult = parseDocumentBackgroundXml(documentXml, archive)

  let watermark: DocPageWatermark | null = null
  for (const entryPath of Object.keys(archive)) {
    const headerName = entryPath.match(/^word\/(header\d+)\.xml$/)?.[1]
    if (headerName === undefined) continue

    const headerXml = decodeXml(archive[entryPath])
    watermark = parseWatermarkFromHeaderXml(headerXml, archive, `word/_rels/${headerName}.xml.rels`)
    // The first header that yields a watermark wins.
    if (watermark) break
  }

  const documentBackground = documentResult.background
  let background: DocPageBackground | null = null
  if (documentBackground || watermark) {
    background = {
      ...(documentBackground ?? {}),
      ...(watermark ? { watermark } : {}),
    }
  }

  return { pageColor: documentResult.pageColor, background }
}
|
|
|
|
/**
 * Expands a page background description into render-ready layers.
 *
 * - A gradient is passed through unchanged.
 * - A fill image becomes CSS background properties: "tile" repeats at natural
 *   size, "frame" stretches to 100% × 100%, any other mode covers the page;
 *   the image is always centered.
 * - A watermark gets defaults for what it leaves out: color "#b4b4b4",
 *   opacity 0.35, rotation -35 degrees. `text` is only carried over for text
 *   watermarks and `imageSrc` only for image watermarks.
 *
 * @param background Background description; may be `null` or `undefined`.
 * @returns The layers that apply; an empty object when there is no background.
 */
export function resolvePageBackgroundLayers(
  background: DocPageBackground | null | undefined
): DocPageBackgroundLayers {
  const layers: DocPageBackgroundLayers = {}
  if (!background) return layers

  const { gradientCss, fillImage, watermark } = background

  if (gradientCss) layers.gradientCss = gradientCss

  if (fillImage?.src) {
    const isTiled = fillImage.mode === "tile"
    let backgroundSize = "cover"
    if (isTiled) backgroundSize = "auto"
    else if (fillImage.mode === "frame") backgroundSize = "100% 100%"

    layers.fillImageStyle = {
      backgroundImage: `url("${fillImage.src}")`,
      backgroundRepeat: isTiled ? "repeat" : "no-repeat",
      backgroundSize,
      backgroundPosition: "center",
    }
  }

  if (watermark) {
    layers.watermarkStyle = {
      text: watermark.kind === "text" ? watermark.text : undefined,
      imageSrc: watermark.kind === "image" ? watermark.src : undefined,
      color: watermark.color ?? "#b4b4b4",
      opacity: watermark.opacity ?? 0.35,
      rotationDeg: watermark.rotationDeg ?? -35,
    }
  }

  return layers
}
|