ultisuite-client/lib/drive/richtext-import.ts
R3D347HR4Y 8e420509a8
Some checks are pending
E2E / Playwright e2e (push) Waiting to run
imports docx 1
2026-06-10 00:27:44 +02:00

209 lines
6.5 KiB
TypeScript

import mammoth from "mammoth"
import {
extractDocxPageSetup,
type DocPageSetup,
} from "@/lib/drive/doc-page-setup"
/** A TipTap JSON node (document or descendant), kept loosely typed as a string-keyed record. */
export type TipTapJSON = Record<string, unknown>
/**
 * Outcome of importing a file into the editor.
 * `content` is the TipTap document; `pageSetup` is only filled in by the DOCX
 * import path and is omitted for HTML and plain-text imports.
 */
export type RichTextImportResult = {
content: TipTapJSON
pageSetup?: DocPageSetup | null
}
/** Image attribute names that normalizeImportedTipTap keeps; every other image attr is stripped. */
const IMAGE_ATTR_KEYS = ["src", "alt", "title", "width", "height"] as const
/**
 * Build a TipTap `image` node from an `<img>`-like element.
 *
 * @param el Element whose `src`, `alt`, `title`, `width` and `height` attributes are read.
 * @returns The image node, or `null` when the element has no usable `src`.
 */
function imageNodeFromElement(el: HTMLElement): TipTapJSON | null {
  const source = el.getAttribute("src")
  if (!source) return null

  const attrs: Record<string, unknown> = {
    src: source,
    alt: el.getAttribute("alt") ?? "",
  }

  const caption = el.getAttribute("title")
  if (caption) attrs.title = caption

  for (const dimension of ["width", "height"] as const) {
    const raw = el.getAttribute(dimension)
    // Numeric strings become numbers; anything else (e.g. "50%", or "0") stays a string.
    if (raw) attrs[dimension] = Number(raw) || raw
  }

  return { type: "image", attrs }
}
/**
 * Flatten an element's subtree into a list of inline TipTap nodes.
 *
 * Text nodes become `text` nodes (empty strings are skipped), `<img>` elements
 * become `image` nodes, and every other element is replaced by the inline
 * content of its own children. Non-text, non-element nodes are ignored.
 *
 * @param el Element whose descendants are collected in document order.
 * @returns The collected inline nodes; empty when nothing usable was found.
 */
function inlineContentFromElement(el: HTMLElement): TipTapJSON[] {
  return Array.from(el.childNodes).flatMap((child): TipTapJSON[] => {
    switch (child.nodeType) {
      case Node.TEXT_NODE: {
        const value = child.textContent ?? ""
        return value ? [{ type: "text", text: value }] : []
      }
      case Node.ELEMENT_NODE: {
        const element = child as HTMLElement
        if (element.tagName.toLowerCase() !== "img") {
          // Wrapper elements are transparent: only their text and images survive.
          return inlineContentFromElement(element)
        }
        const picture = imageNodeFromElement(element)
        return picture ? [picture] : []
      }
      default:
        return []
    }
  })
}
/**
 * Sanitize imported TipTap JSON so that image nodes only carry the attrs
 * supported by @tiptap/extension-image.
 *
 * - Image attrs are reduced to the keys in `IMAGE_ATTR_KEYS`; null, undefined
 *   and empty-string values are discarded.
 * - An image without a non-empty string `src` is removed from its parent. This
 *   includes image nodes that have no `attrs` object at all.
 * - All other nodes are copied shallowly, with their `content` array walked
 *   recursively and falsy children filtered out.
 *
 * @param content Root TipTap node (normally a `doc`).
 * @returns The sanitized tree, or a document holding one empty paragraph when
 *   the root itself is dropped or is not an object.
 */
export function normalizeImportedTipTap(content: TipTapJSON): TipTapJSON {
  const walk = (node: unknown): unknown => {
    if (!node || typeof node !== "object") return node
    if (Array.isArray(node)) return node.map(walk)

    const record = node as TipTapJSON

    if (record.type === "image") {
      const rawAttrs = record.attrs
      // No attrs object means no src: drop it like any other src-less image
      // instead of letting it through un-normalized.
      if (!rawAttrs || typeof rawAttrs !== "object") return null

      const source = rawAttrs as Record<string, unknown>
      const attrs: Record<string, unknown> = {}
      for (const key of IMAGE_ATTR_KEYS) {
        const value = source[key]
        if (value != null && value !== "") attrs[key] = value
      }
      if (typeof attrs.src !== "string" || !attrs.src) return null

      return { ...record, attrs }
    }

    if (Array.isArray(record.content)) {
      // filter(Boolean) removes the nulls produced for dropped images.
      return { ...record, content: record.content.map(walk).filter(Boolean) }
    }

    return record
  }

  const normalized = walk(content)
  if (!normalized || typeof normalized !== "object") {
    return { type: "doc", content: [{ type: "paragraph" }] }
  }
  return normalized as TipTapJSON
}
/**
 * Convert an HTML string into a TipTap document.
 *
 * Only a small subset of HTML is mapped:
 * - `<p>` / `<div>` become paragraphs,
 * - `<h1>`–`<h6>` become headings built from the element's text content,
 * - `<ul>` / `<ol>` become bullet / ordered lists with one paragraph per direct `<li>`,
 * - `<img>` becomes an image wrapped in its own paragraph,
 * - stray non-blank text nodes become paragraphs,
 * - any other element is transparent and its children are walked in its place.
 *
 * Inline formatting is not preserved: block content is flattened to text and
 * image nodes. Empty text nodes are never emitted, because ProseMirror refuses
 * to create them.
 *
 * @param html HTML markup to convert.
 * @returns A `doc` node with at least one block. When no `DOMParser` is
 *   available, tags are stripped and the remaining text is returned as a single
 *   paragraph (an empty paragraph when nothing but whitespace remains).
 */
function htmlToTipTapDoc(html: string): TipTapJSON {
  if (typeof DOMParser === "undefined") {
    // No DOM available: crude tag stripping, each tag replaced by a space.
    const text = html.replace(/<[^>]+>/g, " ")
    const paragraph: TipTapJSON = text.trim()
      ? { type: "paragraph", content: [{ type: "text", text }] }
      : { type: "paragraph" }
    return { type: "doc", content: [paragraph] }
  }

  const doc = new DOMParser().parseFromString(html, "text/html")
  const blocks: TipTapJSON[] = []

  const walk = (node: Node): void => {
    if (node.nodeType === Node.TEXT_NODE) {
      const text = node.textContent ?? ""
      // Whitespace between block elements is not content.
      if (text.trim()) {
        blocks.push({ type: "paragraph", content: [{ type: "text", text }] })
      }
      return
    }
    if (node.nodeType !== Node.ELEMENT_NODE) return

    const el = node as HTMLElement
    const tag = el.tagName.toLowerCase()

    if (tag === "img") {
      const image = imageNodeFromElement(el)
      if (image) {
        blocks.push({ type: "paragraph", content: [image] })
      }
      return
    }

    if (tag === "p" || tag === "div") {
      blocks.push({ type: "paragraph", content: inlineContentFromElement(el) })
      return
    }

    if (/^h[1-6]$/.test(tag)) {
      const level = Number(tag.charAt(1))
      const text = el.textContent ?? ""
      blocks.push({
        type: "heading",
        attrs: { level },
        // An empty heading gets no children rather than an (invalid) empty text node.
        content: text ? [{ type: "text", text }] : [],
      })
      return
    }

    if (tag === "ul" || tag === "ol") {
      const listType = tag === "ul" ? "bulletList" : "orderedList"
      // Direct children only; nested lists are flattened into their parent item.
      const items = Array.from(el.querySelectorAll<HTMLElement>(":scope > li")).map((li) => ({
        type: "listItem",
        content: [{ type: "paragraph", content: inlineContentFromElement(li) }],
      }))
      blocks.push({ type: listType, content: items })
      return
    }

    // Unknown wrapper (section, table, span, ...): descend into its children.
    Array.from(el.childNodes).forEach(walk)
  }

  Array.from(doc.body.childNodes).forEach(walk)

  if (blocks.length === 0) {
    blocks.push({ type: "paragraph" })
  }
  return normalizeImportedTipTap({ type: "doc", content: blocks })
}
/**
 * Import a DOCX file into a TipTap document.
 *
 * The page setup is extracted first. Conversion is then attempted with
 * `@docen/import-docx` (loaded lazily); if that throws or yields a non-object,
 * the file is converted to HTML with mammoth (images inlined as base64 data
 * URIs) and that HTML is mapped to TipTap.
 *
 * @param buffer Raw bytes of the DOCX file.
 * @returns The TipTap document together with the extracted page setup.
 */
export async function importDocxToTipTap(buffer: ArrayBuffer): Promise<RichTextImportResult> {
  const pageSetup = await extractDocxPageSetup(buffer)

  try {
    const docxImporter = await import("@docen/import-docx")
    const parsed = await docxImporter.parseDOCX(buffer, { image: { crop: false } })
    if (parsed && typeof parsed === "object") {
      const content = normalizeImportedTipTap(parsed as TipTapJSON)
      return { content, pageSetup }
    }
  } catch (error) {
    // Best effort: the primary importer failing is not fatal, mammoth takes over below.
    const isProduction = process.env.NODE_ENV === "production"
    if (!isProduction) {
      console.warn("[richtext-import] parseDOCX failed, falling back to mammoth", error)
    }
  }

  const inlineAsDataUri = mammoth.images.imgElement(async (image) => {
    const imageBuffer = await image.read("base64")
    return { src: `data:${image.contentType};base64,${imageBuffer}` }
  })
  const { value: html } = await mammoth.convertToHtml(
    { arrayBuffer: buffer },
    { convertImage: inlineAsDataUri }
  )
  return { content: htmlToTipTapDoc(html), pageSetup }
}
/**
 * Export a TipTap document as a DOCX blob using `@docen/export-docx`, which is
 * loaded lazily.
 *
 * @param content TipTap document to export.
 * @returns The generated DOCX file as a `Blob`.
 * @throws Error with the message "Export DOCX indisponible" when the exporter
 *   cannot be loaded, fails, or does not return a `Blob`. Outside production the
 *   underlying failure is logged first so it is not lost.
 */
export async function exportTipTapToDocx(content: TipTapJSON): Promise<Blob> {
  try {
    const { generateDOCX } = await import("@docen/export-docx")
    const output = await generateDOCX(content, { outputType: "blob" })
    if (output instanceof Blob) return output
  } catch (error) {
    // Same dev-only logging as the DOCX import path; callers still receive the
    // generic error below.
    if (process.env.NODE_ENV !== "production") {
      console.warn("[richtext-import] generateDOCX failed", error)
    }
  }
  throw new Error("Export DOCX indisponible")
}
/**
 * Import a file into TipTap, choosing the converter from the file extension.
 *
 * - `.docx` / `.docm` are handed to the DOCX importer.
 * - `.html` / `.htm` are decoded as text and converted from HTML.
 * - Anything else is decoded as text and split into one paragraph per line
 *   (blank lines become empty paragraphs).
 *
 * @param fileName Name of the file; only the text after the last dot is used, case-insensitively.
 * @param buffer Raw bytes of the file.
 * @returns The imported document; `pageSetup` is only present for the DOCX path.
 */
export async function importFileToTipTap(
  fileName: string,
  buffer: ArrayBuffer
): Promise<RichTextImportResult> {
  const extension = fileName.split(".").pop()?.toLowerCase() ?? ""

  const isWordDocument = extension === "docx" || extension === "docm"
  if (isWordDocument) {
    return importDocxToTipTap(buffer)
  }

  const decoded = new TextDecoder().decode(buffer)

  const isHtml = extension === "html" || extension === "htm"
  if (isHtml) {
    return { content: htmlToTipTapDoc(decoded) }
  }

  const paragraphs: TipTapJSON[] = []
  for (const line of decoded.split(/\r?\n/)) {
    paragraphs.push({
      type: "paragraph",
      content: line ? [{ type: "text", text: line }] : [],
    })
  }
  return { content: { type: "doc", content: paragraphs } }
}