sigit.io/src/utils/pdf.ts

252 lines
7.1 KiB
TypeScript
Raw Normal View History

import { PdfPage } from '../types/drawing.ts'
import { PDFDocument } from 'pdf-lib'
import { Mark } from '../types/mark.ts'
2024-09-02 12:59:35 +02:00
import * as PDFJS from 'pdfjs-dist'
2024-09-03 16:46:54 +02:00
import PDFJSWorker from 'pdfjs-dist/build/pdf.worker.min.mjs?worker'
2024-09-02 12:59:35 +02:00
// Register a PDF.js worker as the global workerPort, but only when none has
// been registered yet, so the same worker is shared between all getDocument calls
if (!PDFJS.GlobalWorkerOptions.workerPort) {
  PDFJS.GlobalWorkerOptions.workerPort = new PDFJSWorker()
}
2024-07-16 12:36:18 +03:00
/**
 * Font size (in pixels) used when generating a PDF.
 * The text shown while filling in / drawing on the PDF is rendered dynamically,
 * so it is currently difficult to make its size match the size in the final
 * document exactly.
 * This should be fixed going forward.
 * Switching to PDF-Lib will most likely make this problem redundant.
 */
export const FONT_SIZE: number = 16
/**
 * Font family used when generating a PDF.
 * Combined with FONT_SIZE to build the canvas font string when drawing marks.
 */
export const FONT_TYPE: string = 'Arial'
2024-07-16 12:36:18 +03:00
/**
 * Formats a drawing coordinate as a CSS pixel length, e.g. `12` becomes `'12px'`
 * @param coordinate numeric coordinate value
 * @returns the coordinate followed by the `px` unit
 */
export const inPx = (coordinate: number): string => {
  return coordinate + 'px'
}
/**
 * Tells whether a file looks like a PDF, judged by its MIME type:
 * the type is lower-cased and must contain the substring `pdf`
 * @param file file to inspect
 */
export const isPdf = (file: File) => {
  const mimeType = file.type.toLowerCase()
  return mimeType.indexOf('pdf') !== -1
}
2024-07-16 12:36:18 +03:00
/**
 * Reads a pdf file and resolves with its contents.
 *
 * The file is read with `FileReader.readAsDataURL`, so the reader result is a
 * data URL rather than raw bytes. The `string | ArrayBuffer` return type covers
 * both value types accepted by the PDFJS.getDocument function.
 *
 * @param file file to read
 * @returns a promise that resolves with the reader result. It rejects with an
 * Error when the reader produced no data, and with the reader's error event
 * when reading fails.
 */
export const readPdf = (file: File): Promise<string | ArrayBuffer> => {
  return new Promise((resolve, reject) => {
    const reader = new FileReader()

    reader.onload = (e) => {
      const data = e.target?.result
      // Only resolve when the reader actually produced content;
      // null / undefined / empty results are treated as a failed read
      if (data) {
        resolve(data)
      } else {
        reject(new Error('File is null or undefined'))
      }
    }

    reader.onerror = (err) => {
      console.error('err', err)
      reject(err)
    }

    reader.readAsDataURL(file)
  })
}
2024-08-28 11:41:29 +02:00
/**
 * Returns the horizontal space available for content inside the
 * `#content-preview` element: its client width minus left and right padding.
 *
 * Falls back to a default of 620 when the element is not found or when no
 * positive width can be determined (for example, the element has zero width).
 */
export const getInnerContentWidth = () => {
  // Default value
  const defaultWidth = 620

  // Fetch the first container element we find
  const element = document.querySelector('#content-preview')
  if (!element) return defaultWidth

  const style = getComputedStyle(element)
  // clientWidth includes padding. Read the longhand properties: the computed
  // `padding` shorthand may be empty or hold several values, in which case
  // parseFloat yields NaN or the top padding instead of the horizontal one.
  const paddingLeft = parseFloat(style.paddingLeft) || 0
  const paddingRight = parseFloat(style.paddingRight) || 0
  const widthWithoutPadding = element.clientWidth - paddingLeft - paddingRight

  // A zero, negative or NaN width cannot be used to scale pages
  return widthWithoutPadding > 0 ? widthWithoutPadding : defaultWidth
}
2024-07-16 12:36:18 +03:00
/**
 * Converts pdf to the images, one per page.
 *
 * Every page is scaled so that its rendered width equals the value returned by
 * getInnerContentWidth, rendered onto a single reused canvas and exported
 * with `canvas.toDataURL()`.
 *
 * @param data pdf file contents, as accepted by PDFJS.getDocument
 * @returns one PdfPage per page, in page order; `width` is the page's viewport
 * width at scale 1 and `drawnFields` starts out empty
 * @throws Error when no 2D rendering context can be obtained from the canvas
 */
export const pdfToImages = async (
  data: string | ArrayBuffer
): Promise<PdfPage[]> => {
  const pages: PdfPage[] = []
  const pdf = await PDFJS.getDocument(data).promise
  const canvas = document.createElement('canvas')

  // The same context object is returned on every call, so fetch it once and
  // fail with a clear error instead of handing null to the renderer
  const context = canvas.getContext('2d')
  if (!context) {
    throw new Error('Unable to get a 2D rendering context for the canvas')
  }

  const width = getInnerContentWidth()

  // getPage expects 1-based page numbers
  for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
    const page = await pdf.getPage(pageNumber)
    const originalViewport = page.getViewport({ scale: 1 })
    // Scale the page so it fits the available content width
    const scale = width / originalViewport.width
    const viewport = page.getViewport({ scale: scale })

    canvas.height = viewport.height
    canvas.width = viewport.width
    await page.render({ canvasContext: context, viewport: viewport }).promise

    pages.push({
      image: canvas.toDataURL(),
      width: originalViewport.width,
      drawnFields: []
    })
  }

  return pages
}
/**
 * Renders each page of a pdf file at scale 1 and draws the given marks on top.
 *
 * @param file individual pdf file
 * @param marksPerPage Marks grouped by page; looked up with the zero-based
 * page index
 * @returns an array of encoded images (data URLs), one for every page that
 * could be rendered; a page is skipped when no 2D canvas context is available
 */
export const addMarks = async (
  file: File,
  marksPerPage: { [key: string]: Mark[] }
) => {
  const fileData = await readPdf(file)
  const pdfDocument = await PDFJS.getDocument(fileData).promise
  const canvas = document.createElement('canvas')
  const images: string[] = []

  for (let pageIndex = 0; pageIndex < pdfDocument.numPages; pageIndex++) {
    // getPage expects 1-based page numbers
    const page = await pdfDocument.getPage(pageIndex + 1)
    const viewport = page.getViewport({ scale: 1 })
    const context = canvas.getContext('2d')
    canvas.height = viewport.height
    canvas.width = viewport.width

    if (!context) continue

    await page.render({ canvasContext: context, viewport: viewport }).promise

    const pageHasMarks =
      marksPerPage && Object.hasOwn(marksPerPage, pageIndex)
    if (pageHasMarks) {
      marksPerPage[pageIndex]?.forEach((mark) => draw(mark, context))
    }

    images.push(canvas.toDataURL())
  }

  canvas.remove()
  return images
}
/**
 * Returns a copy of the mark whose location (width, height, left, top) is
 * multiplied by the PDF-to-PNG scale; the given mark is not modified
 * @param mark mark to scale
 * @param scale factor applied to the location's size and position
 */
export const scaleMark = (mark: Mark, scale: number): Mark => {
  const { width, height, left, top } = mark.location
  const scaledLocation = {
    ...mark.location,
    width: width * scale,
    height: height * scale,
    left: left * scale,
    top: top * scale
  }
  return { ...mark, location: scaledLocation }
}
/**
 * Tells whether a Mark carries a truthy value
 * @param mark mark to check
 */
export const hasValue = (mark: Mark): boolean => {
  return Boolean(mark.value)
}
/**
 * Writes the value of a Mark onto a Canvas representation of a PDF Page.
 * The text is black, uses FONT_SIZE / FONT_TYPE and is centered inside the
 * mark's location box.
 * @param mark to be drawn
 * @param ctx a Canvas representation of a specific PDF Page
 */
export const draw = (mark: Mark, ctx: CanvasRenderingContext2D) => {
  const { left, top, width, height } = mark.location
  const text = mark.value!

  ctx.font = `${FONT_SIZE}px ${FONT_TYPE}`
  ctx.fillStyle = 'black'

  const metrics = ctx.measureText(text)
  const textHeight =
    metrics.actualBoundingBoxAscent + metrics.actualBoundingBoxDescent

  // Center horizontally; vertically the baseline sits half a text height
  // below the middle of the box
  const x = left + (width - metrics.width) / 2
  const y = top + (height + textHeight) / 2

  ctx.fillText(text, x, y)
}
/**
 * Builds a complete PDF file out of encoded page images.
 * Every image is embedded as PNG and placed on its own page, which has exactly
 * the size of the image.
 * @param markedPdfPages array of encoded PDF pages
 * @returns a blob of type `application/pdf`
 */
export const convertToPdfBlob = async (
  markedPdfPages: string[]
): Promise<Blob> => {
  const pdfDoc = await PDFDocument.create()

  for (const encodedPage of markedPdfPages) {
    const image = await pdfDoc.embedPng(encodedPage)
    const { width, height } = image
    const pdfPage = pdfDoc.addPage([width, height])
    pdfPage.drawImage(image, { x: 0, y: 0, width, height })
  }

  const bytes = await pdfDoc.save()
  return new Blob([bytes], { type: 'application/pdf' })
}
/**
 * Groups marks first by file name and then by page.
 * @param marks - an array of Marks
 * @function hasValue removes any Mark without a value
 * @function byPage groups remaining Marks by their fileName and location.page
 */
export const groupMarksByFileNamePage = (marks: Mark[]) => {
  const marksWithValue = marks.filter(hasValue)
  return marksWithValue.reduce<{
    [fileName: string]: { [page: number]: Mark[] }
  }>(byPage, {})
}
/**
 * A reducer callback that files a mark under its file name and page number.
 * The accumulator is not modified; a new object containing the mark is returned.
 * Can be replaced by Object.groupBy https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/groupBy
 * when it is implemented in TypeScript
 * @param obj - accumulator in the reducer callback
 * @param mark - current value, i.e. Mark being examined
 */
export const byPage = (
  obj: { [filename: string]: { [page: number]: Mark[] } },
  mark: Mark
) => {
  const { fileName, location } = mark
  const pageNumber = location.page

  // Start from what has been collected so far for this file and page
  const pagesOfFile = obj[fileName] ?? {}
  const marksOnPage = pagesOfFile[pageNumber] ?? []

  const updatedPages = {
    ...pagesOfFile,
    [pageNumber]: marksOnPage.concat([mark])
  }
  return { ...obj, [fileName]: updatedPages }
}