// sigit.io/src/utils/pdf.ts
import { PdfFile, PdfPage } from '../types/drawing.ts'
import * as PDFJS from 'pdfjs-dist'
import { PDFDocument } from 'pdf-lib'
import { Mark } from '../types/mark.ts'
// Tell PDF.js where to load its worker script from.
// NOTE(review): this is a relative URL into node_modules; confirm that it
// resolves in production builds and on nested routes, not only on the dev server.
PDFJS.GlobalWorkerOptions.workerSrc = 'node_modules/pdfjs-dist/build/pdf.worker.mjs';
/**
 * Ratio of the rendered size to the PDF page's natural size.
 * Used as the viewport scale when pages are rendered and as the factor
 * by which mark locations are multiplied.
 */
const SCALE = 3;

/**
 * Font size, in pixels, used when drawing mark values onto a rendered page.
 * It is currently difficult to correlate the font size seen while filling in /
 * drawing on the PDF (which is rendered dynamically) with the size in the
 * final output. This should be fixed going forward; switching to PDF-Lib will
 * most likely make the problem redundant.
 */
const FONT_SIZE = 40;

/**
 * Font family used when drawing mark values onto a rendered page.
 */
const FONT_TYPE = 'Arial';
/**
 * Converts a PDF ArrayBuffer to a generic PDF File
 * @param arrayBuffer - binary contents of a PDF
 * @param fileName - name given to the resulting file
 * @returns a File with the "application/pdf" MIME type
 */
const toFile = (arrayBuffer: ArrayBuffer, fileName: string): File => {
  // The intermediate Blob and the resulting File carry the same PDF MIME type.
  const options = { type: 'application/pdf' };
  const pdfBlob = new Blob([arrayBuffer], options);
  return new File([pdfBlob], fileName, options);
};
/**
 * Converts a generic PDF File to Sigit's internal Pdf File type
 * @param file - a generic PDF File
 * @returns a promise resolving to Sigit's internal PDF File type
 */
const toPdfFile = async (file: File): Promise<PdfFile> => {
  // Read the file first, then render each of its pages to an image.
  const pages = await pdfToImages(await readPdf(file));
  return { file, pages, expanded: false };
};
/**
 * Transforms an array of generic Files into an array of Sigit's
 * internal representation of Pdf Files. Files that are not PDFs are skipped.
 * @param selectedFiles - an array of generic Files
 * @returns a promise resolving to an array of Sigit's internal Pdf File type;
 * it rejects if any single conversion fails
 */
const toPdfFiles = async (selectedFiles: File[]): Promise<PdfFile[]> => {
  const pdfFiles = selectedFiles.filter(isPdf);
  // Conversions run concurrently; Promise.all keeps the results in input order.
  return Promise.all(pdfFiles.map(toPdfFile));
};
/**
 * Formats a drawing coordinate as a CSS pixel length, e.g. `12` becomes `"12px"`.
 * @param coordinate - numeric coordinate value
 * @returns the coordinate followed by the `px` unit
 */
const inPx = (coordinate: number): string => {
  return String(coordinate) + 'px';
};
/**
 * A utility that checks whether a given file is a PDF, based on its MIME type
 * @param file - the file to check
 */
const isPdf = (file: File) => {
  // Case-insensitive search for "pdf" anywhere in the file's MIME type.
  const mimeType = file.type.toLowerCase();
  return mimeType.includes('pdf');
};
/**
 * Reads a file's binary contents and encodes them as a data URL.
 * @param file - the file to read
 * @returns a promise resolving to the file contents as a data URL string
 * @throws rejects when the file cannot be read or the reader yields a
 * non-string result
 */
const readPdf = (file: File): Promise<string> => {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      const { result } = reader;
      // readAsDataURL yields a string; guard anyway so the promise can never
      // resolve with a value that contradicts its declared type.
      if (typeof result === 'string') {
        resolve(result);
      } else {
        reject(new Error('Unexpected result while reading file as a data URL'));
      }
    };
    reader.onerror = (err) => {
      console.error('err', err);
      // Prefer the reader's own error object over the bare progress event.
      reject(reader.error ?? err);
    };
    reader.readAsDataURL(file);
  });
};
/**
 * Renders every page of a PDF to an image.
 * @param data - PDF source, forwarded as-is to PDF.js `getDocument`
 * @returns one entry per page, in page order, holding the page image as a
 * data URL together with an empty list of drawn fields
 * @throws if a 2D canvas context cannot be obtained
 */
const pdfToImages = async (data: any): Promise<PdfPage[]> => {
  const pdf = await PDFJS.getDocument(data).promise;
  // A single canvas is reused for all pages; it is resized for each one.
  const canvas = document.createElement('canvas');
  const context = canvas.getContext('2d');
  if (!context) {
    throw new Error('Unable to obtain a 2D canvas context');
  }
  const pages: PdfPage[] = [];
  // PDF.js page numbers start at 1.
  for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
    const page = await pdf.getPage(pageNumber);
    const viewport = page.getViewport({ scale: SCALE });
    canvas.height = viewport.height;
    canvas.width = viewport.width;
    await page.render({ canvasContext: context, viewport }).promise;
    pages.push({ image: canvas.toDataURL(), drawnFields: [] });
  }
  return pages;
};
/**
 * Renders each page of a PDF file to an image and draws that page's marks
 * (completed and signed by all users) on top of it.
 * @param file - the PDF file to render
 * @param marksPerPage - marks grouped by zero-based page index; pages without
 * an entry are rendered without any marks
 * @returns one encoded image (data URL) per page, in page order
 * @throws if a 2D canvas context cannot be obtained
 */
const addMarks = async (file: File, marksPerPage: {[key: string]: Mark[]}) => {
  const dataUrl = await readPdf(file);
  const pdf = await PDFJS.getDocument(dataUrl).promise;
  // A single canvas is reused for all pages; it is resized for each one.
  const canvas = document.createElement('canvas');
  const context = canvas.getContext('2d');
  if (!context) {
    throw new Error('Unable to obtain a 2D canvas context');
  }
  const images: string[] = [];
  for (let i = 0; i < pdf.numPages; i++) {
    // PDF.js page numbers start at 1, while marksPerPage is indexed from 0.
    const page = await pdf.getPage(i + 1);
    const viewport = page.getViewport({ scale: SCALE });
    canvas.height = viewport.height;
    canvas.width = viewport.width;
    await page.render({ canvasContext: context, viewport }).promise;
    // Not every page has marks; a missing entry must not abort the whole run.
    for (const mark of marksPerPage[i] ?? []) {
      draw(mark, context);
    }
    images.push(canvas.toDataURL());
  }
  return images;
};
/**
 * Returns a copy of a mark whose location (width, height, left, top) is
 * multiplied by SCALE, in line with the PDF-to-PNG scale.
 * The mark passed in is left untouched.
 * @param mark - the mark to scale
 */
const scaleMark = (mark: Mark): Mark => {
  const { width, height, left, top } = mark.location;
  const scaledLocation = {
    ...mark.location,
    width: width * SCALE,
    height: height * SCALE,
    left: left * SCALE,
    top: top * SCALE
  };
  return { ...mark, location: scaledLocation };
};
/**
 * Tells whether a Mark carries a truthy value.
 * @param mark - the Mark to inspect
 * @returns `true` when `mark.value` is truthy, `false` otherwise
 */
const hasValue = (mark: Mark): boolean => Boolean(mark.value);
/**
 * Draws a Mark's value as black text on a canvas representation of a PDF page.
 * The text is centred horizontally within the Mark's location and its baseline
 * is placed at `top + (height + FONT_SIZE) / 2`.
 * Marks without a value are skipped.
 * @param mark - the mark to be drawn
 * @param ctx - 2D context of the canvas holding the rendered PDF page
 */
const draw = (mark: Mark, ctx: CanvasRenderingContext2D) => {
  const { location, value } = mark;
  // Without this guard a missing value would be drawn as the literal text "undefined".
  if (!value) {
    return;
  }
  ctx.font = FONT_SIZE + 'px ' + FONT_TYPE;
  ctx.fillStyle = 'black';
  const textMetrics = ctx.measureText(value);
  const textX = location.left + (location.width - textMetrics.width) / 2;
  // Use the constant directly instead of parsing it back out of ctx.font.
  const textY = location.top + (location.height + FONT_SIZE) / 2;
  ctx.fillText(value, textX, textY);
};
/**
 * Builds a complete PDF file from an array of encoded PDF pages.
 * Every image is embedded as PNG and becomes one page sized exactly to that
 * image, in the order given.
 * @param markedPdfPages - encoded page images
 * @returns a Blob with the "application/pdf" MIME type holding the saved PDF
 */
const convertToPdfBlob = async (markedPdfPages: string[]): Promise<Blob> => {
  const pdfDoc = await PDFDocument.create();
  for (const encodedPage of markedPdfPages) {
    const image = await pdfDoc.embedPng(encodedPage);
    const { width, height } = image;
    // The page takes the image's dimensions and the image fills it from the origin.
    const pdfPage = pdfDoc.addPage([width, height]);
    pdfPage.drawImage(image, { x: 0, y: 0, width, height });
  }
  const pdfBytes = await pdfDoc.save();
  return new Blob([pdfBytes], { type: 'application/pdf' });
};
/**
 * Converts an ArrayBuffer of a PDF file to Sigit's internal Pdf File type by
 * first wrapping it in a generic File.
 * @param arrayBuffer - binary contents of a PDF
 * @param fileName - name given to the intermediate File
 * @returns a promise resolving to Sigit's internal Pdf File type
 */
const convertToPdfFile = async (arrayBuffer: ArrayBuffer, fileName: string): Promise<PdfFile> => {
  return toPdfFile(toFile(arrayBuffer, fileName));
};
/**
 * Filters, scales and groups Marks by page.
 * @param marks - an array of Marks
 * @returns an object keyed by `mark.location.page` whose values are the scaled
 * Marks belonging to that page
 * @function hasValue removes any Mark without a value
 * @function scaleMark scales remaining marks in line with SCALE
 * @function byPage groups remaining Marks by their page mark.location.page
 */
const groupMarksByPage = (marks: Mark[]) => {
  const marksWithValue = marks.filter(hasValue);
  const scaledMarks = marksWithValue.map(scaleMark);
  return scaledMarks.reduce<{[key: number]: Mark[]}>(byPage, {});
};
/**
 * Reducer callback that files a Mark under its page number.
 * Neither the accumulator nor its arrays are mutated; a new object containing
 * a new array for the affected page is returned on every call.
 * Can be replaced by Object.groupBy https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/groupBy
 * when it is implemented in TypeScript.
 * @param obj - accumulator in the reducer callback
 * @param mark - current value, i.e. Mark being examined
 * @returns a copy of the accumulator with the Mark appended to its page's group
 */
const byPage = (obj: { [key: number]: Mark[] }, mark: Mark) => {
  const pageNumber = mark.location.page;
  // Start a fresh group the first time a page number is seen.
  const marksOnPage = [...(obj[pageNumber] ?? []), mark];
  return { ...obj, [pageNumber]: marksOnPage };
};
// Public API of this module.
export {
  toFile,
  toPdfFile,
  toPdfFiles,
  inPx,
  convertToPdfFile,
  addMarks,
  convertToPdfBlob,
  groupMarksByPage,
}