// sigit.io/src/utils/pdf.ts

import { PdfPage } from '../types/drawing.ts'
import { PDFDocument, PDFFont, PDFPage, rgb } from 'pdf-lib'
import { Mark } from '../types/mark.ts'
import * as PDFJS from 'pdfjs-dist'
import PDFJSWorker from 'pdfjs-dist/build/pdf.worker.min.mjs?worker'
// Register a pdf.js worker as the global workerPort unless one is already set.
// Using workerPort lets the same worker be shared between all getDocument calls.
if (!PDFJS.GlobalWorkerOptions.workerPort) {
  PDFJS.GlobalWorkerOptions.workerPort = new PDFJSWorker()
}

import fontkit from '@pdf-lib/fontkit'
import defaultFont from '../assets/fonts/roboto-regular.ttf'

/**
 * Font size used when generating a PDF.
 *
 * Fields are rendered dynamically while the document is being filled in, so it
 * is currently difficult to fully correlate the font size seen at that time
 * with the final size in the generated PDF.
 */
export const FONT_SIZE: number = 16
/**
 * Name of the font currently used when generating a PDF.
 * `draw` combines it with `FONT_SIZE` to build the canvas font string.
 */
export const FONT_TYPE: string = 'Roboto'
/**
 * Line height currently used when generating a PDF, expressed as a multiplier:
 * `drawMarkText` passes `FONT_SIZE * FONT_LINE_HEIGHT` as the text line height.
 */
export const FONT_LINE_HEIGHT: number = 1

/**
 * Formats a drawing coordinate as a CSS pixel length.
 * @param coordinate numeric coordinate to format
 * @returns the coordinate followed by the `px` unit, e.g. `10` becomes `'10px'`
 */
export const inPx = (coordinate: number): string => {
  return coordinate + 'px'
}

/**
 * Tells whether a file looks like a PDF, judging by its MIME type.
 * The comparison is case-insensitive and only requires the type to contain `pdf`.
 * @param file file to inspect
 * @returns `true` when the lower-cased `file.type` contains `pdf`
 */
export const isPdf = (file: File): boolean => {
  const mimeType = file.type.toLowerCase()
  return mimeType.includes('pdf')
}

/**
 * Reads the contents of a pdf file using a FileReader.
 *
 * The file is read with `readAsDataURL`, so the resolved value is expected to be
 * a data URL string; the return type also allows ArrayBuffer because both are
 * accepted by PDFJS.getDocument.
 * @param file file to read
 * @returns promise resolving with the reader result; it rejects with an Error when
 * the result is empty, or with the reader's error event when reading fails
 */
export const readPdf = (file: File): Promise<string | ArrayBuffer> =>
  new Promise((resolve, reject) => {
    const reader = new FileReader()

    reader.onload = (event) => {
      const result = event.target?.result

      // Only a non-empty string or an ArrayBuffer is a usable result
      if (!result) {
        reject(new Error('File is null or undefined'))
        return
      }

      resolve(result)
    }

    reader.onerror = (error) => {
      console.error('err', error)
      reject(error)
    }

    // Handlers are attached above, before the read is started
    reader.readAsDataURL(file)
  })

/**
 * Measures the horizontal space available inside the `#content-preview` element.
 *
 * The width is the element's `clientWidth` minus its left and right padding.
 * The individual `paddingLeft` / `paddingRight` values are used instead of the
 * `padding` shorthand: the shorthand may hold several values (so parsing it only
 * yields the first, i.e. top, padding) or may be an empty string (which would
 * parse to NaN).
 * @returns the inner width in pixels, or 620 when the element is not found
 */
export const getInnerContentWidth = (): number => {
  const defaultWidth = 620

  // Fetch the first container element we find
  const element = document.querySelector('#content-preview')
  if (!element) return defaultWidth

  const style = getComputedStyle(element)

  // parseFloat returns NaN for empty or non-numeric values, treat those as no padding
  const paddingLeft = parseFloat(style.paddingLeft) || 0
  const paddingRight = parseFloat(style.paddingRight) || 0

  return element.clientWidth - paddingLeft - paddingRight
}

/**
 * Renders every page of a pdf to an image.
 *
 * Each page is scaled so that its rendered width matches the value returned by
 * `getInnerContentWidth`, drawn onto a single reused canvas and exported with
 * `canvas.toDataURL()`.
 * @param data pdf contents, passed as is to PDFJS.getDocument
 * @returns one entry per page, in page order, holding the rendered image, the
 * page's unscaled viewport width and an empty `drawnFields` list
 */
export const pdfToImages = async (
  data: string | ArrayBuffer
): Promise<PdfPage[]> => {
  const renderedPages: PdfPage[] = []
  const pdf = await PDFJS.getDocument(data).promise
  const canvas = document.createElement('canvas')
  const targetWidth = getInnerContentWidth()

  // pdf.js page numbers start at 1
  for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
    const page = await pdf.getPage(pageNumber)

    // Scale the page so that it is exactly targetWidth wide
    const unscaledViewport = page.getViewport({ scale: 1 })
    const viewport = page.getViewport({
      scale: targetWidth / unscaledViewport.width
    })

    const canvasContext = canvas.getContext('2d')
    canvas.height = viewport.height
    canvas.width = viewport.width

    await page.render({ canvasContext: canvasContext!, viewport }).promise

    renderedPages.push({
      image: canvas.toDataURL(),
      width: unscaledViewport.width,
      drawnFields: []
    })
  }

  return renderedPages
}

/**
 * Embeds marks as text into a pdf file.
 *
 * The file is loaded with pdf-lib, the default font is embedded and every mark
 * is drawn onto its page with `drawMarkText`.
 * @param file individual pdf file
 * @param marksPerPage marks grouped by page; keys are matched against the
 * zero-based index of each page in `pdf.getPages()`
 * @returns promise resolving with the result of `pdf.save()`, i.e. the saved
 * document containing the completed and signed marks from all users
 */
export const addMarks = async (
  file: File,
  marksPerPage: { [key: string]: Mark[] }
) => {
  const fileData = await readPdf(file)
  const pdf = await PDFDocument.load(fileData)
  const font = await embedFont(pdf)

  pdf.getPages().forEach((page, pageIndex) => {
    // Skip pages that have no marks assigned
    if (marksPerPage && Object.hasOwn(marksPerPage, pageIndex)) {
      marksPerPage[pageIndex]?.forEach((mark) => {
        drawMarkText(mark, page, font)
      })
    }
  })

  const savedPdf = await pdf.save()

  return savedPdf
}

/**
 * Tells whether a Mark has been filled in.
 * @param mark mark to check
 * @returns `true` when `mark.value` is truthy, `false` for a missing or empty value
 */
export const hasValue = (mark: Mark): boolean => Boolean(mark.value)

/**
 * Draws the value of a Mark on a Canvas representation of a PDF Page.
 * The text is written in black and centered, both horizontally and vertically,
 * inside the rectangle described by the mark location.
 * @param mark to be drawn
 * @param ctx a Canvas representation of a specific PDF Page
 * @deprecated use drawMarkText
 */
export const draw = (mark: Mark, ctx: CanvasRenderingContext2D) => {
  ctx.font = `${FONT_SIZE}px ${FONT_TYPE}`
  ctx.fillStyle = 'black'

  // Measure the text with the font configured above
  const metrics = ctx.measureText(mark.value!)
  const measuredHeight =
    metrics.actualBoundingBoxAscent + metrics.actualBoundingBoxDescent

  // Center the text inside the mark rectangle
  const { left, top, width, height } = mark.location
  const centeredX = left + (width - metrics.width) / 2
  const centeredY = top + (height + measuredHeight) / 2

  ctx.fillText(mark.value!, centeredX, centeredY)
}

/**
 * Draws the value of a Mark as black text on a PDF Page.
 * The text starts at the left border of the mark location, just below its top
 * border, and is limited to the width of the mark location.
 * @param mark to be drawn
 * @param page PDF Page
 * @param font embedded font
 */
export const drawMarkText = (mark: Mark, page: PDFPage, font: PDFFont) => {
  const pageHeight = page.getSize().height
  const { left, top, width } = mark.location

  // The mark location is measured from the top left corner, while the PDF
  // origin is the bottom left corner, so the vertical coordinate is flipped
  const topBorderY = pageHeight - top

  // drawText expects y to be the font baseline, so starting from the top border:
  // - move down by the gap between the rendered height and the font size
  // - move down by the height without descender to reach the new baseline
  const renderedGap = font.heightAtSize(FONT_SIZE) - FONT_SIZE
  const heightWithoutDescender = font.heightAtSize(FONT_SIZE, {
    descender: false
  })
  const baselineY = topBorderY - renderedGap - heightWithoutDescender

  page.drawText(`${mark.value}`, {
    x: left,
    y: baselineY,
    size: FONT_SIZE,
    font,
    color: rgb(0, 0, 0),
    maxWidth: width,
    lineHeight: FONT_SIZE * FONT_LINE_HEIGHT
  })
}

/**
 * Groups marks by file name and then by page.
 * Marks without a value (see `hasValue`) are left out; the remaining ones are
 * accumulated with `byPage`, keeping their original order.
 * @param marks - an array of Marks
 * @returns an object keyed by file name, whose values are objects keyed by
 * `mark.location.page` holding the marks of that page
 */
export const groupMarksByFileNamePage = (marks: Mark[]) => {
  const marksWithValue = marks.filter(hasValue)

  let grouped: { [fileName: string]: { [page: number]: Mark[] } } = {}
  for (const mark of marksWithValue) {
    grouped = byPage(grouped, mark)
  }

  return grouped
}

/**
 * A reducer callback that adds a mark to an object grouped by file name and page number.
 * The accumulator is not modified; a new object is returned in which the mark is
 * appended to the list stored under its file name and page.
 * Can be replaced by Object.groupBy https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/groupBy
 * when it is implemented in TypeScript
 * @param obj - accumulator in the reducer callback
 * @param mark - current value, i.e. Mark being examined
 */
export const byPage = (
  obj: { [filename: string]: { [page: number]: Mark[] } },
  mark: Mark
) => {
  const {
    fileName,
    location: { page }
  } = mark

  // Fall back to empty containers the first time a file or a page is seen
  const filePages = obj[fileName] ?? {}
  const pageMarks = filePages[page] ?? []

  const updatedPages = {
    ...filePages,
    [page]: [...pageMarks, mark]
  }

  return {
    ...obj,
    [fileName]: updatedPages
  }
}

/**
 * Downloads the default font and embeds it into the given PDF document.
 * Fontkit is registered on the document before the font is embedded.
 * @param pdf document that receives the font
 * @returns promise resolving with the embedded font
 * @throws Error when the font file cannot be downloaded successfully
 */
async function embedFont(pdf: PDFDocument) {
  const response = await fetch(defaultFont)

  // fetch only rejects on network failures; an HTTP error status still resolves,
  // and its body must not be handed over to the font parser
  if (!response.ok) {
    throw new Error(
      `Failed to load the default font: ${response.status} ${response.statusText}`
    )
  }

  const fontBytes = await response.arrayBuffer()

  pdf.registerFontkit(fontkit)

  const embeddedFont = await pdf.embedFont(fontBytes)
  return embeddedFont
}