Homelab, Linux, JS & ABAP (~˘▾˘)~
 

[nodejs] Extract first page of a PDF using pdf-lib

const { PDFDocument } = require('pdf-lib')

// file = { fileName: 'test1.pdf', content: arraybuffer }

// Load the source PDF from the raw file content.
// NOTE(review): ignoreEncryption presumably lets pdf-lib load documents flagged as encrypted instead of throwing - confirm in the pdf-lib LoadOptions docs.
const originalPdf = await PDFDocument.load(file.content, { ignoreEncryption: true })
// Create an empty target document that will only receive the first page.
const newPdf = await PDFDocument.create()
// copyPages takes the source document and an array of page indices; only index 0 is requested here.
const [firstPage] = await newPdf.copyPages(originalPdf, [0]) // <-- 0 is the first page
newPdf.addPage(firstPage)
// Serialize the single-page document.
const firstPagePdf = await newPdf.save()

// Overwrite the original content with the single-page PDF, wrapped in a Node.js Buffer.
file.content = Buffer.from(firstPagePdf)

[nodejs] Merge PDFs using pdf-lib

https://github.com/Hopding/pdf-lib

const { PDFDocument } = require('pdf-lib')

// files = [{ fileName: 'test1.pdf, content: arraybuffer },{ fileName: 'test2.pdf, content: arraybuffer }]

mergePdfs: async function (files) {
        try {
            const mergedPdf = await PDFDocument.create()

            for (let file of files) {
                const pdf = await PDFDocument.load(file.content)
                const copiedPages = await mergedPdf.copyPages(pdf, pdf.getPageIndices())
                copiedPages.forEach((page) => mergedPdf.addPage(page))
            }

            const mergedPdfFile = await mergedPdf.save()
            const buffer = Buffer.from(mergedPdfFile)
            return await buffer.toString('base64') // return as buffer or base64 encoded file
        } catch (err) {
            console.error(err.message)
        }
}

[nodejs] workspaces

If you have subdirectories or additional applications which have their own package.json files, you can add them to your main project via the workspaces setting.

 {
  "name": "my-project",
  "workspaces": [ 
     "./app/*"
   ]
}

When running npm install it will now also install the dependencies of all projects in the app folder.

[JavaScript] Clone an object containing a file as ArrayBuffer

Use the built-in function structuredClone() to copy all kinds of complex JS types.

Official documentation: https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm#supported_types

Comparison to other copy techniques like spread or JSON.stringify: https://www.builder.io/blog/structured-clone

[nodejs] Node Version Manager (NVM)

# Install script: https://github.com/nvm-sh/nvm#install--update-script
# Downloads the nvm v0.39.5 install script and pipes it straight into bash
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.5/install.sh | bash

# Check the currently used node version
node -v

# List the locally available node versions
nvm list

# List all released node versions
nvm ls-remote

# Install a specific node version and switch to it
nvm install v20.5.1

# Switch to a specific node version that is already installed
nvm use v20.5.1

[CAP] Posting form data to a destination using executeHttpRequest

        const { executeHttpRequest } = require('@sap-cloud-sdk/http-client')
        const FormData = require('form-data')

        try {
            // Create the multipart/form-data payload; `fileContent` is expected to hold the PDF bytes
            const form = new FormData()
            form.append('file', fileContent, {
                contentType: 'application/pdf',
                filename: 'test.pdf'
            })

            // Create headers: getHeaders() supplies the Content-Type including the multipart boundary.
            // NOTE(review): getLengthSync() presumably only works when no streams were appended - confirm in the form-data docs.
            const headers = {
                ...form.getHeaders(),
                'Content-Length': form.getLengthSync(),
            }

            // Send to the destination
            const response = await executeHttpRequest(
                { destinationName: 'TESTINATION' },
                {
                    method: 'POST',
                    url: 'myApiPath',
                    headers: headers,
                    data: form,
                    responseType: 'arraybuffer' // if you need the response data as buffer to prevent UTF-8 encoding
                }
            )
            console.log({response})
        } catch (error) {
            console.error(error.message)
        }

[nodejs] Create buffer from stream

Using a promise

/**
 * Collects everything a readable stream emits into a single Buffer.
 *
 * @param {import('stream').Readable} [source] - Stream to read. When omitted, the
 *     `stream` variable of the enclosing scope is used, so existing zero-argument
 *     calls keep working.
 * @returns {Promise<Buffer>} Resolves with the concatenated data once the stream
 *     ends; rejects with the error the stream emits (or when no stream is available).
 */
const streamToBuffer = (source) =>
        new Promise((resolve, reject) => {
                // Resolved inside the executor so a missing stream rejects the promise instead of throwing synchronously
                const input = source === undefined ? stream : source
                const chunks = []
                // Streams with an encoding set emit strings, which Buffer.concat() rejects;
                // such chunks are converted first (as UTF-8 - adjust if the stream uses another encoding)
                input.on('data', (chunk) => chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)))
                input.on('end', () => resolve(Buffer.concat(chunks)))
                input.on('error', reject)
        })
// Wait until the stream has ended and take the collected data as one Buffer
const buffer = await streamToBuffer()

A readable stream is also async iterable (see here), so you can alternatively use for await...of (example):

        // Collect every chunk the stream yields; for await...of consumes the stream as an async iterable
        const chunks = []
        for await (const chunk of stream) {
            chunks.push(chunk)
        }
        // Join all collected chunks into one Buffer
        const buffer = Buffer.concat(chunks)

[nodejs] Parsing multipart/mixed response (containing a file stream)

Recently I had to consume an API which returned multipart/mixed data. A response looked like this:

--Boundary_0000000000001
Content-Type: application/octet-stream
Content-Disposition: attachment; filename="test.pdf"

%PDF-1.7
%�������
1 0 obj
...
%%EOF

--Boundary_0000000000001
Content-Type: application/json

{"data":[]}
--Boundary_0000000000001--

There are some node packages for parsing multipart responses, but most can only handle multipart/form-data and not multipart/mixed. The most recommended package for multipart/mixed is Dicer, but to be honest, I wasn’t sure how to use it properly. Therefore, I built my own parser. Luckily, the user idbehold provided a function to parse a response string into a JSON object here. To get it working, I just had to change the regular expressions in the split function. The most important step is to convert the data from the ArrayBuffer to a string in binary encoding before parsing.

I also wrote two helper functions: the first one parses the boundary string from the Content-Type header, and the second one parses the filename from the Content-Disposition header of your response.

module.exports = new class multipartMixedParser {

    parse(boundary, buffer) {
        const body = buffer.toString('binary') //toString encodes to utf-8 as default, this would lead to corrupted pdf's     
        return body.split(boundary).reduce((parts, part) => {
            if (part && part !== '--\r\n') {
                const [head, body] = part.trim().split(/\r\n\r\n/g)
                console.log({ body })
                parts.push({
                    body: body,
                    headers: head.split(/\r\n/g).reduce((headers, header) => {
                        const [key, value] = header.split(/:\s+/)
                        headers[key.toLowerCase()] = value
                        return headers
                    }, {})
                })
            }
            return parts
        }, [])
    }

    getBoundaryFromResponseHeaders(headers) {
        //example: multipart/mixed;boundary=Boundary_0000000000001 -> --Boundary_0000000000001
        const contentType = headers.get('content-type')
        return '--' + contentType.split("=")[1].split(";")[0]
    }

    getFileNameFromContentDisposition(cd) {
        //example: 'attachment; filename="example.pdf"' -> example.pdf
        return cd.slice(
            cd.indexOf('"') + 1,
            cd.lastIndexOf('"')
        )
    }

}

And that’s how I’m calling the API and using the multipartMixedParser Class to parse the response. The API I was using is expecting a file as formData and is also returning a file (as part of the multipart/mixed response).
It’s important to get the buffer from the response. If you used response.text() instead, it would convert the data to a UTF-8 encoded string, which leads to corrupted files.

Please note, I’m using node-fetch. When using Axios, the response object will look different.

const btoa = require('btoa')
const FormData = require('form-data')
const fetch = require('node-fetch')
const multipartMixedParser = require('./multipartMixedParser') 

/**
 * Uploads a file as multipart/form-data and returns the file contained in the
 * first part of the multipart/mixed response.
 *
 * Relies on `username` and `password` being defined in the enclosing scope.
 *
 * @param {{fileName: string, mediaType: string, content: *}} file - File to upload;
 *     `content` is handed to form-data unchanged (presumably a Buffer - confirm against the caller).
 * @returns {Promise<{fileContent: Buffer, fileName: string}|undefined>} Content and
 *     name of the returned file, or undefined when the request or parsing failed
 *     (the error message is logged).
 */
async function callAPI(file) {

        const form = new FormData()
        form.append('file', file.content, {
            contentType: file.mediaType,
            filename: file.fileName
        })

        const headers = {
            'Authorization': `Basic ${btoa(`${username}:${password}`)}`,
            ...form.getHeaders()
        }

        // Placeholder: replace with the real endpoint URL
        const url = '/my/api/path'

        try {
            const response = await fetch(url, {
                method: 'POST',
                headers: headers,
                body: form
            })
            if (!response.ok) throw new Error(response.statusText)

            // Parse the response from the raw bytes; reading it as text would decode it as UTF-8 and corrupt the file
            const buffer = await response.buffer()
            const boundary = multipartMixedParser.getBoundaryFromResponseHeaders(response.headers)

            const [filePart] = multipartMixedParser.parse(boundary, buffer)
            if (!filePart) throw new Error('Response did not contain any multipart parts')

            // in my case I only returned the file content as buffer and filename
            return {
                fileContent: Buffer.from(filePart.body, 'binary'),
                fileName: multipartMixedParser.getFileNameFromContentDisposition(filePart.headers["content-disposition"])
            }
        } catch (err) {
            // Best effort: failures are logged and the caller receives undefined
            console.log("Error message: " + err.message)
        }

}