Homelab, Linux, JS & ABAP (~˘▾˘)~
 

[nodejs] Parsing multipart/mixed response (containing a file stream)

Recently I had to consume an API which returned multipart/mixed data. A response looked like this:

--Boundary_0000000000001
Content-Type: application/octet-stream
Content-Disposition: attachment; filename"test.pdf"

%PDF-1.7
%�������
1 0 obj
...
%%EOF

--Boundary_0000000000001
Content-Type: application/json

{"data":[]}
--Boundary_0000000000001--

There are some node packages for parsing multipart responses, but most can only handle multipart/formData and not multipart/mixed. The most recommended package for multipart/mixed is Dicer, but to be honest, I wasn’t sure how to use it properly. Therefore, I built my own parser. Luckily the user idbehold provided a function to parse a response string into a json object here. To get it working, I just had to change the regex expressions in the split function. The most important step is to convert the data from the arrayBuffer to a String in binary encoding before parsing.

Also, I wrote two helper functions. The first one to parse the boundary string from the Content-Type and the second one to parse the filename from the Content-Dispositon Header of your response.

module.exports = new class multipartMixedParser {

    parse(boundary, buffer) {
        const body = buffer.toString('binary') //toString encodes to utf-8 as default, this would lead to corrupted pdf's     
        return body.split(boundary).reduce((parts, part) => {
            if (part && part !== '--\r\n') {
                const [head, body] = part.trim().split(/\r\n\r\n/g)
                console.log({ body })
                parts.push({
                    body: body,
                    headers: head.split(/\r\n/g).reduce((headers, header) => {
                        const [key, value] = header.split(/:\s+/)
                        headers[key.toLowerCase()] = value
                        return headers
                    }, {})
                })
            }
            return parts
        }, [])
    }

    getBoundaryFromResponseHeaders(headers) {
        //example: multipart/mixed;boundary=Boundary_0000000000001 -> --Boundary_0000000000001
        const contentType = headers.get('content-type')
        return '--' + contentType.split("=")[1].split(";")[0]
    }

    getFileNameFromContentDisposition(cd) {
        //example: 'attachment; filename="example.pdf"' -> example.pdf
        return cd.slice(
            cd.indexOf('"') + 1,
            cd.lastIndexOf('"')
        )
    }

}

And that’s how I’m calling the API and using the multipartMixedParser Class to parse the response. The API I was using is expecting a file as formData and is also returning a file (as part of the multipart/mixed response).
It’s important to get the buffer from the response. If you would use response.getText() it would convert the data to an utf-8 encoded string which will lead to corrupted files.

Please note, I’m using node-fetch. When using Axios, the response object will look different.

const btoa = require('btoa')
const FormData = require('form-data')
const fetch = require('node-fetch')
const multipartMixedParser = require('./multipartMixedParser') 

function callAPI(file) {

        const form = new FormData()
        form.append('file', file.content, {
            contentType: file.mediaType,
            filename: file.fileName
        })

        const headers = {
            'Authorization': 'Basic ' + btoa(username + ':' + password),
            ...form.getHeaders()
        }

        const url = /my/api/path

        try {
            const response = await fetch(url, {
                method: 'POST',
                headers: headers,
                body: form
            })
            if (!response.ok) throw new Error(response.statusText)

            //parse the response
            const buffer = await response.buffer() 
            const boundary = multipartMixedParser.getBoundaryFromResponseHeaders(response.headers)

            const result = multipartMixedParser.parse(boundary, buffer)

            // in my case I only returned the file content as buffer and filename 
            return {
                fileContent: Buffer.from(result[0].body, 'binary'),
                fileName: multipartMixedParser.getFileNameFromContentDisposition(result[0].headers["content-disposition"])
            }
        } catch (err) {
            console.log("Error message: " + err.message)
        }

}

Leave a Reply

Your email address will not be published. Required fields are marked *