Homelab, Linux, JS & ABAP (~˘▾˘)~
 

[nodejs] Parsing multipart/mixed response (containing a file stream)

Recently I had to consume an API which returned multipart/mixed data. A response looked like this:

01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
--Boundary_0000000000001
Content-Type: application/octet-stream
Content-Disposition: attachment; filename="test.pdf"
 
%PDF-1.7
%�������
1 0 obj
...
%%EOF
 
--Boundary_0000000000001
Content-Type: application/json
 
{"data":[]}
--Boundary_0000000000001--

There are some Node packages for parsing multipart responses, but most can only handle multipart/form-data and not multipart/mixed. The most recommended package for multipart/mixed is Dicer, but to be honest, I wasn’t sure how to use it properly. Therefore, I built my own parser. Luckily, the user idbehold provided a function to parse a response string into a JSON object here. To get it working, I just had to change the regular expressions in the split function. The most important step is to convert the data from the buffer to a string in binary encoding before parsing; otherwise the binary file content gets corrupted.

Also, I wrote two helper functions. The first one parses the boundary string from the Content-Type header and the second one parses the filename from the Content-Disposition header of your response.

01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
module.exports = new class multipartMixedParser {
 
    parse(boundary, buffer) {
        const body = buffer.toString('binary') //toString encodes to utf-8 as default, this would lead to corrupted pdf's    
        return body.split(boundary).reduce((parts, part) => {
            if (part && part !== '--\r\n') {
                const [head, body] = part.trim().split(/\r\n\r\n/g)
                console.log({ body })
                parts.push({
                    body: body,
                    headers: head.split(/\r\n/g).reduce((headers, header) => {
                        const [key, value] = header.split(/:\s+/)
                        headers[key.toLowerCase()] = value
                        return headers
                    }, {})
                })
            }
            return parts
        }, [])
    }
 
    getBoundaryFromResponseHeaders(headers) {
        //example: multipart/mixed;boundary=Boundary_0000000000001 -> --Boundary_0000000000001
        const contentType = headers.get('content-type')
        return '--' + contentType.split("=")[1].split(";")[0]
    }
 
    getFileNameFromContentDisposition(cd) {
        //example: 'attachment; filename="example.pdf"' -> example.pdf
        return cd.slice(
            cd.indexOf('"') + 1,
            cd.lastIndexOf('"')
        )
    }
 
}

And that’s how I’m calling the API and using the multipartMixedParser Class to parse the response. The API I was using is expecting a file as formData and is also returning a file (as part of the multipart/mixed response).
It’s important to get the buffer from the response. If you used response.text() instead, it would convert the data to a UTF-8 encoded string, which would lead to corrupted files.

Please note, I’m using node-fetch. When using Axios, the response object will look different.

01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
const btoa = require('btoa')
const FormData = require('form-data')
const fetch = require('node-fetch')
const multipartMixedParser = require('./multipartMixedParser')
 
/**
 * Uploads a file as multipart/form-data and returns the file contained in the
 * first part of the multipart/mixed response.
 *
 * NOTE(review): `username` and `password` are not defined in this snippet and
 * must be provided by the surrounding module. The URL is a placeholder;
 * presumably it has to be replaced with the absolute URL of the real API.
 *
 * @param {{content: *, mediaType: string, fileName: string}} file - File to
 *     upload; `content` is passed to `form.append` unchanged.
 * @returns {Promise<{fileContent: Buffer, fileName: string}|undefined>}
 *     The returned file, or `undefined` when the request or the parsing
 *     failed (the error message is logged).
 */
async function callAPI(file) {

    const form = new FormData()
    form.append('file', file.content, {
        contentType: file.mediaType,
        filename: file.fileName
    })

    const headers = {
        'Authorization': 'Basic ' + btoa(username + ':' + password),
        ...form.getHeaders()
    }

    const url = '/my/api/path'

    try {
        const response = await fetch(url, {
            method: 'POST',
            headers: headers,
            body: form
        })
        if (!response.ok) throw new Error(response.statusText)

        //parse the response
        // Read the raw bytes: decoding the body as text would corrupt the file.
        const buffer = await response.buffer()
        const boundary = multipartMixedParser.getBoundaryFromResponseHeaders(response.headers)

        const result = multipartMixedParser.parse(boundary, buffer)
        if (result.length === 0) throw new Error('Response did not contain any multipart parts')

        // in my case I only returned the file content as buffer and filename
        const filePart = result[0]
        return {
            // parse() yields 'binary' strings; convert back to the original bytes.
            fileContent: Buffer.from(filePart.body, 'binary'),
            fileName: multipartMixedParser.getFileNameFromContentDisposition(filePart.headers["content-disposition"])
        }
    } catch (err) {
        console.log("Error message: " + err.message)
    }

}

Leave a Reply

Your email address will not be published. Required fields are marked *