Workers KV multiple image upload - Custom formData parser - Tutorial

KV multiple image upload - Tutorial

GET

The get should be done with KV bindings (Fast, 30 ms), in arrayBuffer mode

const value = await KV.get(k, 'arrayBuffer');

PUT

You have 2 possibilities :

  • Use the KV API to put the file in KV (Extremely slow, 1.25 s)
  • Use a formData parser to get the file, then use the KV bindings to put the file in KV (Relatively fast, 50-60 ms)

Problem : Response.formData() does not work

Unfortunately, handling multipart/form-data with binary files like images by using Response.formData() does not work in Cloudflare Workers. The response goes through a UTF-8 transformation which corrupts the files and makes them unreadable. In addition, you lose the filename information, which is sometimes important.

Solution : Custom formData parser

I wrote a custom formData parser in JavaScript to do the job. It does not follow the standard API, but it roughly does the job. It handles binary files like images, text and JSON files, and simple form values, even those spanning multiple lines. That should be enough for most use cases. Feel free to alter it to match your needs.

Try to copy / paste this JS code snippet in your Chrome console:

// Browser demo : build a multipart body out of a few files and simple values,
// parse it back with formData(), log the parsed parts and append the second
// parsed part to the page as an image.
const main = async () => {
  // File entries : the key is used both as the field name and as the filename.
  // NOTE(review): the '50' and '100' entries are empty strings in this listing;
  // presumably they held image data URLs - confirm before running.
  const files = {
    '50': '',
    '100': '',
    'test.txt': 'data:text/plain;base64,YWFh', //contains : 'aaa'
    'test.json': 'data:application/json;base64,eyJKb2huIjoiRG9lIn0=', //contains : '{"John":"Doe"}'
  };
  // Simple values : JSON sent as text, a single-line string, a multi-line string
  const fields = {
    '1': JSON.stringify({ name: 'Alice' }),
    '2': JSON.stringify({ name: 'Bob' }),
    'x': 'yyy',
    'u': "v\nv2",
  };

  const body = new FormData();
  // Files are fetched one after the other so that the parts keep this order
  for (const [name, url] of Object.entries(files)) {
    const fetched = await fetch(url);
    const content = await fetched.blob();
    body.append(name, content, name);
  }
  for (const [name, text] of Object.entries(fields)) {
    body.append(name, text);
  }

  // Fake event, fake request : a Response built from the FormData body
  const res = new Response(body);
  const event = { request: res, response: res };

  console.time('formData');
  const fd = await formData(event); // Returns an array [{name,value,filename,content-type}]
  console.timeEnd('formData');
  console.log(fd);
  if (!fd) { return; }

  // Display the second parsed part (index 1) as an image
  const second = fd[1];
  const bytes = new Uint8Array(second.value, 0, second.value.byteLength);
  const url = URL.createObjectURL(new Blob([bytes]));
  document.querySelector('body').insertAdjacentHTML('beforeend', `<img src='${url}' width=50>`);
};
main();

// Parse the multipart/form-data body of event.request.
// Returns an array of parts (objects filled by getPart : name, value, and for
// files filename and content-type), or null when getBoundary() yields a falsy
// value or when the payload does not pass lessThan1000kB().
const formData = async (event) => {
  // A boundary is required to split the payload into parts
  const boundary = await getBoundary(event);
  if (!boundary) { return null; }

  // Read the whole payload as raw bytes and enforce the size limit
  const ab = await event.request.arrayBuffer();
  const max = ab.byteLength;
  const smallEnough = await lessThan1000kB(max);
  if (!smallEnough) { return null; }

  // Parse part after part; getPart returns the position to resume from
  const decoder = new TextDecoder();
  const parts = [];
  let pos = 0;
  while (pos < max) {
    const part = {};
    pos = await getPart(ab, pos, max, boundary, decoder, part);
    // A part without a name means nothing more could be parsed
    if (!part.name) { break; }
    parts.push(part);
  }
  return parts;
};

// Tell whether a payload of max bytes fits the limit :
// the size, rounded up to whole kB (1 kB = 1000 bytes), must not exceed 1000.
const lessThan1000kB = async (max) => {
  const kiloBytes = Math.ceil(max / 1000);
  return !(kiloBytes > 1000);
};

// Extract the multipart delimiter from the content-type header of event.request.
// Returns '--' followed by the boundary parameter (the form in which the
// delimiter appears in the body), or null when the header is missing, is not
// multipart/form-data, or carries no boundary.
// The boundary may be quoted and may be followed by other parameters.
const getBoundary = async (event) => {
  const ct = event.request.headers.get('content-type');
  // headers.get() gives null when the header is absent
  if (!ct) { return null; }
  const found = /^multipart\/form-data\s*;\s*boundary=(?:"([^"]+)"|([^\s;]+))/i.exec(ct);
  // Group 1 : quoted boundary, group 2 : bare token
  const value = found?.[1] ?? found?.[2];
  // Return null rather than the truthy string '--undefined', so that callers
  // testing the result for falsiness really detect a missing boundary
  return value ? `--${value}` : null;
};

// Parse one part of the payload, starting at position start, and fill o with
// what was found (name, then either a simple value or a file).
// Returns the position to resume from, or max to stop the machine
// (end of payload, final delimiter, or anything unexpected).
const getPart = async (ab, start, max, boundary, t, o) => {
  // Delimiter line
  let pos = await readString(boundary, start, ab, t, max);
  if (pos === max) { return max; }
  // '--' right after the delimiter : this was the closing delimiter
  if (await eof(pos, ab, t, max)) { return max; }
  pos = await readNewLine(pos, ab, t, max);
  if (pos === max) { return max; }

  // Field name
  pos = await readString('Content-Disposition: form-data; name="', pos, ab, t, max);
  if (pos === max) { return max; }
  pos = await captureUntil(['"'], pos, ab, t, max, o, 'name');
  if (pos === max || !o.name) { return max; }

  // Simple value : the name is directly followed by an empty line,
  // written with LF or with CRLF line endings
  for (const prefix of ["\"\n\n", "\"\r\n\r\n"]) {
    const next = await readValue(prefix, boundary, pos, ab, t, max, o);
    if (next) { return next; }
  }

  // Otherwise it has to be a file
  return readFile(boundary, pos, ab, t, max, o);
};

// Try to read a simple value (implicit text/plain) into o.value.
// s is the expected prefix at position i : the closing quote of the name
// followed by an empty line, with LF or CRLF line endings.
// Returns false when s is not found at i, max when the value is not closed by
// a delimiter, otherwise the position of the first byte of the next delimiter.
const readValue = async (s, boundary, i, ab, t, max, o) => {
  if (!await checkString(s, i, ab, t, max)) { return false; }
  i = await readString(s, i, ab, t, max);
  // The line break in front of the delimiter belongs to the delimiter, not to
  // the value. CRLF is tried first so that CRLF-delimited values do not keep
  // a trailing \r (same token list as the one used for file contents)
  i = await captureUntil([`${"\r\n"}${boundary}`, `${"\n"}${boundary}`], i, ab, t, max, o, 'value');
  // No closing delimiter : stop the machine
  if (i === max) { return max; }
  // captureUntil stops on the last byte of the delimiter : step back to its first byte
  i -= boundary.length - 1;
  return i;
}

// Read a file part (text, json, image...) : fills o.filename, o['content-type']
// and o.value. i is expected to be on the closing quote of the field name.
// Returns the position of the first byte of the next delimiter,
// or max as soon as something unexpected is met.
const readFile = async (boundary, i, ab, t, max, o) => {
  // Filename
  let pos = await readString('"; filename="', i, ab, t, max);
  if (pos === max) { return max; }
  pos = await captureUntil(['"'], pos, ab, t, max, o, 'filename');
  if (pos === max) { return max; }

  // Closing quote, then end of the Content-Disposition line
  pos = await readString('"', pos, ab, t, max);
  pos = await readNewLine(pos, ab, t, max);
  if (pos === max) { return max; }

  // Content type, mandatory for a file
  pos = await readString('Content-Type: ', pos, ab, t, max);
  if (pos === max) { return max; }
  pos = await captureUntil(["\r", "\n"], pos, ab, t, max, o, 'content-type');
  if (pos === max || !o['content-type']) { return max; }

  // End of the Content-Type line, then the empty line before the content
  for (let n = 0; n < 2; n += 1) {
    pos = await readNewLine(pos, ab, t, max);
    if (pos === max) { return max; }
  }

  // Content, captured according to its content type, up to the next delimiter
  const delimiters = [`${"\r\n"}${boundary}`, `${"\n"}${boundary}`];
  pos = await captureUntil(delimiters, pos, ab, t, max, o, 'value', o['content-type']);
  if (pos === max) { return max; }

  // captureUntil stops on the last byte of the delimiter : step back to its first byte
  return pos - (boundary.length - 1);
};

// Decode the single byte found at position i of ab, using decoder t
const charAt = async (i, ab, t) => {
  const oneByte = ab.slice(i, i + 1);
  return t.decode(oneByte);
};

// Tell whether the two bytes at position i are '--', which marks the closing
// delimiter when this is called directly after readString(boundary).
// Always false when only one byte is left before max.
const eof = async (i, ab, t, max) => {
  const singleByteLeft = i + 1 === max;
  return !singleByteLeft && t.decode(ab.slice(i, i + 2)) === '--';
};

// Consume a line break at position i : \n, optionally preceded by \r.
// Returns the position right after the \n.
// Returns max to stop the machine when max is reached or when no \n is found.
const readNewLine = async (i, ab, t, max) => {
  let pos = i;
  const first = await charAt(pos, ab, t);
  if (first === "\r") {
    // Skip the carriage return, the \n is expected right behind
    pos += 1;
    if (pos === max) { return max; }
  }
  const next = await charAt(pos, ab, t);
  return next === "\n" ? pos + 1 : max;
};

// Tell whether v is found at position i, without moving :
// readString in check mode, which answers true or false
const checkString = async (v, i, ab, t, max) => {
  const found = await readString(v, i, ab, t, max, true);
  return found;
};

// Try to read v at position i.
// Read mode  : returns the position right after v, or max when v is not there
// Check mode : returns true when v is there, false otherwise
// The text compared with v is cut at max, so a v that would run past max is not found.
const readString = async (v, i, ab, t, max, check = false) => {
  // Position right after v, never beyond max and never before i
  const stop = Math.max(i, Math.min(max, i + v.length));
  const found = t.decode(ab.slice(i, stop)) === v;
  if (check) { return found; }
  return found ? stop : max;
};

// Read until one of the values inside a is met (Ex : \r or \n), and store what
// was read before it into o[prop], according to contentType.
// Same as readUntil, with capture switched on.
const captureUntil = async (a, i, ab, t, max, o, prop, contentType = 'text/plain') => {
  const capture = true;
  const position = await readUntil(a, i, ab, t, max, capture, o, prop, contentType);
  return position;
};

// Try to read, until we get one of the values inside a (Ex : \r or \n).
// Tokens are tried in the order of a, at every position from i up to max.
// When a token is found :
//   - if capture is set, the bytes between i and the start of the token are
//     handed to captureData (stored in o[prop] according to contentType)
//   - the position of the last byte of the token is returned
// When nothing is found, max is returned.
const readUntil = async (a, i, ab, t, max, capture = false, o = Object(), prop = '_', contentType = 'text/plain') => {
  const start = i;
  // `<=` and not `<` : a token whose last byte is the very last byte of the
  // payload ends at position max, and must still be detected
  while (i <= max) {
    // For each token
    for (const v of a) {
      const end = i - v.length;
      // Not enough bytes read since start to hold this token
      if (end < start) { continue; }
      // Compare the text ending at the current position with the token
      if (t.decode(ab.slice(end, i)) !== v) { continue; }
      // Capture data
      if (capture) { await captureData(ab, start, end, t, o, prop, contentType); }
      // Return the position of the last byte of the token
      return i - 1;
    }
    // Go to next char
    ++i;
  }
  // Nothing found (a start already beyond max is returned unchanged)
  return Math.max(start, max);
}

// Store the bytes of ab found between start and end into o[prop].
// The same chunk is offered to each capture function in turn, always in this
// order; each one decides from contentType (and from o) whether it applies.
const captureData = async (ab, start, end, t, o, prop, contentType) => {
  const chunk = ab.slice(start, end);
  const steps = [
    () => captureJson(chunk, t, o, prop, contentType),
    () => captureTextAsJson(chunk, t, o, prop, contentType),
    () => captureText(chunk, t, o, prop, contentType),
    () => captureBlob(chunk, o, prop, contentType),
  ];
  // One after the other : a step may rely on what a previous one wrote into o
  for (const step of steps) {
    await step();
  }
};

// application/json only : decode sub, parse it, store the result in o[prop].
// A content that is not valid JSON makes JSON.parse throw.
const captureJson = async (sub, t, o, prop, contentType) => {
  if (contentType === 'application/json') {
    const text = t.decode(sub);
    o[prop] = JSON.parse(text);
  }
};

// text/plain only : when the text starts with '{' and is valid JSON, store the
// parsed object in o[prop] and flag the part as application/json.
// Anything else is left untouched.
const captureTextAsJson = async (sub, t, o, prop, contentType) => {
  // Check the content type before decoding : this function is offered every
  // captured chunk, and decoding a whole binary file to throw it away is wasted work
  if (contentType !== 'text/plain') { return; }
  const s = t.decode(sub);
  if (s[0] !== '{') { return; }
  let parsed;
  try {
    parsed = JSON.parse(s);
  } catch (e) {
    // Deliberate best effort : text that merely looks like JSON stays plain text
    return;
  }
  // A text starting with '{' can only parse to an object
  o[prop] = parsed;
  o['content-type'] = 'application/json';
};

// text/plain only : decode sub and store the text in o[prop],
// unless o is already flagged with a content type other than text/plain.
const captureText = async (sub, t, o, prop, contentType) => {
  if (contentType !== 'text/plain') { return; }
  const known = o['content-type'];
  if (known && known !== 'text/plain') { return; }
  o[prop] = t.decode(sub);
};

// Neither text/plain nor application/json : store the raw bytes as they are in o[prop]
const captureBlob = async (sub, o, prop, contentType) => {
  const isTextual = contentType === 'text/plain' || contentType === 'application/json';
  if (!isTextual) {
    o[prop] = sub;
  }
};

It should display :

0: {name: "50", filename: "50", content-type: "image/webp", value: ArrayBuffer(1370)}
1: {name: "100", filename: "100", content-type: "image/webp", value: ArrayBuffer(3414)}
2: {name: "test.txt", filename: "test.txt", content-type: "text/plain", value: "aaa"}
3: {name: "test.json", filename: "test.json", content-type: "application/json", value: {…}}
4: {name: "1", value: {…}, content-type: "application/json"}
5: {name: "2", value: {…}, content-type: "application/json"}
6: {name: "x", value: "yyy"}
7: {name: "u", value: "v↵v2"}

… indicating the form data has been successfully parsed, in 39 ms.

Now, with KV bindings you just have to :

  • PUT the value into KV
  • GET the value from KV using the arrayBuffer mode

And voilà ! :grinning:

6 Likes

This doesn’t timeout over the Worker limit?

The FormData format itself is limited since you can only read it sequentially (byte after byte). So yes, it is slow for multiple entries. 39 ms is at the limit if you do some KV sets after, but in my experience it still works as I didn’t see any hard limit (tried up to 70ms at peak).

For increased performance, I switched to another approach:

Client-side : Read all images, convert each of them to an ArrayBuffer, concatenate them into a single ArrayBuffer, and fill the content type header with custom data : the width of each image and its respective size in terms of ArrayBuffer length.

Cloudflare Workers : Read data as ArrayBuffer, read the content type header, and, since I know the boundaries, process all images asynchronously (Promise.all).

This is a lot faster, like 10ms.

1 Like

Would love an updated faster version of this :slight_smile:

1 Like