I’m working on a worker that streams file downloads so that we can decrypt encrypted-at-rest files just before they reach the user’s browser. For files up to a little over 3 MB, I have a working solution. For larger files, we have to decrypt while streaming.
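To show the shape I’m aiming for, here is a rough sketch of the decrypt-while-streaming piece, modeled on the copyItThrough () function in the code further down. decryptChunk () is a hypothetical placeholder for whatever per-chunk decryption I end up using; only the streaming plumbing here is meant to be real:
// Rough sketch only: read encrypted chunks, decrypt each one, and write the plaintext
// downstream immediately, so the browser receives bytes as they are produced.
// decryptChunk () is a hypothetical placeholder, not an implemented function.
async function decryptItThrough (readable, writable, decryptChunk)
{
    const reader = readable.getReader ();
    const writer = writable.getWriter ();
    for (;;)
    {
        const {done, value} = await reader.read ();
        // Is the encrypted stream exhausted?
        if (done)
        {
            break;
        }
        // Decrypt this chunk and push the plaintext downstream right away.
        await writer.write (await decryptChunk (value));
    }
    await writer.close ();
}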
Using the example at [ templates/examples/fast-google-fonts at main · cloudflare/templates · GitHub ] as a basis for streaming content, and testing with an unencrypted 534 MB file (.iso), the file gets downloaded and saved to disk just fine, but the browser doesn’t show that anything is downloading. Only when the download is complete does the usual “downloading a file” UI appear, and the entire file is saved to disk all at once. I would expect the browser to put the file on its list of downloads and show a progress bar (or circle, or whatever) in the normal way. The resulting file is binary-identical to the original (I hashed both to confirm), so the download is successful; it just doesn’t look like we’re downloading while the download is in progress.
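The core pattern I took from that example, boiled down, is: create an identity TransformStream, return a Response built from its readable side immediately, and copy the upstream body into the writable side without awaiting it. The full version is in the code below; condensed, it looks like this:
// Condensed form of the streaming pattern used in the full worker code below.
function streamItBack (upstreamResponse, request, event)
{
    // Identity TransformStream (a pipe): the readable side becomes the new response body.
    const {readable, writable} = new TransformStream ();
    const newResponse = new Response (readable, upstreamResponse);
    // Deliberately not awaited: the copy runs while the response is already being returned.
    copyItThrough (upstreamResponse.body, writable, request, event);
    return newResponse;
}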
The test file is stored in an AWS S3 bucket. If I download the file directly from S3, instead of through the worker, the browser behaves in the normal, familiar way. The only difference I can see (using curl) between the response coming from the worker and the response coming directly from S3 is that the worker adds a “Transfer-Encoding: chunked” header, which my code does not set. Perhaps the example I’m building from is not quite the right way to get the worker’s output streaming properly to the browser?
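One experiment I plan to try is copying the upstream Content-Length onto the streamed response explicitly, on the theory that the browser has no total size to base its progress display on once the response goes out chunked. I’m not certain the runtime will keep the header on a streamed body, but it seems like an easy test; a sketch of what I mean:
// Sketch of the experiment: build the streamed response with an explicit Content-Length
// copied from the upstream S3 response. Whether the runtime honors it on a streamed
// body is exactly what I want to find out.
function buildStreamedResponse (fileResponse, readable)
{
    const headers = new Headers (fileResponse.headers);
    const contentLength = fileResponse.headers.get ("Content-Length");
    // Only pin the size if S3 actually reported one.
    if (contentLength !== null)
    {
        headers.set ("Content-Length", contentLength);
    }
    return new Response (readable, {status: fileResponse.status, statusText: fileResponse.statusText, headers: headers});
}
If that makes the progress indicator show up, I’ll know the issue is just the missing length rather than the streaming itself.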
I’m developing and testing this with Cloudworker running on my machine. A PHP server-side piece provides the app and the API. The worker has to call the API, which redirects it (via a pre-signed URL) to the S3 link for the file. I have configured Cloudworker with only ~165 MB of RAM available, so I’m fairly confident it isn’t buffering all 534 MB of the test file.
Here’s the full worker code I have so far (again, it works, but it doesn’t look like anything is downloading while the download is happening):
// Workers can only decode utf-8 so keep a list of character encodings that can be decoded.
const VALID_CHARSETS = ['utf-8', 'utf8', 'iso-8859-1', 'us-ascii'];
const environment = "DEV";
addEventListener ('fetch', event =>
{
event.passThroughOnException ();
// Is this an HTTP GET request?
if (event.request.method === "GET")
{
const url = new URL (event.request.url);
const accept = event.request.headers.get ('Accept');
// Does the path in the URL begin with the path we will be processing?
if (url.pathname.startsWith ('/files/'))
{
event.respondWith (processRequest (event.request, event));
}
else // The path in the URL does NOT begin with the path we would be processing.
{
event.respondWith (handleRequest (event.request));
}
}
});
async function handleRequest (request)
{
console.debug ("handleRequest () called.");
return passItThrough (request);
}
async function processRequest (request, event)
{
const { headers } = request;
const url = "https://my-box/" + 'API/x.x/files/'; // Not via localhost, becuase of SSL cert.
const clientOAuth2AccessToken = "encrypted.JWT.stuffs";
// TODO: Validate the authentication token, preferably obtained from the browser,
// instead of hardcoded like it is here (for testing).
console.log ("There is a spoon!");
let requestURL = new URL (request.url);
// TEMPORARY bypass some noise (because Chrome almost always comes looking for a favicon).
if (requestURL.pathname.startsWith ("/favicon.ico"))
{
return new Response ("File not found", {status: 404});
}
// Prepare the fetch.
let fetchMe = url + requestURL.pathname;
// Does the request URL's pathname match the proper pattern?
if (requestURL.pathname.match (/\/files\/[0-9]+/))
{
fetchMe = url + requestURL.pathname.match (/\/files\/([0-9]+)\??.*/)[1] + "?file";
}
let fetchAttrs = {headers:{}, redirect: 'manual'};
for (const pair of headers.entries ())
{
// Is this NOT the cookie header?
if (pair[0].toLowerCase () !== "cookie")
{
//console.debug ("processRequest () " + pair[0] + ":", pair[1]);
fetchAttrs['headers'][pair[0]] = pair[1];
}
}
fetchAttrs['headers']['Cookie'] = headers.get ("Cookie") + "; AT=" + clientOAuth2AccessToken;
fetchAttrs['headers']['Referer'] = "https://my-box/dashboard/";
// fetchAttrs['headers']['Authorization'] = "Bearer " + getCookie ("AT");
//console.debug (" >>>> fetchAttrs", fetchAttrs, " <<<< fetchAttrs");
// Let's go get it!
const response = await fetch (fetchMe, fetchAttrs);
// Did we get a valid Response object?
if (response)
{
switch (response.status)
{
case 200:
return response;
case 304:
case 307:
const fileResponse = await fetch (response.headers.get ("Location"));
const isEncryptedHeader = getHeaderValue ("MyFiles-Encrypted", response.headers);
switch (fileResponse.status)
{
case 200:
let newResponseInit =
{
headers: copyHeaders (fileResponse.headers),
status: 200,
statusText: "OK"
};
// Create an identity TransformStream (a.k.a. a pipe).
// The readable side will become our new response body.
const {readable, writable} = new TransformStream ();
// Create a cloned response with our modified stream.
const newResponse = new Response (readable, fileResponse);
// Is the MyFiles-Encrypted header true?
if (isEncryptedHeader)
{
// Start the async decryption processing of the response stream.
processEncryptedFileDownload (fileResponse, writable, request, event);
}
else // The MyFiles-Encrypted header is NOT true.
{
// Start the async processing of the response stream.
copyItThrough (fileResponse.body, writable, request, event);
}
// Return the in-process response so it can be streamed.
console.debug ("processRequest () returning newResponse object.");
return newResponse;
default:
return fileResponse;
}
}
}
console.debug ("processRequest () returning the response.");
return response;
}
async function processEncryptedFileDownload (fileResponse, writable, request, event)
{
console.debug ("processEncryptedFileDownload () called (not yet implemented).");
let responseInit =
{
headers:
{
'Content-Type': fileResponse.headers.get ('Content-Type'),
'Content-Disposition': fileResponse.headers.get ('Content-Disposition'),
'MyFiles-Encrypted': true,
'MyFiles-Decrypted': false
}
};
switch (fileResponse.status)
{
case 200:
// Decrypt-while-streaming code will go here (based on the copyItThrough () function), reading from fileResponse.body.
break;
default:
break;
}
}
async function processHTMLResponse (response, request, event)
{
console.debug ("processHTMLResponse () called.");
// Workers can only decode utf-8. If it is anything else, pass the response through unmodified.
const contentType = response.headers.get ("content-type");
const charsetRegex = /charset\s*=\s*([^\s;]+)/mgi;
const match = charsetRegex.exec (contentType);
// Was the Regex match good?
if (match !== null)
{
let charset = match[1].toLowerCase ();
// Is the indicated character set NOT in the VALID_CHARSETS list?
if (!VALID_CHARSETS.includes (charset))
{
return response;
}
}
// Create an identity TransformStream (a.k.a. a pipe).
// The readable side will become our new response body.
const {readable, writable} = new TransformStream ();
// Create a cloned response with our modified stream.
const newResponse = new Response (readable, response);
// Start the async processing of the response stream.
copyItThrough (response.body, writable, request, event);
// Return the in-process response so it can be streamed.
return newResponse;
}
/**
* Directly stream the request without touching anything.
* @param {Request} request
*/
async function passItThrough (request)
{
console.debug ("passItThrough () called.");
let response = await fetch (request);
let {readable, writable} = new TransformStream ();
response.body.pipeTo (writable);
return new Response (readable, response);
}
async function copyItThrough (readable, writable, request, event)
{
console.debug ("copyItThrough () called.");
const reader = readable.getReader ();
const writer = writable.getWriter ();
const encoder = new TextEncoder ();
let decoder = new TextDecoder ("utf-8", {fatal: true});
let firstChunk = true;
let unsupportedCharset = false;
let content = "";
let partial = "";
try
{
console.debug ("copyItThrough () starting \"infinite\" loop.");
for (;;)
{
//console.debug ("copyItThrough () \"infinite\" loop iteration start; await-ing reader.read (),");
const {done, value} = await reader.read ();
// const stuff = await reader.read ();
// Are we done?
if (done)
{
console.debug ("copyItThrough () done == [", done, "].");
// Is partial NOT empty.
if (partial.length)
{
partial = await modifyHTMLChunk (partial, request, event);
await writer.write (encoder.encode (partial))
}
break;
}
//console.debug ("copyItThrough () NOT done.");
let chunk = null;
// Do we already know that the character set is NOT supported?
if (unsupportedCharset)
{
//console.debug ("copyItThrough () Unsupported charset: await'ing writer.write (value).");
await writer.write (value);
//console.debug ("copyItThrough () continue;")
continue;
}
else // We do NOT already know if the character set is or is not supported.
{
//console.debug ("copyItThrough () Supported charset.");
try
{
//console.debug ("copyItThrough () \"try\"-ing: chunk = decoder.decode (value, {stream: true}).");
chunk = decoder.decode (value, {stream: true});
}
catch (e)
{
console.debug ("copyItThrough () caught exception \"try\"-ing to decoder.decode (value).\n unsupportedCharset = true;")
unsupportedCharset = true;
// Is partial NOT empty?
if (partial.length)
{
console.debug ("copyItThrough () partial.length is", partial.length, "; writer.write (encoder.encode (partial));\n partial = \"\";");
await writer.write (encoder.encode (partial));
partial = "";
}
console.debug ("copyItThrough () await'ing writer.write (value).");
await writer.write (value);
console.debug ("copyItThrough () continue;")
continue;
}
}
try
{
//console.debug ("copyItThrough () \"try\"-ing the first chunk.");
// Is this the first chunk?
if (firstChunk)
{
console.debug ("copyItThrough () looking inside of the first chunk for ...");
firstChunk = false;
// Does this chunk contain characters NOT part of a supported character set?
if (chunkContainsInvalidCharset (chunk))
{
console.debug ("copyItThrough () The first chunk does NOT contain a valid charset.");
// Switch to passthrough.
unsupportedCharset = true;
// Is partial NOT empty?
if (partial.length)
{
console.debug ("copyItThrough () first chunk partial.length is", partial.length, ";\n writer.write (encoder.encode (partial));\n partial = \"\";");
await writer.write (encoder.encode (partial));
partial = "";
}
console.debug ("copyItThrough () first chunk await'ing writer.write (value).");
await writer.write (value);
console.debug ("copyItThrough () continue;")
continue;
}
}
// TODO: Optimize this so we aren't continuously adding strings together.
console.debug ("copyItThrough () content = partial + chunk;\n partial = \"\";");
content = partial + chunk;
partial = "";
// Is content NOT empty?
if (content.length)
{
console.debug ("copyItThrough () first chunk content.length == [", content.length, "].");
content = await modifyHTMLChunk (content, request, event);
}
}
catch (e)
{
console.debug ("copyItThrough () caught an exception while \"try\"-ing to process the first chunk; ignoring.");
// Ignore the exception.
}
// Is content NOT empty?
if (content.length)
{
console.debug ("copyItThrough () post first chunk content.length == [", content.length, "].\n await-ing writer.write (encoder.encode (content);\n content = \"\";");
await writer.write (encoder.encode (content));
content = "";
}
}
}
catch (e)
{
console.debug ("copyItThrough () caught an exception while \"try\"-ing the \"infinite\" loop (this is the only way out of it); ignoring.");
// Ignore the exception.
}
try
{
console.debug ("copyItThrough () \"try\"-ing: await writer.close ();");
await writer.close ();
}
catch (e)
{
console.debug ("copyItThrough () caught an exception while \"try\"-ing to close the writer; ignoring.");
// Ignore the exception.
}
}
async function modifyHTMLChunk (content, request, event)
{
console.debug ("modifyHTMLChunk () called.");
console.debug ("modifyHTMLChunk () returning content unmodified.");
return content;
}
function chunkContainsInvalidCharset (chunk)
{
console.debug ("chunkContainsInvalidCharset () called.");
let invalid = false;
// meta charset
const charsetRegex = /<\s*meta[^>]+charset\s*=\s*['"]([^'"]*)['"][^>]*>/mgi;
const charsetMatch = charsetRegex.exec (chunk);
// Is the character set specified in the content?
if (charsetMatch)
{
const docCharset = charsetMatch[1].toLowerCase ();
// Is the specified character set NOT in the VALID_CHARSETS list?
if (!VALID_CHARSETS.includes (docCharset))
{
invalid = true;
}
}
// content-type
const contentTypeRegex = /<\s*meta[^>]+http-equiv\s*=\s*['"]\s*content-type[^>]*>/mgi;
const contentTypeMatch = contentTypeRegex.exec(chunk);
// Is the content-type meta tag present in the content?
if (contentTypeMatch)
{
const metaTag = contentTypeMatch[0];
const metaRegex = /charset\s*=\s*([^\s"]*)/mgi;
const metaMatch = metaRegex.exec (metaTag);
// Is there a character set in the content-type meta?
if (metaMatch)
{
const charset = metaMatch[1].toLowerCase ();
// Is the specified character set NOT in the VALID_CHARSETS list?
if (!VALID_CHARSETS.includes (charset))
{
invalid = true;
}
}
}
return invalid;
}
/****************************
*** Supporting Functions ***
****************************/
/**
* Returns null if the header is not found in headers, else returns the value of the header.
* Can be case sensitive or not (defaults to not).
*
* @param {String} header The name of the header to search for.
* @param {Headers} headers The Headers object to search.
* @param {boolean} caseSensitive (optional; default: false).
*/
function getHeaderValue (header, headers, caseSensitive = false)
{
console.debug ("getHeaderValue (", header,", headers,", caseSensitive, ") called.");
// Are we being case sensitive?
if (caseSensitive)
{
for (const pair of headers.entries ())
{
// Did we find the desired header (exact match)?
if (pair[0] === header)
{
return pair[1];
}
}
}
else // We are NOT being case sensitive.
{
for (const pair of headers.entries ())
{
// Did we find the desired header (ignoring case)?
if (pair[0].toLowerCase () === header.toLowerCase ())
{
return pair[1];
}
}
}
return null;
}
function copyHeaders (headers, exclusions = [])
{
console.debug ("copyHeaders () called.");
let returnMe = {};
for (const pair of headers.entries ())
{
// Skip any header named in the (optional) exclusions list (lowercase names).
if (exclusions.includes (pair[0].toLowerCase ()))
{
continue;
}
returnMe[pair[0]] = pair[1];
}
return returnMe;
}
function dumpHeadersDebug (headers, from)
{
console.debug (" >>>> " + from + ".headers");
for (const pair of headers.entries ())
{
console.debug (pair[0], "==", pair[1]);
}
console.debug (" <<<< " + from + ".headers");
}