When implementing HTTP services in node.js, there are many code examples like the following for collecting the entire request body (data uploaded by the client, e.g. a POST with JSON data):
    var http = require('http');

    var server = http.createServer(function(req, res) {
        var data = '';
        req.setEncoding('utf8');
        req.on('data', function(chunk) {
            data += chunk;
        });
        req.on('end', function() {
            // ... do something with the complete `data` string ...
        });
    });
Using req.setEncoding('utf8') automatically decodes the incoming bytes into a string, assuming the input is UTF-8 encoded. But I have a feeling this could break. What if a chunk of data ends in the middle of a multi-byte UTF-8 character? We can simulate this:
> new Buffer("café") <Buffer 63 61 66 c3 a9> > new Buffer("café").slice(0,4) <Buffer 63 61 66 c3> > new Buffer("café").slice(0,4).toString('utf8') 'caf?'
So we get a garbage character instead of the decoder waiting for the subsequent bytes that would decode the last character correctly.
So unless the request object takes care of this and makes sure that only fully decoded characters are pushed into the chunks, this ubiquitous code sample is broken.
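If it doesn't, one workaround I can imagine (just a sketch, not taken from the sample above) would be Node's string_decoder module, whose write() returns only complete characters and holds back a trailing partial sequence until the next chunk arrives:

    var StringDecoder = require('string_decoder').StringDecoder;

    var decoder = new StringDecoder('utf8');

    // Feeding the same split "café" buffers as above:
    decoder.write(new Buffer([0x63, 0x61, 0x66, 0xc3]));  // returns 'caf', keeps 0xc3 buffered
    decoder.write(new Buffer([0xa9]));                     // returns 'é', completing the character

In the data handler one would then append decoder.write(chunk) instead of chunk itself.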
An alternative would be to use buffers and deal with the problem of limiting the buffer size:
    var http = require('http');

    var MAX_REQUEST_BODY_SIZE = 16 * 1024 * 1024;

    var server = http.createServer(function(req, res) {
        // A better way to do this could be to start with a small buffer
        // and grow it geometrically until the limit is reached.
        var requestBody = new Buffer(MAX_REQUEST_BODY_SIZE);
        var requestBodyLength = 0;

        req.on('data', function(chunk) {
            if (requestBodyLength + chunk.length >= MAX_REQUEST_BODY_SIZE) {
                res.statusCode = 413; // Request Entity Too Large
                return;
            }
            chunk.copy(requestBody, requestBodyLength, 0, chunk.length);
            requestBodyLength += chunk.length;
        });

        req.on('end', function() {
            if (res.statusCode == 413) {
                // handle 413 error
                return;
            }

            requestBody = requestBody.toString('utf8', 0, requestBodyLength);
            // process requestBody as string
        });
    });
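A variant I can sketch (my own assumption, not part of the sample above) would be to collect the raw chunks in an array and concatenate them once at the end, which avoids pre-allocating the full 16 MB and still reassembles the bytes before decoding:

    var http = require('http');

    var MAX_REQUEST_BODY_SIZE = 16 * 1024 * 1024;

    var server = http.createServer(function(req, res) {
        var chunks = [];
        var receivedLength = 0;

        req.on('data', function(chunk) {
            receivedLength += chunk.length;
            if (receivedLength > MAX_REQUEST_BODY_SIZE) {
                res.statusCode = 413; // Request Entity Too Large
                return;
            }
            chunks.push(chunk);
        });

        req.on('end', function() {
            if (res.statusCode == 413) {
                // handle 413 error
                return;
            }

            // Buffer.concat joins the raw bytes first, so multi-byte characters
            // split across chunk boundaries are reassembled before decoding.
            var requestBody = Buffer.concat(chunks).toString('utf8');
            // process requestBody as string
        });
    });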
Am I right, or does the http request class already take care of this?