I am doing some simple data validation on a large dataset (a matrix of 15849 x 12771 entries) in Node.js v7.5.0. The entire dataset is kept in memory for performance reasons, so it is important to keep memory consumption close to the theoretical minimum (each number taking 8 bytes in JS).
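For reference, here is a back-of-the-envelope calculation of that theoretical minimum (a sketch only; the 8-bytes-per-number figure is my assumption about how the doubles end up stored, not something V8 guarantees for plain arrays):

// rough lower bound: rows * cols * 8 bytes per double
const rows = 15849;
const cols = 12771;
const minBytes = rows * cols * 8; // 1,619,260,632 bytes
console.log(`${(minBytes / 1024 / 1024).toFixed(2)} MB`); // ~1544 MB, in line with the ~1549 MB measured below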
Please compare the following two ways of achieving the same thing.
With forEach:
regressData.forEach((yxa, yxaIndex) => {
    yxa.forEach((yx, yxIndex) => {
        if (!_.isFinite(yx)) {
            throw new Error(`non-finite entry at [${yxaIndex}, ${yxIndex}]`);
        }
    });
});
This consumes all the memory of my 4 GB+ node process, and the loop never finishes (at least not before my patience runs out); I assume the process starts using much slower swap memory.
And an identical version with a plain for loop:
for (var yxai = 0, yxal = regressData.length; yxai < yxal; yxai++) {
    const yx = regressData[yxai];
    for (var yxi = 0, yxl = yx.length; yxi < yxl; yxi++) {
        if (!_.isFinite(yx[yxi])) {
            throw new Error(`non-finite entry at [${yxai}, ${yxi}]`);
        }
    }
}
This consumes virtually no additional memory and completes the check in less than a second.
Is this behavior expected? I expected that, apart from the closures created for its callbacks, forEach would have no extra memory overhead compared to a traditional for loop.
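My mental model of forEach is roughly the following simplified sketch (my own approximation, not V8's actual implementation), which is why I assumed the only extra allocations would be short-lived callback closures:

// simplified, hypothetical model of Array.prototype.forEach (ignores holes and thisArg)
function naiveForEach(arr, callback) {
    for (var i = 0; i < arr.length; i++) {
        // one callback invocation per element
        callback(arr[i], i, arr);
    }
}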
EDIT: standalone test, run with:
node --expose-gc test_foreach.js
if (!gc) throw new Error('please run node like node --expose-gc test_foreach.js');

const _ = require('lodash');

// prepare data to work with
const x = 15849;
const y = 12771;
let regressData = new Array(x);
for (var i = 0; i < x; i++) {
    regressData[i] = new Array(y);
    for (var j = 0; j < y; j++) {
        regressData[i][j] = _.random(true);
    }
}

// for loop
gc();
const mb_pre_for = _.round(process.memoryUsage().heapUsed / 1024 / 1024, 2);
console.log(`memory consumption before for loop ${mb_pre_for} megabyte`);

validateFor(regressData);

gc();
const mb_post_for = _.round(process.memoryUsage().heapUsed / 1024 / 1024, 2);
const mb_for = _.round(mb_post_for - mb_pre_for, 2);
console.log(`memory consumption by for loop ${mb_for} megabyte`);

// for each loop
gc();
const mb_pre_foreach = _.round(process.memoryUsage().heapUsed / 1024 / 1024, 2);
console.log(`memory consumption before foreach loop ${mb_pre_foreach} megabyte`);

validateForEach(regressData);

gc();
const mb_post_foreach = _.round(process.memoryUsage().heapUsed / 1024 / 1024, 2);
const mb_foreach = _.round(mb_post_foreach - mb_pre_foreach, 2);
console.log(`memory consumption by foreach loop ${mb_foreach} megabyte`);

function validateFor(regressData) {
    for (var yxai = 0, yxal = regressData.length; yxai < yxal; yxai++) {
        const yx = regressData[yxai];
        for (var yxi = 0, yxl = yx.length; yxi < yxl; yxi++) {
            if (!_.isFinite(yx[yxi])) {
                throw new Error(`non-finite entry at [${yxai}, ${yxi}]`);
            }
        }
    }
}

function validateForEach(regressData) {
    regressData.forEach((yxa, yxaIndex) => {
        yxa.forEach((yx, yxIndex) => {
            if (!_.isFinite(yx)) {
                throw new Error(`non-finite entry at [${yxaIndex}, ${yxIndex}]`);
            }
        });
    });
}
Output:
toms-mbp-2:mem_test tommedema$ node --expose-gc test_foreach.js
memory consumption before for loop 1549.31 megabyte
memory consumption by for loop 0.31 megabyte
memory consumption before foreach loop 1549.66 megabyte
memory consumption by foreach loop 3087.9 megabyte
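One caveat when reading these numbers: heapUsed only covers the JS heap, while the 4 GB+ figure mentioned above refers to the whole process. A hypothetical helper like the following (a sketch, not part of the test above) would log the resident set size as well:

// hypothetical helper, not in the test script above: log both the JS heap
// usage and the process resident set size reported by process.memoryUsage()
function logMemory(label) {
    const { rss, heapUsed } = process.memoryUsage();
    const toMb = (bytes) => (bytes / 1024 / 1024).toFixed(2);
    console.log(`${label}: heapUsed ${toMb(heapUsed)} MB, rss ${toMb(rss)} MB`);
}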