dc.js: create a dataTable with min, max and avg values for three columns

I am trying to create a dc.js / crossfilter dataTable showing the minimum, maximum and average values of three columns in the sample data. I have struggled a lot, but could not figure out how to plug the reduceAdd, reduceRemove and reduceInitial functions into a dataTable so that it produces the three required rows.

The desired output will look something like this:

| Value | Cars | Bikes | Trucks |
|-------|------|-------|--------|
| Min   | 125  | 310   | 189    |
| Max   | 230  | 445   | 290    |
| Avg   | 178  | 385   | 245    |

It is also unclear how to add the first (label) column. I know that reduceInitial can return an array (for example, ['min', 'max', 'avg']), but how do I refer to the labels from it?

 // ---- QUESTION CODE (kept exactly as posted; it is intentionally incomplete) ----
 // NOTE(review): as scraped onto single lines, the `//console.log(myCSV);` comment
 // swallows the remainder of the first line, and the `//avg ...` placeholders swallow
 // the remainder of the second — the original post presumably had line breaks here.
 // NOTE(review): jsTablWidth, tSel1/tSel2/tSel3 and dateGroup1 are undefined,
 // `reduceInital` is a typo for `reduceInitial`, and d['truck'] is never coerced
 // to a number — these gaps are exactly what the question asks about.
 var myCSV = [ {"shift":"1","date":"01/01/2016/08/00/00","car":"178","truck":"255","bike":"317","moto":"237"}, {"shift":"2","date":"01/01/2016/17/00/00","car":"125","truck":"189","bike":"445","moto":"273"}, {"shift":"3","date":"02/01/2016/08/00/00","car":"140","truck":"219","bike":"328","moto":"412"}, {"shift":"4","date":"02/01/2016/17/00/00","car":"222","truck":"290","bike":"432","moto":"378"}, {"shift":"5","date":"03/01/2016/08/00/00","car":"200","truck":"250","bike":"420","moto":"319"}, {"shift":"6","date":"03/01/2016/17/00/00","car":"230","truck":"220","bike":"310","moto":"413"}, {"shift":"7","date":"04/01/2016/08/00/00","car":"155","truck":"177","bike":"377","moto":"180"}, {"shift":"8","date":"04/01/2016/17/00/00","car":"179","truck":"203","bike":"405","moto":"222"}, {"shift":"9","date":"05/01/2016/08/00/00","car":"208","truck":"185","bike":"360","moto":"195"}, {"shift":"10","date":"05/01/2016/17/00/00","car":"150","truck":"290","bike":"315","moto":"280"}, {"shift":"11","date":"06/01/2016/08/00/00","car":"200","truck":"220","bike":"350","moto":"205"}, {"shift":"12","date":"06/01/2016/17/00/00","car":"230","truck":"170","bike":"390","moto":"400"}, ]; dataTable = dc.dataTable('#dataTable'); lc1 = dc.lineChart("#line1"); lc2 = dc.lineChart("#line2"); lc3 = dc.lineChart("#line3"); var dateFormat = d3.time.format("%d/%m/%Y/%H/%M/%S"); myCSV.forEach(function (d) { d.date = dateFormat.parse(d.date); }); myCSV.forEach(function (d) { d['car'] = +d['car']; d['bike'] = +d['bike']; d['moto'] = +d['moto']; }); //console.log(myCSV); var facts = crossfilter(myCSV); var dateDim = facts.dimension(function (d) {return d.date}); var carDim = facts.dimension(function (d) {return d['car']}); var dgCar = dateDim.group().reduceSum(function (d) {return d['car']}); var bikeDim = facts.dimension(function (d) {return d['bike']}); var dgBike = dateDim.group().reduceSum(function (d) {return d['bike']}); var motoDim = facts.dimension(function (d) {return d['moto']}); var dgMoto = 
dateDim.group().reduceSum(function (d) {return d['moto']}); var minDate = new Date ("2016-01-01T08:00:00.000Z"); var maxDate = new Date ("2016-01-03T17:00:00.000Z"); var maxY = d3.max(myCSV, function(d) {return d['car']}); function reduceAdd(i,d){ return i+1; } function reduceRemove(i,d){return i-1; } function reduceInitial(){ return ['min','max','avg'];} dataTable .width(jsTablWidth) .height(400) .dimension(dateDim) .group( function(d){return '';} ) .columns([ { label: 'Value', format: function(d) { return dateGroup1.reduce(reduceAdd,reduceRemove,reduceInital); } }, { label: tSel1.replace(/_/g, " "), format: function(d) { return //avg cars ; } }, { label: tSel2.replace(/_/g, " "), format: function(d) { return //avg bikes ; } }, { label: tSel3.replace(/_/g, " "), format: function(d) { return //avg moto; } } ]); dc.renderAll(); dc.redrawAll(); 
 svg{height:280px;} 
 <script src="http://cdnjs.cloudflare.com/ajax/libs/d3/3.3.3/d3.min.js"></script> <script src="http://cdnjs.cloudflare.com/ajax/libs/crossfilter/1.3.1/crossfilter.min.js"></script> <script src="http://dc-js.imtqy.com/dc.js/js/dc.js"></script> <link href="http://dc-js.imtqy.com/dc.js/css/dc.css" rel="stylesheet"/> <svg id="dataTable"></svg> <svg id="line1"></svg> <svg id="line2"></svg> <svg id="line3"></svg> 
+5
source share
2 answers

Well, I hope it is OK that I transposed the table, putting the vehicle types as rows instead of columns. This solution is already rather messy, even apart from that.

sample output

There is really no way to compute the min and max without keeping track of all the values. Therefore, we are going to use the reducers from the complex reduce example. They don't actually reduce at all; instead they maintain a sorted array of the filtered rows.

We need a unique key to keep the array sorted (so that we delete the correct row). Fortunately, you have one in the shift field.

So, here are those functions, or rather, the functions that generate reducers with a unique access key.

  // Reducer factories for the "complex reduce" pattern: instead of reducing to
  // a scalar, the group maintains a sorted array of the raw filtered rows so
  // order statistics (min/max) can be computed later. `keyfn` extracts a
  // unique key per row (here: the `shift` field) so removals splice out the
  // right row.

  // Returns an add-reducer inserting `item` into `elements`, keeping the
  // array sorted by keyfn.
  function groupArrayAdd(keyfn) {
      var bisect = d3.bisector(keyfn);
      return function(elements, item) {
          var pos = bisect.right(elements, keyfn(item));
          elements.splice(pos, 0, item);
          return elements;
      };
  }

  // Returns a remove-reducer that locates `item` by its key and splices it out.
  function groupArrayRemove(keyfn) {
      var bisect = d3.bisector(keyfn);
      return function(elements, item) {
          var pos = bisect.left(elements, keyfn(item));
          // Guard pos === elements.length (key not present): the original read
          // elements[pos] unconditionally, which throws inside keyfn (e.g.
          // dc.pluck) when the item was never added.
          if (pos < elements.length && keyfn(elements[pos]) === keyfn(item))
              elements.splice(pos, 1);
          return elements;
      };
  }

  // Initial accumulator: an empty (sorted) array of rows.
  function groupArrayInit() {
      return [];
  }

Since these maintain references to all the rows, we only need one group; we will apply more specific accessors when we compute the statistics below.

Here we want crossfilter.groupAll , which reduces everything into a single bin. This is because the rows are not partitioned by any key; every row contributes to all modes of transport:

 // Single groupAll bin that maintains a sorted array of all currently-filtered
 // rows, keyed by the unique `shift` field so removals target the right row.
 var filteredRows = facts.groupAll().reduce( groupArrayAdd(dc.pluck('shift')), groupArrayRemove(dc.pluck('shift')), groupArrayInit ); 

Now the most absurd part. We are going to create the fakest dimension object you have ever seen. The important thing is that this is an object with the .bottom() method, which dynamically computes each of the lines:

 // Fake crossfilter "dimension": dc.dataTable only calls .bottom(), so that is
 // the only method we fake. Each call builds one row descriptor per vehicle:
 //   key   - the row label,
 //   value - the current array of filtered source rows (shared snapshot),
 //   acc   - the accessor pulling this vehicle's field out of a source row.
 // NOTE(review): the question's prep code never coerces d.truck to a number —
 // verify, or the Truck min/max will compare strings.
 var fakeDim = {
     bottom: function() {
         return [
             {key: 'Car',   value: filteredRows.value(), acc: dc.pluck('car')},
             // BUG FIX: the original plucked 'car' here, so the Truck row
             // displayed car statistics.
             {key: 'Truck', value: filteredRows.value(), acc: dc.pluck('truck')},
             {key: 'Bike',  value: filteredRows.value(), acc: dc.pluck('bike')},
             {key: 'Moto',  value: filteredRows.value(), acc: dc.pluck('moto')}
         ];
     }
 };

Also, wait, doesn’t it seem like it does any calculations at all, just getting the values? And what is this weird acc ?

Well, we produce exactly the raw data we need to build the table rows, and we will use the column format accessors below to actually compute everything. We use key for the label column; we store the original rows in the value member; and we supply an acc accessor for computing the statistics.

The data table definition is as follows:

 // Wire the table to the fake dimension: one row per vehicle type (from
 // fakeDim.bottom()), a single empty group header, and four columns — the
 // row label plus min/max/mean computed over the filtered rows via each
 // row's own accessor (d3-array does the actual math).
 dataTable .width(400) .height(400) .dimension(fakeDim) .group( function(d){return '';} ) .columns([ { label: 'Value', format: function(d) { return d.key; } }, { label: 'Min', format: function(d) { return d3.min(d.value, d.acc); } }, { label: 'Max', format: function(d) { return d3.max(d.value, d.acc); } }, { label: 'Avg', format: function(d) { return d3.mean(d.value, d.acc); } } ]); 

Here, all calculations will be finalized. We will have all the rows available, and we have an accessor for each row of the table. d3-array has convenient functions for calculating the min, max and average array. Boom done.

I threw a complicated diagram into this fiddle for testing. (I know that stacking these values ​​probably doesn't make sense; it just helps to filter.)

http://jsfiddle.net/gordonwoodhull/g4xqvgvL/21/

Rotation dataTable

The extra bounty reminded me that I never solved the stated problem of transposing the table, so I thought I would take a look, because it's fun. I still think the bounty should go to @SergGr, but here is a solution for transposing a table, driven by the categories, the dimension, and the column accessors/formats.

First we need a list of categories, so let the structure of categories and fields be a little better:

 // Display label -> source field name. Key order here determines row order.
 var categories = { Car: 'car', Truck: 'truck', Bike: 'bike', Moto: 'moto' }; 

Now the fake dimension can be simplified because it is generated from this category:

 // Factory for the fake dimension: generates one .bottom() row per category,
 // so adding a vehicle type only requires touching the `categories` object.
 function fake_dimension(cats) {
     // One row descriptor for a single category label; called lazily from
     // bottom() so the filtered-row snapshot is current at render time.
     function makeRow(label) {
         return {
             key: label,
             value: filteredRows.value(),
             acc: dc.pluck(cats[label])
         };
     }
     return {
         bottom: function() {
             return Object.keys(cats).map(makeRow);
         }
     };
 }
 var fakeDim = fake_dimension(categories);

We need to derive the definitions of the columns from the definition of the chart, because we are going to convert them:

 // Column definitions for the aggregate table. The first column prints the
 // row label (vehicle type); the remaining three compute summary statistics
 // over the currently-filtered rows using the row's own accessor.
 function statColumn(name, statFn) {
     return {
         label: name,
         format: function(row) { return statFn(row.value, row.acc); }
     };
 }
 var columns = [
     { label: 'Value', format: function(row) { return row.key; } },
     statColumn('Min', function(rows, acc) { return d3.min(rows, acc); }),
     statColumn('Max', function(rows, acc) { return d3.max(rows, acc); }),
     statColumn('Avg', function(rows, acc) { return d3.mean(rows, acc); })
 ];

Finally, we can write the transpose function:

  // Transposes a dataTable spec: given the category map, the fake dimension
  // and the original column definitions, returns { dim, cols } where the old
  // columns have become rows and the categories have become columns.
  // NOTE(review): d3.map(array, keyfn) needs d3 3.5+; the page above loads
  // d3 3.3.3 — verify the actual version used.
  function transpose_datatable(cats, dim, cols) {
      // 1: map each original column's label to its definition, because the
      // columns will become rows. d3.map acts as a well-behaved JS object.
      var cols2 = d3.map(cols, function(col) {
          return col.label;
      });
      return {
          // 2: a new fake dimension exposing only .bottom(), like the one above.
          dim: {
              bottom: function() {
                  // 3: index all source rows by key (category name).
                  var dall = d3.map(dim.bottom(Infinity), function(row) {
                      return row.key;
                  });
                  // 4: one output row per original column (skipping the label
                  // column); each carries the source row for every category.
                  return cols.slice(1).map(function(col) {
                      var row = { label: col.label };
                      Object.keys(cats).forEach(function(k) {
                          row[k] = dall.get(k);
                      });
                      return row;
                  });
              }
          },
          // 5: the first new column shows the old column labels.
          cols: [
              { label: cols[0].label, format: function(d) { return d.label; } }
          ].concat(Object.keys(cats).map(function(k) {
              // 6: one column per category; format delegates to the original
              // column's format, picking the category's data out of the row.
              return {
                  label: k,
                  format: function(d) { return cols2.get(d.label).format(d[k]); }
              }
          }))
      };
  }
  var transposed = transpose_datatable(categories, fakeDim, columns)
  • First, we need a map of the source columns for their definitions, because they will become rows. Here we can use d3.map , which acts as a well-executed JavaScript object.
  • We are going to create a new fake dimension and a new array of column definitions. The fake dimension has only the .bottom() method, like the previous one.
  • In the definition of .bottom() will need all the source data indexed by key (category name). So we add this to the d3.map object.
  • Now we can create fake measurement data. There were only headings in the first column (which will now be the column headings), so we will skip this. The data for the row will be the new name (old column label) and field for each category. Fields are filled with rows from the original dimension.
  • In the new column definitions, the first becomes the label column, and the remaining columns are generated from the category names.
  • The label for each column is now the category name, and .format() calls the original format column, retrieving the data using the category name.

New screenshot:

rotatable datatable screenshot

+4
source

This is another solution that gives the result closer to the requested ones, but with much more code than Gordon’s.

Introduction

I agree with Gordon that there is no sensible way to achieve what you want directly with crossfilter . crossfilter is row-oriented, and you want to produce multiple rows based on columns. Thus, the only way is to take some kind of "fake" step. And a "fake" step implicitly means the result would not be updated when the original data source changes. I see no way to fix this, since crossfilter hides its internals well enough (e.g. filterListeners , dataListeners and removeDataListeners ).

However, dc is implemented in such a way that by default, after various events, all diagrams are redrawn (because they are all in the same global group). And because of this, “fake objects”, if implemented correctly, can also be recalculated based on updated data.

So my code contains two implementations for min / max:

  • fast(er), but unsafe if you do any additional filtering
  • slow (er), but safe if you want to do additional filtering

Please note that if you use the fast but unsafe implementation and then perform additional filtering, you will get exceptions, and other functionality may break as well.

code

All code is available at https://jsfiddle.net/4kcu2ut1/1/ . Let me divide it into logical blocks and see them one by one.

First come the helper methods and objects. Each Op object essentially bundles the methods to pass to reduce , plus an optional getOutput method for operations whose accumulator contains more data than just the result — for example avgOp and the "safe" min/max operations.

 // Reusable reduce "operations". Each op bundles the three crossfilter reduce
 // callbacks (add/remove/initial) plus an optional getOutput that extracts the
 // final value when the accumulator carries more state than the result itself.

 // Fast min: keeps only the running minimum. Cheap, but remove() is impossible
 // without the full history, so it must not be used with additional filters.
 var minOpFast = {
     add: function (acc, el) { return Math.min(acc, el); },
     remove: function (acc, el) { throw new Error("Not supported"); },
     // Infinity is the identity for min (the original used Number.MAX_VALUE,
     // which is merely the largest finite double).
     initial: function () { return Infinity; }
 };
 // Fast max: same trade-off as minOpFast.
 var maxOpFast = {
     add: function (acc, el) { return Math.max(acc, el); },
     remove: function (acc, el) { throw new Error("Not supported"); },
     // BUG FIX: the original used Number.MIN_VALUE, which is the smallest
     // *positive* double — any all-negative data set would have reported
     // Number.MIN_VALUE as its maximum. -Infinity is the correct identity.
     initial: function () { return -Infinity; }
 };
 // Lower-bound binary search: index of target, or of the first element >= target.
 var binarySearch = function (arr, target) {
     var lo = 0;
     var hi = arr.length;
     while (lo < hi) {
         var mid = (lo + hi) >>> 1; // unsigned shift: safe int division
         if (arr[mid] === target) return mid;
         else if (arr[mid] < target) lo = mid + 1;
         else hi = mid;
     }
     return lo;
 };
 // Safe min: keeps a sorted array of every filtered value so remove() works.
 var minOpSafe = {
     add: function (acc, el) {
         var index = binarySearch(acc, el);
         acc.splice(index, 0, el);
         return acc;
     },
     remove: function (acc, el) {
         // Only called for previously added values, so the index is valid.
         var index = binarySearch(acc, el);
         acc.splice(index, 1);
         return acc;
     },
     initial: function () { return []; },
     getOutput: function (acc) { return acc[0]; } // first = smallest
 };
 // Safe max: same sorted-array bookkeeping, reads from the other end.
 var maxOpSafe = {
     add: function (acc, el) {
         var index = binarySearch(acc, el);
         acc.splice(index, 0, el);
         return acc;
     },
     remove: function (acc, el) {
         var index = binarySearch(acc, el);
         acc.splice(index, 1);
         return acc;
     },
     initial: function () { return []; },
     getOutput: function (acc) { return acc[acc.length - 1]; } // last = largest
 };
 // Running average: tracks count and sum; avg is recomputed on every change.
 var avgOp = {
     add: function (acc, el) {
         acc.cnt += 1;
         acc.sum += el;
         acc.avg = acc.sum / acc.cnt;
         return acc;
     },
     remove: function (acc, el) {
         acc.cnt -= 1;
         acc.sum -= el;
         // BUG FIX: guard the empty case — the original divided by zero here,
         // yielding NaN once every row was filtered out.
         acc.avg = acc.cnt === 0 ? 0 : acc.sum / acc.cnt;
         return acc;
     },
     initial: function () { return { cnt: 0, sum: 0, avg: 0 }; },
     getOutput: function (acc) { return acc.avg; }
 };

Then we prepare the initial data and describe the transformation we want. aggregates is a list of the operations from the previous step, each additionally decorated with a key for storing temporary data in the composite accumulator (it must be unique) and a label to display in the output. srcKeys contains the list of property names (all of which must be of the same shape) to be processed by each operation from the aggregates list.

 var myCSV = [ {"shift": "1", "date": "01/01/2016/08/00/00", "car": "178", "truck": "255", "bike": "317", "moto": "237"}, {"shift": "2", "date": "01/01/2016/17/00/00", "car": "125", "truck": "189", "bike": "445", "moto": "273"}, {"shift": "3", "date": "02/01/2016/08/00/00", "car": "140", "truck": "219", "bike": "328", "moto": "412"}, {"shift": "4", "date": "02/01/2016/17/00/00", "car": "222", "truck": "290", "bike": "432", "moto": "378"}, {"shift": "5", "date": "03/01/2016/08/00/00", "car": "200", "truck": "250", "bike": "420", "moto": "319"}, {"shift": "6", "date": "03/01/2016/17/00/00", "car": "230", "truck": "220", "bike": "310", "moto": "413"}, {"shift": "7", "date": "04/01/2016/08/00/00", "car": "155", "truck": "177", "bike": "377", "moto": "180"}, {"shift": "8", "date": "04/01/2016/17/00/00", "car": "179", "truck": "203", "bike": "405", "moto": "222"}, {"shift": "9", "date": "05/01/2016/08/00/00", "car": "208", "truck": "185", "bike": "360", "moto": "195"}, {"shift": "10", "date": "05/01/2016/17/00/00", "car": "150", "truck": "290", "bike": "315", "moto": "280"}, {"shift": "11", "date": "06/01/2016/08/00/00", "car": "200", "truck": "220", "bike": "350", "moto": "205"}, {"shift": "12", "date": "06/01/2016/17/00/00", "car": "230", "truck": "170", "bike": "390", "moto": "400"}, ]; var dateFormat = d3.time.format("%d/%m/%Y/%H/%M/%S"); myCSV.forEach(function (d) { d.date = dateFormat.parse(d.date); d['car'] = +d['car']; d['bike'] = +d['bike']; d['moto'] = +d['moto']; d['truck'] = +d['truck']; d.shift = +d.shift; }); //console.table(myCSV); var aggregates = [ // not compatible with addtional filtering /*{ key: 'min', label: 'Min', agg: minOpFast },**/ { key: 'minSafe', label: 'Min Safe', agg: minOpSafe }, // not compatible with addtional filtering /*{ key: 'max', label: 'Max', agg: maxOpFast },*/ { key: 'maxSafe', label: 'Max Safe', agg: maxOpSafe }, { key: 'avg', agg: avgOp, label: 'Average' } ]; var srcKeys = ['car', 'bike', 'moto', 'truck']; 

And now to the magic. buildTransposedAggregatesDimension does all the hard work here. Essentially, it takes two steps:

  • First, groupAll get aggregated data for each combination in the cross product of all operands and all keys.

  • Split the grouped mega object into an array that can be a data source for another crossfilter

Step 2 is where mine is "fake". It seems to me that it is less "fake" than Gordon's solution, since it does not rely on any internal details of crossfilter or dc (see the .bottom() method in Gordon's solution).

Also the separation in step 2 is where the data is actually transferred according to your requirements. Obviously, the code can be easily modified to not do this and produce results in the same way as in the Gordon solution.

Please also note that it is important that this extra step performs no additional calculations and only converts already-computed values into a suitable format. This matters for updates-after-filtering to work: the table bound to the result of buildTransposedAggregatesDimension is still effectively bound to the original crossfilter data.

 // Builds a crossfilter dimension over the *transposed* aggregate data:
 // one element per aggregation op, each carrying the aggregated value for
 // every source key. Step 1 computes everything in a single composite
 // groupAll accumulator; step 2 merely reshapes that result, so tables
 // built on it still reflect filter changes in the source crossfilter.
 // (Reformatted onto multiple lines: in the scraped one-line form the
 // inline comments swallowed the rest of the code.)
 var buildTransposedAggregatesDimension = function (facts, keysList, aggsList) {
     // "grouped" is a single record with all aggregates for all keys computed
     var grouped = facts.groupAll()
         .reduce(
             function add(acc, el) {
                 aggsList.forEach(function (agg) {
                     var innerAcc = acc[agg.key];
                     keysList.forEach(function (key) {
                         var v = el[key];
                         innerAcc[key] = agg.agg.add(innerAcc[key], v);
                     });
                     acc[agg.key] = innerAcc;
                 });
                 return acc;
             },
             function remove(acc, el) {
                 aggsList.forEach(function (agg) {
                     var innerAcc = acc[agg.key];
                     keysList.forEach(function (key) {
                         var v = el[key];
                         innerAcc[key] = agg.agg.remove(innerAcc[key], v);
                     });
                     acc[agg.key] = innerAcc;
                 });
                 return acc;
             },
             function initial() {
                 var acc = {};
                 aggsList.forEach(function (agg) {
                     var innerAcc = {};
                     keysList.forEach(function (key) {
                         innerAcc[key] = agg.agg.initial();
                     });
                     acc[agg.key] = innerAcc;
                 });
                 return acc;
             }).value();
     // split grouped back to array with element for each aggregation function
     var groupedAsArr = [];
     aggsList.forEach(function (agg, index) {
         groupedAsArr.push({
             sortIndex: index, // preserve index in aggsList so we can sort by it later
             //agg: agg,
             key: agg.key,
             label: agg.label,
             valuesContainer: grouped[agg.key],
             // Resolves the displayed value for one source column; ops with a
             // getOutput keep extra state, the others store the value itself.
             getOutput: function (columnKey) {
                 var aggregatedValueForKey = grouped[agg.key][columnKey];
                 return agg.agg.getOutput !== undefined ? agg.agg.getOutput(aggregatedValueForKey) : aggregatedValueForKey;
             }
         })
     });
     // Wrap the flattened rows in their own crossfilter; identity dimension.
     return crossfilter(groupedAsArr).dimension(function (el) { return el; });
 };

The small helper method buildColumns creates columns for each source key in srcKeys + an additional column for the operation label

 // Builds the dataTable column spec: a leading "Aggregate" label column
 // followed by one value column per source key.
 var buildColumns = function (srcKeys) {
     var labelColumn = {
         label: "Aggregate",
         format: function (row) { return row.label; }
     };
     var valueColumns = srcKeys.map(function (key) {
         return {
             label: key,
             format: function (row) { return row.getOutput(key); }
         };
     });
     return [labelColumn].concat(valueColumns);
 };

So now let's put everything together and create a table.

 var facts = crossfilter(myCSV); var aggregatedDimension = buildTransposedAggregatesDimension(facts, srcKeys, aggregates); dataTable = dc.dataTable('#dataTable'); // put such a <table> in your HTML! dataTable .width(500) .height(400) .dimension(aggregatedDimension) .group(function (d) { return ''; }) .columns(buildColumns(srcKeys)) .sortBy(function (el) { return el.sortIndex; }) .order(d3.ascending); //dataTable.render(); dc.renderAll(); 

There is another piece of code shamelessly stolen from Gordon to add a line chart for additional filtering.

+4
source

Source: https://habr.com/ru/post/1264759/