Return min, max, std (deviation), sum, mean, median of multiple columns

Hi all,

I am new to d3.js (I started learning it two or three weeks ago) and would like to know how I can aggregate multiple columns of a loaded JSON file.
Here is what I tried so far:

// Request the JSON dataset and pass the loaded data to loadData once the promise resolves.
d3.json("https://data.cityofnewyork.us/resource/jb7j-dtam.json").then(loadData);
// Promise callback: runs ExtentValues on the loaded data and returns its result.
// NOTE(review): the return value only becomes the resolution value of the promise
// chain above; nothing visible here logs or displays it.
function loadData(data) {
      let aggregate = ExtentValues(data);
      return aggregate; 
}

// First attempt: compute min, max, mean, median, sum and standard deviation in one pass
// each, and return them as a flat array of alternating labels and values.
// NOTE(review): every accessor below returns an ARRAY of several column values instead of
// a single number per row. The poster reports that min/max look right while the other
// statistics come back as NaN; a numeric accessor per column (e.g. `d => +d.deaths`)
// is presumably what the d3 statistics functions expect — confirm against d3-array docs.
const ExtentValues = function(data) {
      let min = d3.min(data, (fn) => {return [fn. year, fn.deaths, fn.death_rate, fn.age_adjusted_death_rate] });
      let max = d3.max(data, (fn) => {return [fn. year, fn.deaths, fn.death_rate, fn.age_adjusted_death_rate] });
      let mean = d3.mean(data, (fn) => {return [fn.deaths, fn.death_rate, fn.age_adjusted_death_rate] });
      let median = d3.median(data, (fn) => {return [fn.deaths, fn.death_rate, fn.age_adjusted_death_rate] });
      let sum = d3.sum(data, (fn) => {return [fn.deaths, fn.death_rate, fn.age_adjusted_death_rate] });
      let std = d3.deviation(data, (fn) => {return [fn.deaths, fn.death_rate, fn.age_adjusted_death_rate] });

// mean and std are formatted to three decimals and converted back with Number().
// NOTE(review): the "," in the ",.3f" specifier presumably inserts thousands separators,
// which Number() cannot parse (e.g. "1,234.000" -> NaN) — confirm against d3-format docs.
// The "Summary: " label is attached to the sum.
return ["Minimum: ", min, "Maximum: ", max, "Mean/Avg: ", Number(d3.format(",.3f")(mean)), "Median: ", median, "Summary: ", sum, "Std: ",Number(d3.format(",.3f")(std))]
}

// In the output, min and max return the expected values, but the rest are NaN.

I searched the internet for a solution, but there is not much information about this.

Thanks in advance.

Best regards,
Kanatbek

Hi, I’ve loaded the dataset in this notebook Aggregating data / recifs / Observable to try and answer, but it would be useful to get more details about the type of results you are looking for.

To get the average age_adjusted_death_rate, for example, you could write

// Mean of one column: the unary + coerces each row's age_adjusted_death_rate to a number.
d3.mean(data, (d) => +d.age_adjusted_death_rate)

but this is an average across all rows, and I’m not sure what it might mean as a statistic.

Don’t hesitate to play with that notebook to get a sense of how d3 functions work.

Hi Fil,

Thanks for your reply. Indeed, the data aggregation works perfectly for a single column, as you showed. What I am trying to do is aggregate multiple columns at once instead of typing each column separately; so far I have only succeeded with min and max.
I think there should be a way to get the average, median, sum, and std values of all numeric columns in d3.js, as in other programming languages (e.g. Python, R, Java). When I iterate over the data, I get null for all columns.
And when I nest the data to aggregate multiple columns, the result is undefined:

// Second attempt: group the rows by year with d3.nest() and compute the statistics per group.
// NOTE(review): each accessor is an arrow function with a BRACED body and no `return`.
// `{+fn.deaths, +fn.death_rate, ...}` is parsed as a block containing a comma expression,
// not as an object or array literal, so every accessor returns undefined.
const ExtentValues = function(data) {
    let metrics = d3.nest()
          .key(function(v) { return v.year; })
          .rollup(function(dv) { return {
          min : d3.min(dv, fn => {+fn.deaths, +fn.death_rate, +fn.age_adjusted_death_rate} ),
          max : d3.max(dv, fn => {+fn.deaths, +fn.death_rate, +fn.age_adjusted_death_rate} ),
          avg: d3.mean(dv, fn => {+fn.deaths, +fn.death_rate, +fn.age_adjusted_death_rate} ),
          median: d3.median(dv, fn => {+fn.deaths, +fn.death_rate, +fn.age_adjusted_death_rate} ),
          std : d3.deviation(dv, fn => {+fn.deaths, +fn.death_rate, +fn.age_adjusted_death_rate} ),
          sum: d3.sum(dv, fn => {+fn.deaths, +fn.death_rate, +fn.age_adjusted_death_rate} )
          };
       // NOTE(review): the comparator reads `.year` on the nest entries; d3.nest entries
       // presumably expose the group key as `.key` instead — confirm against d3-collection docs.
       }).entries(data).sort((a, b) => b.year - a.year);   
    return metrics;
}
// result undefined of sorted in descending order column year  

The purpose of aggregating multiple numeric columns is that I would like to display exploratory statistics on a page when the user hovers the mouse over a column.

Can you give me some direction or point me to sources that would help me solve it?

Kind regards,
Kanatbek

you would have to iterate over the column names; maybe something like

// Produces one mean per entry of `columns`, reading each row's value by column name
// and coercing it to a number with unary +.
// (`columns` is assumed to be an array of column-name strings — it is not defined here.)
columns.map(column => d3.mean(data, (d) => +d[column]))

I tried to iterate over the column names as suggested above; however, the result is null, e.g.:

// Third attempt: loop over the column names and the rows to compute a minimum per column.
// NOTE(review): as written this yields at most one value — `return min` exits the whole
// function the first time the `if` condition holds.
const ExtentValues = function(data) {
  let lst_keys = [ "deaths", "death_rate", "age_adjusted_death_rate"];
  for (let k=0; k< lst_keys.length; k++)
      for (let m=0; m< data.length; m++) {
        // Holds when the row has a property named lst_keys[k]; the `===` half is only
        // true if the row itself is that very string.
        if (lst_keys[k] === data[m] || lst_keys[k] in data[m] ) {
          // NOTE(review): `fn.lst_keys[k]` reads a property literally named "lst_keys"
          // on the row and then indexes it; `fn[lst_keys[k]]` was probably intended.
          let min = d3.min(data, (fn) => {return fn.lst_keys[k]  });
          return min;
}
}
}

I also tried this one, and I get undefined as well:

// NOTE(review): forEach ignores what its callback produces and itself evaluates to
// undefined, so the means computed here are never collected. `da` is one element of
// `data`, while `.map` is an array method; this presumably should map over an array of
// column names rather than over each row — confirm.
data.forEach(function(da) {
    da.map(column => d3.mean(data, (d) => +d[column]))
})

If you don’t mind, can you share your outputs with me?

Best regards,

Sure: Aggregating data / recifs / Observable

Thanks, Fil, for showing how to iterate over columns in d3.js. In the Observable notebook your code works perfectly, and I understand the idea behind it.
However, if I run your code on my PC (Windows 10) with D3.js v5, it does not work: it throws errors saying that d3.count(), d3.min, d3.max, d3.mean, etc. are not functions, which is strange.

I also tried to check the logic of the code in the notebook you created, but it failed too. Here is a screenshot:

Any ideas?

Best regards

My code works with d3 version 7.

I downloaded d3 v7 from the d3 website, and the code outputs undefined even though I return the value.

you need to add a return on line 3, before lst_keys.map

In that case it throws: SyntaxError: unexpected “.” at `return lst_keys.map(col => { })`

I am getting the expected result for grouped/ nested values by a column

//  grouped by Year
// Groups the rows by `year` (keys sorted with d3.descending) and, for every group,
// computes min/max/avg/median/std/total for each column name in `lst_keys`.
// Relies on an outer `lst_keys` array that is not defined inside this function.
// Returns whatever d3.nest().entries(data) produces for the rolled-up groups.
const groupedValues = function(data) {
          let metrics = d3.nest()
                  .key(function(col) { return col.year; })
                  .sortKeys(d3.descending)
                  .rollup(function(dv) { return {
                        // NOTE(review): the property name "metircs" looks like a typo of
                        // "metrics"; consumers must use this exact spelling.
                        metircs : lst_keys.map(column => {
                          return {
                              column,
                              // NOTE(review): this is the row count of the WHOLE dataset,
                              // not of the current group (`dv`) — confirm that is intended.
                              rows: data.length,
                              min: d3.min(dv, (fn) => {return +fn[column] }),
                              max: d3.max(dv, (fn) => {return +fn[column] }),
                              avg: d3.mean(dv, (fn) => {return +fn[column] }),
                              median: d3.median(dv, (fn) => {return +fn[column] }),
                              std: d3.deviation(dv, (fn) => {return +fn[column] }),
                              total: d3.sum(dv, (fn) => {return +fn[column] }),
                              // cumsum: d3.cumsum(dv, (fn) => {return +fn[column] })
                };
          })
          };
          }).entries(data)
           return metrics;
}

But the same logic does not work when computing general metrics for each column:

// Trying to get metrics without grouping by year, sex, or leading_causes
// Column names whose values are aggregated below.
let lst_keys = ["deaths", "death_rate", "age_adjusted_death_rate"];
// Final attempt: build one statistics object per column over all rows (no grouping).
// NOTE(review): the array produced by `lst_keys.map(...)` is neither returned nor stored,
// so calling ExtentValues always evaluates to undefined; a `return` in front of
// `lst_keys.map` is what the earlier reply in this thread suggests.
const ExtentValues= function(value) {
                lst_keys.map(function(col) {
                   // NOTE(review): d3.map differs between versions — in d3 v5 it presumably
                   // builds a d3-collection map (no `.length`), while newer d3-array versions
                   // map an iterable to a plain array; confirm against the installed version.
                   const val = d3.map(value, (out) => +out[col]);
                       return {
                          col,
                          rows: val.length,     
                          min: d3.min(val),
                          max: d3.max(val),
                          avg: d3.mean(val),
                         std: d3.deviation(val),
                         total: d3.sum(val),
                     // cumsum: d3.cumsum(val)  // traceback d3.cumsum() is not a function
              };
             // Unreachable: the callback has already returned the object above.
             return val;
          });
     }

I appreciate any hint.