var combineByKey = function() { var Tuple = require(EclairJS_Globals.NAMESPACE + '/Tuple'); var pairRdd =sparkContext.parallelizePairs([ new Tuple("coffee", 1), new Tuple("coffee", 2), new Tuple("coffee", 4), new Tuple("pandas", 3) ]); var avgCounts = pairRdd.combineByKey( function(x, Tuple) { // createAcc return new Tuple(x, 1); }, function(tuple, x) { // addAndCount tuple[0] += x; // total tuple[1] += 1; // num return tuple; }, function(t1, t2) { // combine t1[0] += t2[0]; // total t1[1] += t2[1]; // num return t1; }, 1, [Tuple]); var countMap = avgCounts.collectAsMap(); return JSON.stringify(countMap); }
var countByKey = function() { var Tuple = require(EclairJS_Globals.NAMESPACE + '/Tuple'); var pairRdd =sparkContext.parallelizePairs([ new Tuple("coffee", 1), new Tuple("coffee", 2), new Tuple("coffee", 4), new Tuple("pandas", 3) ]); var count = pairRdd.countByKey(); return JSON.stringify(count); }