aggregateByKey
def aggregateByKey[U: ClassTag](zeroValue: U)(seqOp: (U, V) => U,combOp: (U, U) => U): RDD[(K, U)]
// Example 1: distribute the eight key/value pairs across 4 partitions.
val z = sc.makeRDD(
  List(("a", 1), ("b", 2), ("a", 2), ("a", 3), ("b", 0), ("c", 1), ("c", 2), ("c", 9)),
  4
)
// Inspect which pairs ended up in each partition.
z.glom().collect()
// Array[Array[(String, Int)]] = Array(Array((a,1), (b,2)), Array((a,2), (a,3)), Array((b,0), (c,1)), Array((c,2), (c,9)))
// seqOp keeps the largest value seen for a key inside one partition (starting from zeroValue 0);
// combOp then adds the per-partition maxima of that key together:
//   a: max(0, 1)    + max(0, 2, 3) = 1 + 3 = 4
//   b: max(0, 2)    + max(0, 0)    = 2 + 0 = 2
//   c: max(0, 1)    + max(0, 2, 9) = 1 + 9 = 10
z.aggregateByKey(0)((acc, v) => math.max(acc, v), (left, right) => left + right).collect()
// Array[(String, Int)] = Array((a,4), (b,2), (c,10))
// Example 2: the same pairs, this time placed in a single partition.
val z = sc.makeRDD(
  List(("a", 1), ("b", 2), ("a", 2), ("a", 3), ("b", 0), ("c", 1), ("c", 2), ("c", 9)),
  1
)
// With only one partition there is nothing to merge across partitions, so combOp (_ * _)
// is never executed. Each result is just zeroValue 1 folded with the key's values via seqOp:
//   a: 1 + 1 + 2 + 3 = 7,  b: 1 + 2 + 0 = 3,  c: 1 + 1 + 2 + 9 = 13
z.aggregateByKey(1)(_ + _, _ * _).collect()
// Array((a,7), (b,3), (c,13))
// Example 3: every value is 1, so summing the values counts the occurrences of each key.
val z = sc.makeRDD(
  List(("a", 1), ("b", 1), ("a", 1), ("a", 1), ("b", 1), ("c", 1), ("c", 1), ("c", 1)),
  1
)
// The number of partitions does not matter here: seqOp and combOp are both addition and
// zeroValue 0 is the identity for +, so any partitioning yields the same totals.
z.aggregateByKey(0)(_ + _, _ + _).collect()
// Array((a,3), (b,2), (c,3))
aggregateByKey
def aggregateByKey[U: ClassTag](zeroValue: U)(seqOp: (U, V) => U, combOp: (U, U) => U): RDD[(K, U)]
上一篇:linux定时任务crontab