http://www.ibm.com/developerworks/cn/java/j-lo-funinscala2/
输入数据(data.tsv):
简单的实现:
/**
 * Groups tab-separated records by their first two columns and sums the three
 * integer columns that follow, printing one aggregated line per group.
 *
 * Each input line is expected to hold at least five tab-separated fields:
 * two key columns followed by three integer columns.
 */
object Demo {

  /** Input file used when no path is supplied on the command line. */
  private val DefaultInputPath = "/home/xring/Desktop/data.tsv"

  /**
   * Reads the TSV file, aggregates it, and prints the result to standard output.
   *
   * @param args optional; when present, `args(0)` is used as the input path
   *             instead of the default one
   */
  def main(args: Array[String]): Unit = {
    val path = args.headOption.getOrElse(DefaultInputPath)
    val source = scala.io.Source.fromFile(path)
    // getLines() is lazy, so the lines must be materialised before the
    // underlying file handle is released.
    val lines = try source.getLines().toVector finally source.close()
    aggregate(lines).foreach(println)
  }

  /**
   * Sums columns 3-5 per distinct (column 1, column 2) pair.
   *
   * Blank lines are skipped. A non-blank line with fewer than five fields, or
   * with a non-integer value in columns 3-5, still fails with the exception
   * thrown by the indexing or by `toInt`.
   *
   * @param lines raw tab-separated input lines
   * @return one `key1<TAB>key2<TAB>sum1<TAB>sum2<TAB>sum3` line per group,
   *         sorted by the joined key
   */
  private def aggregate(lines: Seq[String]): Vector[String] = {
    val records = lines.filter(_.trim.nonEmpty).map { line =>
      val parts = line.split("\t")
      (parts.take(2).mkString("\t"), (parts(2).toInt, parts(3).toInt, parts(4).toInt))
    }
    val totals = records.groupBy(_._1).map { case (key, rows) =>
      // groupBy never produces an empty group, so reduce cannot fail here.
      key -> rows.map(_._2).reduce((a, b) => (a._1 + b._1, a._2 + b._2, a._3 + b._3))
    }
    // Keys are unique after grouping, so ordering by key alone is a total order.
    totals.toVector.sortBy(_._1).map { case (key, (sum1, sum2, sum3)) =>
      s"$key\t$sum1\t$sum2\t$sum3"
    }
  }
}
输出: