数据集中每一行的格式均为 “http://bigdata.edu360.cn/laozhou”,要求统计出最受欢迎的老师。
分组TOPN算法:
/**
 * Spark job that ranks teachers by how often they appear in the input.
 *
 * Each input line is expected to look like `http://bigdata.edu360.cn/laozhou`;
 * the fourth "/"-separated segment (`laozhou`) is taken as the teacher name.
 * The job counts the occurrences of every teacher, sorts the counts in
 * descending order, collects them to the driver and prints them.
 */
object FavTeacher {

  /**
   * Extracts the teacher name from one input line.
   *
   * @param line a URL such as `http://bigdata.edu360.cn/laozhou`
   * @return the fourth "/"-separated segment, or `None` when the line has
   *         fewer than four segments (for example a blank line)
   */
  private def teacherOf(line: String): Option[String] =
    line.split("/").lift(3)

  /**
   * Entry point.
   *
   * @param args `args(0)` is the path of the text file to read
   * @throws IllegalArgumentException if no input path is supplied
   */
  def main(args: Array[String]): Unit = {
    // Fail early with a readable message instead of an index error on args(0).
    require(args.nonEmpty, "Usage: FavTeacher <input-path>")

    val conf = new SparkConf()
      .setAppName("FavTeacher")
      // Default to local mode, but do not override a master that was already
      // configured (e.g. through spark-submit --master).
      .setIfMissing("spark.master", "local")
    val sc = new SparkContext(conf)

    try {
      // Where to read the data from.
      val lines = sc.textFile(args(0))

      // Shape the data into (teacher, 1) pairs. Lines without a teacher
      // segment are skipped so one malformed record cannot abort the job.
      val teacherAndOne = lines
        .flatMap(line => teacherOf(line).iterator)
        .map(teacher => (teacher, 1))

      // Aggregate: number of occurrences per teacher.
      val reduced = teacherAndOne.reduceByKey(_ + _)

      // Sort by count, most popular first.
      val sorted = reduced.sortBy(_._2, ascending = false)

      // Action: triggers the computation and brings the result to the driver.
      val result = sorted.collect()

      // Print the ranking.
      println(result.toBuffer)
    } finally {
      // Release the context even when the job fails.
      sc.stop()
    }
  }
}