7.7.5 普通 WordCount 案例
package chapter07
/**
 * Basic word-count demo.
 *
 * Splits a fixed list of sentences into words, counts how often each word
 * occurs, sorts the words by descending count and prints the three most
 * frequent ones. Every intermediate result is printed as well, each followed
 * by a separator line.
 */
object TestWordCount__简单版 {

  /** Divider line printed after each intermediate result. */
  private val separator: String =
    "------------------------------------------------------------------------------------------"

  /**
   * Runs the demo and prints every stage to standard output.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val stringList = List("Hello Scala Hbase kafka", "Hello Scala Hbase", " Hello Scala", "Hello")

    // split(" ") produces an empty token for a leading space (as in " Hello Scala");
    // drop empty tokens so that "" is never counted as a word.
    val wordList: List[String] = stringList.flatMap(_.split(" ")).filter(_.nonEmpty)
    println(wordList)
    println(separator)

    // Group identical words together: word -> all of its occurrences.
    val wordSame: Map[String, List[String]] = wordList.groupBy(identity)
    println(wordSame)
    println(separator)

    // Collapse each group to its size: word -> number of occurrences.
    val wordToCount: Map[String, Int] =
      wordSame.map { case (word, occurrences) => (word, occurrences.size) }
    println(wordToCount)
    println(separator)

    // Most frequent word first.
    val sortList: List[(String, Int)] = wordToCount.toList.sortWith(_._2 > _._2)
    println(sortList)
    println(separator)

    val resThreeList: List[(String, Int)] = sortList.take(3)
    println(resThreeList)
  }
}
7.7.6 复杂 WordCount 案例
TestWordCount__复杂版__方式01
package chapter07
/**
 * Weighted word-count demo, approach 1: expand, then count.
 *
 * Each input element is a (sentence, n) pair meaning "this sentence occurred
 * n times". The sentence is repeated n times as one long string, after which
 * the ordinary split / group / count pipeline is applied and the three most
 * frequent words are printed.
 */
object TestWordCount__复杂版__方式01 {

  /**
   * Runs the demo and prints the top three (word, count) pairs.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val tupleList = List(("Hello Scala Spark World ", 4), ("Hello Scala Spark", 3), (" Hello Scala", 2), ("Hello", 1))

    // Repeat every sentence as often as its weight says; the appended " "
    // keeps the last word of one copy apart from the first word of the next.
    val stringList: List[String] =
      tupleList.map { case (sentence, times) => (sentence + " ") * times }

    // Sentences that already start or end with a space produce empty tokens
    // once repeated and split on " "; drop them so "" is not counted as a word.
    val words: List[String] = stringList.flatMap(_.split(" ")).filter(_.nonEmpty)

    val wordToCount: Map[String, Int] =
      words.groupBy(identity).map { case (word, occurrences) => (word, occurrences.size) }

    // Most frequent first, keep the top three.
    val wordCountList: List[(String, Int)] =
      wordToCount.toList.sortWith(_._2 > _._2).take(3)

    println(wordCountList)
  }
}
TestWordCount__复杂版__方式02
package chapter07
/**
 * Weighted word-count demo, approach 2: carry the weight with each word.
 *
 * Each input element is a (sentence, n) pair meaning "this sentence occurred
 * n times". Instead of repeating the sentence, every word is paired with the
 * weight of its sentence, and the weights are summed per word.
 */
object TestWordCount__复杂版__方式02 {

  /**
   * Runs the demo and prints the map of word -> total weighted count.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val tuples = List(("Hello Scala Hbase kafka", 4), ("Hello Scala Hbase", 3), (" Hello Scala", 2), ("Hello", 1))

    // ("Hello Scala", 2) becomes ("Hello", 2), ("Scala", 2).
    // A leading space yields an empty token from split(" "); skip it so that
    // "" does not end up as a counted word.
    val wordToCountList: List[(String, Int)] = tuples.flatMap { case (sentence, count) =>
      sentence.split(" ").filter(_.nonEmpty).map(word => (word, count)).toList
    }

    // Group the pairs by word and add up the weights of each group.
    val wordToTotalCountMap: Map[String, Int] =
      wordToCountList.groupBy(_._1).map { case (word, pairs) => (word, pairs.map(_._2).sum) }

    println(wordToTotalCountMap)
  }
}