2021-05-29:最常使用的K个单词II。在实时数据流中找到最常使用的k个单词,实现TopK类中的三个方法: TopK(k), 构造方法。add(word),增加一个新单词。topk(),得到当前最常使用的k个单词。如果两个单词有相同的使用频率,按字典序排名。

福大大 答案2021-05-30:

方法一:
redis的sorted set。hash+跳表实现计数和查找。无代码。
方法二:
节点结构体:有字符串和词频。
词频表:key是字符串,value是节点。
堆:节点数组。刚开始,我以为是大根堆。采用小根堆,如果比堆顶还小,是进不了小根堆的。
反向表:key是节点,value是在堆中的索引。
有代码。

代码用golang编写。代码如下:

package main

import (
    "fmt"
    "sort"
)

func main() {

    a := NewTopK(2)
    a.add("fdd")
    a.add("moon")
    a.add("moonfdd")
    a.add("moonfdd")
    ret := a.topk()
    for i := 0; i < len(ret); i++ {
        fmt.Println(ret[i])
    }

}

type TopK struct {
    //堆
    heap     []*Node
    heapSize int
    //字,次数
    wordNodeMap map[string]*Node
    //反向表
    nodeIndexMap map[*Node]int
}

func NewTopK(k int) *TopK {
    ret := &TopK{}
    ret.heap = make([]*Node, k)
    ret.wordNodeMap = make(map[string]*Node)
    ret.nodeIndexMap = make(map[*Node]int)
    return ret
}

func (this *TopK) add(word string) {
    if len(this.heap) == 0 {
        return
    }
    var curNode *Node
    preIndex := -1
    curNode = this.wordNodeMap[word]

    //词频表 反向表
    if curNode == nil {
        curNode = &Node{word, 1}
        this.wordNodeMap[word] = curNode
        this.nodeIndexMap[curNode] = -1
    } else {
        curNode.Times++
        preIndex = this.nodeIndexMap[curNode]
    }

    //小根堆
    if preIndex == -1 {
        if this.heapSize == len(this.heap) {
            if this.compare(curNode, this.heap[0]) {
                //不用管了
                return
            }
            curNode, this.heap[0] = this.heap[0], curNode
            this.nodeIndexMap[curNode] = -1
            this.nodeIndexMap[this.heap[0]] = 0
            this.HeapDown(0)

        } else {
            this.Push(curNode)
        }
    } else {
        this.HeapDown(preIndex)
    }
}

func (this *TopK) topk() []string {
    heapCopy := make([]*Node, this.heapSize)
    copy(heapCopy, this.heap)
    sort.Slice(heapCopy, func(i, j int) bool {
        return !this.compare(heapCopy[i], heapCopy[j])
    })
    ans := make([]string, this.heapSize)
    for i := 0; i < this.heapSize; i++ {
        ans[i] = heapCopy[i].Str
    }
    return ans
}

type Node struct {
    Str   string
    Times int
}

//索引上移,小根堆
func (this *TopK) HeapUp(index int) {
    for (index-1)/2 != index && !this.compare(this.heap[(index-1)/2], this.heap[index]) { //父节点小于当前节点,当前节点必须上移
        this.heap[index], this.heap[(index-1)/2] = this.heap[(index-1)/2], this.heap[index]
        //加强堆
        this.nodeIndexMap[this.heap[index]], this.nodeIndexMap[this.heap[(index-1)/2]] = (index-1)/2, index
        index = (index - 1) / 2
    }
}

//索引下沉,小根堆
func (this *TopK) HeapDown(index int) {
    left := 2*index + 1
    for left <= this.heapSize-1 { //左孩子存在
        //获取小孩子
        largest := left
        if left+1 <= this.heapSize-1 && this.compare(this.heap[left+1], this.heap[left]) {
            largest++
        }

        //比较
        if !this.compare(this.heap[index], this.heap[largest]) { //当前大于最小孩子,必须下沉
            this.heap[index], this.heap[largest] = this.heap[largest], this.heap[index]
            //加强堆
            this.nodeIndexMap[this.heap[index]], this.nodeIndexMap[this.heap[largest]] = largest, index
        } else {
            break
        }

        //下一次遍历
        index = largest
        left = 2*index + 1
    }
}

func (this *TopK) Push(node *Node) {
    this.heap[this.heapSize] = node
    //加强堆
    this.nodeIndexMap[node] = this.heapSize
    //索引上移
    this.HeapUp(this.heapSize)
    this.heapSize++
}

func (this *TopK) Pop() *Node {
    ans := this.heap[0]
    this.heap[0], this.heap[this.heapSize-1] = this.heap[this.heapSize-1], this.heap[0]
    //加强堆
    this.nodeIndexMap[this.heap[0]] = 0
    this.nodeIndexMap[this.heap[this.heapSize-1]] = -1

    this.heapSize--
    //索引下沉
    this.HeapDown(0)
    return ans
}

func (this *TopK) compare(node1 *Node, node2 *Node) bool {
    if node1.Times == node2.Times {
        return node1.Str > node2.Str
    }
    return node1.Times < node2.Times
}

执行结果如下:
在这里插入图片描述

福大大 答案2021-05-29:

方法一:
redis的sorted set。hash+跳表实现计数和查找。无代码。
方法二:
节点结构体:有字符串和词频。
词频表:key是字符串,value是节点。
堆:节点数组。
反向表:key是节点,value是在堆中的索引。
有代码,但不完整,因为时间紧。

代码用golang编写。代码如下:

package main

import "fmt"

func main() {
    a := NewTopK(2)
    a.add("lint")
    a.add("code")
    a.add("code")
    fmt.Println(a.topk())
}

type TopK struct {
    //堆
    heap     []*Node
    heapSize int
    //字,次数
    wordNodeMap map[string]*Node
    //反向表
    nodeIndexMap map[*Node]int
}

func NewTopK(k int) *TopK {
    ret := &TopK{}
    ret.heap = make([]*Node, k)

    return ret
}

func (this *TopK) add(word string) {
    if len(this.heap) == 0 {
        return
    }
    var curNode *Node
    preIndex := -1
    curNode = this.wordNodeMap[word]
    if curNode == nil {
        curNode = &Node{word, 1}
        this.wordNodeMap[word] = curNode
        this.nodeIndexMap[curNode] = -1
    } else {
        //tree set
        curNode.Times++
        preIndex = this.nodeIndexMap[curNode]
    }
    if preIndex == -1 {
        if this.heapSize == len(this.heap) {
            //treeset

        } else {
            //tree add
            this.nodeIndexMap[curNode] = this.heapSize
            this.heap[this.heapSize] = curNode
            this.HeapUp(preIndex)
        }
    } else {
        //tree add
        this.HeapDown(preIndex)
    }
}

func (this *TopK) topk() []string {
    ans := make([]string, len(this.heap))
    return ans
}

type Node struct {
    Str   string
    Times int
}

//索引上移,大根堆
func (this *TopK) HeapUp(index int) {
    for this.heap[(index-1)/2].Times < this.heap[index].Times { //父节点小于当前节点,当前节点必须上移
        this.heap[index], this.heap[(index-1)/2] = this.heap[(index-1)/2], this.heap[index]
        //加强堆
        this.nodeIndexMap[this.heap[index]], this.nodeIndexMap[this.heap[(index-1)/2]] = (index-1)/2, index
        index = (index - 1) / 2
    }
}

//索引下沉,大根堆
func (this *TopK) HeapDown(index int) {
    left := 2*index + 1
    for left <= this.heapSize-1 { //左孩子存在
        //获取大孩子
        largest := left
        if left+1 <= this.heapSize-1 && this.heap[left+1].Times > this.heap[left].Times {
            largest++
        }

        //比较
        if this.heap[index].Times < this.heap[largest].Times { //当前小于最大孩子,必须下沉
            this.heap[index], this.heap[largest] = this.heap[largest], this.heap[index]
            //加强堆
            this.nodeIndexMap[this.heap[index]], this.nodeIndexMap[this.heap[largest]] = largest, index
        } else {
            break
        }

        //下一次遍历
        index = largest
        left = 2*index + 1
    }
}

func (this *TopK) Push(node *Node) {
    this.heap[this.heapSize] = node
    //加强堆
    this.nodeIndexMap[node] = this.heapSize
    this.heapSize++
    //索引上移
    this.HeapUp(this.heapSize)
}

func (this *TopK) Pop() *Node {
    ans := this.heap[0]
    this.heap[0], this.heap[this.heapSize-1] = this.heap[this.heapSize-1], this.heap[0]
    //加强堆
    this.nodeIndexMap[this.heap[0]] = 0
    this.nodeIndexMap[this.heap[this.heapSize-1]] = -1
    this.heapSize--
    //索引下沉
    this.HeapDown(0)
    return ans
}

执行结果如下:
图片


左神java代码