2021-05-29：最常使用的K个单词II。在实时数据流中找到最常使用的k个单词，实现TopK类中的三个方法: TopK(k)、add(word)、topk()

2021-05-29：最常使用的K个单词II。在实时数据流中找到最常使用的k个单词，实现TopK类中的三个方法: TopK(k)，构造方法。add(word)，增加一个新单词。topk()，得到当前最常使用的k个单词。如果两个单词有相同的使用频率，按字典序排名。

福大大答案2021-05-30：

方法一：
redis的sorted set。hash+跳表实现计数和查找。无代码。
方法二：
节点结构体：有字符串和词频。
词频表：key是字符串，value是节点。
堆：节点数组。刚开始，我以为要用大根堆；实际上应采用容量为k的小根堆：堆顶是当前前k名中最弱的单词，新单词如果比堆顶还小，是进不了小根堆的。
反向表：key是节点，value是在堆中的索引；节点不在堆中时索引为-1。
有代码。

代码用golang编写。代码如下：

package main

import (

    "fmt"

    "sort"

)

// main feeds a short sample stream into a TopK of size 2 and prints the
// resulting words, one per line, in the order topk returns them.
func main() {
	tracker := NewTopK(2)

	// Same words, in the same order, as the sample stream.
	for _, word := range []string{"fdd", "moon", "moonfdd", "moonfdd"} {
		tracker.add(word)
	}

	for _, word := range tracker.topk() {
		fmt.Println(word)
	}
}

// TopK tracks the k most frequently added words of a stream.
//
// Every word ever added keeps its count in wordNodeMap. Only the current
// best k words are stored in a fixed-capacity min-heap whose root is the
// weakest of them (ordering defined by compare). nodeIndexMap is the reverse
// index that lets add find a word's heap slot without scanning the heap.
type TopK struct {
	// heap is the backing array of the min-heap. Its length is the capacity
	// k; only heap[:heapSize] is populated.
	heap     []*Node
	heapSize int

	// wordNodeMap maps each word seen so far to its counter node.
	wordNodeMap map[string]*Node

	// nodeIndexMap maps a node to its index in heap, or to -1 while the
	// node is not in the heap.
	nodeIndexMap map[*Node]int
}

// NewTopK returns a tracker that reports the k most frequent words.
// A non-positive k yields a tracker that ignores every add and always
// reports an empty result.
func NewTopK(k int) *TopK {
	if k < 0 {
		k = 0 // make panics on a negative length
	}
	return &TopK{
		heap:         make([]*Node, k),
		wordNodeMap:  make(map[string]*Node),
		nodeIndexMap: make(map[*Node]int),
	}
}

// add records one more occurrence of word and updates the heap so that it
// again holds the best k words seen so far.
func (t *TopK) add(word string) {
	if len(t.heap) == 0 {
		return
	}

	// Update the frequency table; new words start outside the heap.
	node, seen := t.wordNodeMap[word]
	if seen {
		node.Times++
	} else {
		node = &Node{Str: word, Times: 1}
		t.wordNodeMap[word] = node
		t.nodeIndexMap[node] = -1
	}

	idx := t.nodeIndexMap[node]
	switch {
	case idx >= 0:
		// Already in the heap: its count only grew, so in a min-heap it can
		// only need to move towards the leaves.
		t.HeapDown(idx)
	case t.heapSize < len(t.heap):
		// Free capacity left: every word gets in.
		t.Push(node)
	case t.compare(t.heap[0], node):
		// Heap is full and the word outranks the weakest member: replace the
		// root with it and restore the heap order.
		evicted := t.heap[0]
		t.heap[0] = node
		t.nodeIndexMap[evicted] = -1
		t.nodeIndexMap[node] = 0
		t.HeapDown(0)
	}
	// Otherwise the word ranks below the root and stays outside the heap.
}

// topk returns the words currently in the heap, most frequent first; words
// with equal counts are returned in ascending lexicographic order. The heap
// itself is not modified.
func (t *TopK) topk() []string {
	ranked := make([]*Node, t.heapSize)
	copy(ranked, t.heap) // copies only the populated prefix

	sort.Slice(ranked, func(i, j int) bool {
		// Element i goes first when j ranks strictly below it. Asking
		// compare(j, i) instead of negating compare(i, j) keeps the less
		// function false for i == j, as sort requires.
		return t.compare(ranked[j], ranked[i])
	})

	ans := make([]string, len(ranked))
	for i, node := range ranked {
		ans[i] = node.Str
	}
	return ans
}

// Node is the counter for one word: the word itself and how many times it
// has been added.
type Node struct {
	Str   string
	Times int
}

// HeapUp moves the node at index towards the root while its parent does not
// rank below it (min-heap sift-up). An index of 0 or less is a no-op.
func (t *TopK) HeapUp(index int) {
	for index > 0 {
		parent := (index - 1) / 2
		if t.compare(t.heap[parent], t.heap[index]) {
			return // parent is weaker: heap order holds
		}
		t.swap(index, parent)
		index = parent
	}
}

// HeapDown moves the node at index towards the leaves while one of its
// children ranks below it (min-heap sift-down).
func (t *TopK) HeapDown(index int) {
	for {
		child := 2*index + 1 // left child
		if child >= t.heapSize {
			return
		}
		// Pick the weaker of the two children.
		if right := child + 1; right < t.heapSize && t.compare(t.heap[right], t.heap[child]) {
			child = right
		}
		if t.compare(t.heap[index], t.heap[child]) {
			return // current node is weaker than both children
		}
		t.swap(index, child)
		index = child
	}
}

// Push appends node to the heap and sifts it up into place. The caller must
// ensure the heap still has free capacity (heapSize < len(heap)).
func (t *TopK) Push(node *Node) {
	t.heap[t.heapSize] = node
	t.nodeIndexMap[node] = t.heapSize
	t.HeapUp(t.heapSize)
	t.heapSize++
}

// Pop removes and returns the root of the heap, i.e. the weakest word it
// holds. It returns nil when the heap is empty.
func (t *TopK) Pop() *Node {
	if t.heapSize == 0 {
		return nil
	}
	top := t.heap[0]
	last := t.heapSize - 1

	t.swap(0, last)
	t.heap[last] = nil // drop the stale reference from the unused tail
	t.nodeIndexMap[top] = -1
	t.heapSize = last

	t.HeapDown(0)
	return top
}

// swap exchanges the heap slots i and j and keeps nodeIndexMap in sync.
func (t *TopK) swap(i, j int) {
	t.heap[i], t.heap[j] = t.heap[j], t.heap[i]
	t.nodeIndexMap[t.heap[i]] = i
	t.nodeIndexMap[t.heap[j]] = j
}

// compare reports whether node1 ranks strictly below node2: it has a lower
// count, or the same count and a lexicographically greater word.
func (t *TopK) compare(node1 *Node, node2 *Node) bool {
	if node1.Times != node2.Times {
		return node1.Times < node2.Times
	}
	return node1.Str > node2.Str
}

执行结果如下（逐行输出）：

moonfdd
fdd

福大大答案2021-05-29：

方法一：
redis的sorted set。hash+跳表实现计数和查找。无代码。
方法二：
节点结构体：有字符串和词频。
词频表：key是字符串，value是节点。
堆：节点数组。
反向表：key是节点，value是在堆中的索引。
有代码，但不完整，因为时间紧。

代码用golang编写。代码如下：

package main

import "fmt"

// main adds a three-word sample stream to a TopK of size 2 and prints the
// slice returned by topk.
func main() {
	counter := NewTopK(2)

	for _, word := range []string{"lint", "code", "code"} {
		counter.add(word)
	}

	fmt.Println(counter.topk())
}

// TopK keeps the k most frequently added words of a stream.
//
// All words ever added are counted in wordNodeMap. The best k of them are
// held in a fixed-capacity min-heap: the root is the weakest word that is
// still among the best k (ordering defined by compare), so a word outside
// the heap only has to beat the root to get in. nodeIndexMap records where
// each node currently sits in the heap.
type TopK struct {
	// heap is the min-heap storage; len(heap) is the capacity k and only
	// heap[:heapSize] is in use.
	heap     []*Node
	heapSize int

	// wordNodeMap maps a word to its counter node.
	wordNodeMap map[string]*Node

	// nodeIndexMap maps a node to its heap index, or -1 if it is not in the
	// heap.
	nodeIndexMap map[*Node]int
}

// NewTopK creates a tracker for the k most frequent words. Both maps are
// allocated here because add writes to them. A non-positive k produces a
// tracker whose add is a no-op and whose topk is always empty.
func NewTopK(k int) *TopK {
	if k < 0 {
		k = 0 // make panics on a negative length
	}
	ret := &TopK{}
	ret.heap = make([]*Node, k)
	ret.wordNodeMap = make(map[string]*Node)
	ret.nodeIndexMap = make(map[*Node]int)
	return ret
}

// add counts one more occurrence of word and, if the word now belongs to the
// best k, places it in the heap.
func (t *TopK) add(word string) {
	if len(t.heap) == 0 {
		return
	}

	node := t.wordNodeMap[word]
	if node == nil {
		node = &Node{Str: word, Times: 1}
		t.wordNodeMap[word] = node
		t.nodeIndexMap[node] = -1
	} else {
		node.Times++
	}

	if at := t.nodeIndexMap[node]; at != -1 {
		// Already in the heap. Its count just went up, so in a min-heap it
		// may only have to sink.
		t.HeapDown(at)
		return
	}
	if t.heapSize < len(t.heap) {
		t.Push(node)
		return
	}

	// Heap is full: the word gets in only by outranking the current root.
	root := t.heap[0]
	if t.compare(node, root) {
		return
	}
	t.heap[0] = node
	t.nodeIndexMap[node] = 0
	t.nodeIndexMap[root] = -1
	t.HeapDown(0)
}

// topk returns the words in the heap, most frequent first, with ties in
// ascending lexicographic order. The tracker's own heap is left untouched.
func (t *TopK) topk() []string {
	// Drain a scratch copy of the heap. Pop yields the weakest word first,
	// so the answer is filled from the back.
	scratch := &TopK{
		heap:         make([]*Node, t.heapSize),
		heapSize:     t.heapSize,
		nodeIndexMap: make(map[*Node]int, t.heapSize),
	}
	copy(scratch.heap, t.heap) // copies only the populated prefix

	ans := make([]string, t.heapSize)
	for i := len(ans) - 1; i >= 0; i-- {
		ans[i] = scratch.Pop().Str
	}
	return ans
}

// Node is the counter for a single word.
type Node struct {
	Str   string
	Times int
}

// HeapUp sifts the node at index up towards the root of the min-heap until
// its parent ranks below it. An index of 0 or less is a no-op.
func (t *TopK) HeapUp(index int) {
	for index > 0 {
		parent := (index - 1) / 2
		if t.compare(t.heap[parent], t.heap[index]) {
			break
		}
		t.heap[index], t.heap[parent] = t.heap[parent], t.heap[index]
		// Keep the reverse index in step with the swap.
		t.nodeIndexMap[t.heap[index]] = index
		t.nodeIndexMap[t.heap[parent]] = parent
		index = parent
	}
}

// HeapDown sifts the node at index down towards the leaves of the min-heap
// until it ranks below both of its children.
func (t *TopK) HeapDown(index int) {
	for left := 2*index + 1; left < t.heapSize; left = 2*index + 1 {
		// Choose the weaker child.
		smallest := left
		if left+1 < t.heapSize && t.compare(t.heap[left+1], t.heap[left]) {
			smallest = left + 1
		}
		if t.compare(t.heap[index], t.heap[smallest]) {
			break // heap order already holds
		}
		t.heap[index], t.heap[smallest] = t.heap[smallest], t.heap[index]
		// Keep the reverse index in step with the swap.
		t.nodeIndexMap[t.heap[index]] = index
		t.nodeIndexMap[t.heap[smallest]] = smallest
		index = smallest
	}
}

// Push stores node in the first free slot and sifts it up. The caller must
// ensure heapSize < len(heap).
func (t *TopK) Push(node *Node) {
	slot := t.heapSize
	t.heap[slot] = node
	t.nodeIndexMap[node] = slot
	// Sift the slot that was just filled, then account for it.
	t.HeapUp(slot)
	t.heapSize++
}

// Pop removes and returns the root of the heap (the weakest word it holds),
// or nil when the heap is empty.
func (t *TopK) Pop() *Node {
	if t.heapSize == 0 {
		return nil
	}
	ans := t.heap[0]
	last := t.heapSize - 1
	t.heap[0], t.heap[last] = t.heap[last], t.heap[0]
	t.nodeIndexMap[t.heap[0]] = 0
	t.nodeIndexMap[ans] = -1
	t.heap[last] = nil // drop the stale reference from the unused tail
	t.heapSize = last
	t.HeapDown(0)
	return ans
}

// compare reports whether node1 ranks strictly below node2: fewer
// occurrences, or equally many and a lexicographically greater word.
func (t *TopK) compare(node1 *Node, node2 *Node) bool {
	if node1.Times == node2.Times {
		return node1.Str > node2.Str
	}
	return node1.Times < node2.Times
}

执行结果如下：

左神java代码

2021-05-29：最常使用的K个单词II。在实时数据流中找到最常使用的k个单词，实现TopK类中的三个方法: Top的更多相关文章

《程序员代码面试指南》第八章数组和矩阵问题在数组中找到出现次数大于N/K 的数
题目在数组中找到出现次数大于N/K 的数 java代码 package com.lizhouwei.chapter8; import java.util.ArrayList; import java ...
2021.05.29【NOIP提高B组】模拟总结
T1 题意:给你一个图,可以不花代价经过 \(K\) 条边,问从起点到终点的最短路考试的想法:设 \(dis_{i,j}\) 表示从起点免费了 \(j\) 条边到 \(i\) 的最短路然后直接跑 ...
[算法]在数组中找到出现次数大于N/K的数
题目: 1.给定一个整型数组,打印其中出现次数大于一半的数.如果没有出现这样的数,打印提示信息. 如:1,2,1输出1. 1,2,3输出no such number. 2.给定一个整型数组,再给 ...
2021.10.29 数位dp
2021.10.29 数位dp 1.数字计数我们先设数字为ABCD 看A000,如果我们要求出它所有数位之和,我们会怎么求? 鉴于我们其实已经求出了0到9,0到99,0到999...上所有数字个数( ...
2021.05.03 T3 数字
2021.05.03 T3 数字问题描述一个数字被称为好数字当他满足下列条件: 1. 它有**2*n**个数位,n是正整数(允许有前导0) 2. 构成它的每个数字都在给定的数字集合S中. 3. 它 ...
2021.05.14 tarjan
2021.05.14 tarjan 标准版tarjan 这里使用数组来模拟栈 void tarjan(int x){ ++ind; dfn[x]=low[x]=ind; stacki[++top]=x ...
项目Beta冲刺（团队）——05.29(7/7)
项目Beta冲刺(团队)--05.29(7/7) 格式描述课程名称:软件工程1916|W(福州大学) 作业要求:项目Beta冲刺(团队) 团队名称:为了交项目干杯作业目标:记录Beta敏捷冲刺第7 ...
2021.6.29考试总结[NOIP模拟10]
T1 入阵曲二位前缀和暴力n4可以拿60. 观察到维护前缀和时模k意义下余数一样的前缀和相减后一定被k整除,前缀和维护模数,n2枚举行数,n枚举列, 开一个桶记录模数出现个数,每枚举到该模数就加上它 ...
2021.10.29 P1649 [USACO07OCT]Obstacle Course S（BFS）
2021.10.29 P1649 [USACO07OCT]Obstacle Course S(BFS) 题意: 给一张n*n的图,起点为A,终点为 B,求从A到B转弯次数最少为多少. 分析: 是否存在 ...
2021.05.09【NOIP提高组】模拟赛总结
2021.05.09[NOIP提高组]模拟赛总结 T1 T2

随机推荐

提供离线chrome谷歌浏览器插件crx的网站有
crx4:http://www.crx4.com/ 极简插件:https://chrome.zzzmh.cn/index 扩展迷:https://www.extfans.com/ 浏览器插件下载中心: ...
JMeter压测脚本实例：单接口
新建测试计划添加线程组添加HTTP请求配置该请求相关参数 1.请求头部信息 ①HTTP请求同级线程组下添加HTTP信息头部管理器 ②填充该请求所需的头部信息 2.请求体选中之前增加的HTTP请 ...
Android笔记--动态申请权限
动态申请权限在动态申请权限这里,一共分为两种不同的模式,分别是Lazy模式(懒汉式)和Hungry模式(饿汉式),这两种模式区分的话,可以通俗地解释一下就是,对于懒汉来说,只有在我们点击某个按钮需要 ...
Android笔记--Activity--启停活动页面
Activity启动从当前页面跳转到新的页面:startActivity(new Intent(原页面.this,目标页面.class)) 而若是从当前页面返回到上一个页面,相当于关闭当前页面,使用 ...
Tesseract5+OpenCV4（VS2017+win10）实现OCR识别
一.环境配置较之前采用cppan进行编译的方式,vcpkg的方式已经发生了许多变化,带来的最大不同就是便捷. 对于在NuGet中能够找到的Vcpkg的export,真的实现了开箱即用这样的话对于普 ...
把 ChatGPT 加入 Flutter 开发，会有怎样的体验？
前言 ChatGPT 最近一直都处于技术圈的讨论焦点.它除了可作为普通用户的日常 AI 助手,还可以帮助开发者加速开发进度.声网社区的一位开发者"小猿"就基于 ChatGPT 做了 ...
小霸王、红白机、FC游戏、街机游戏在线玩的网站
前段时间小笨就想做一个红白机在线玩的网站,作为90后,也玩过不少小霸王fc游戏,于是花了两个星期时间做了出来.前端界面略丑,因为小笨不是专做前端的,就将就一下吧,哈哈!网站暂时添加了数款怀旧游戏,包括 ...
MyBatisPlus 自动填充演示
一.数据库表中新增"添加时间"和"修改时间"字段:
H5-生成二维码
<div class="poster-qr"> <div class="qrWrapper"> <!-- 放置二维码的容器 --& ...
【ACM算法竞赛日常训练】DAY5题解与分析【储物点的距离】【糖糖别胡说，我真的不是签到题目】| 前缀和 | 思维
DAY5共2题: 储物点的距离(前缀和) 糖糖别胡说,我真的不是签到题目(multiset,思维) 作者:Eriktse 简介:19岁,211计算机在读,现役ACM银牌选手力争以通俗易懂的方式讲解算法 ...

2021-05-29：最常使用的K个单词II。在实时数据流中找到最常使用的k个单词，实现TopK类中的三个方法: Top

2021-05-29：最常使用的K个单词II。在实时数据流中找到最常使用的k个单词，实现TopK类中的三个方法: Top的更多相关文章

随机推荐

热门专题