题解 | #实时社交媒体热点追踪#

题目链接

实时社交媒体热点追踪

题目描述

模拟一个实时热点追踪系统的核心逻辑。系统维护一个带计数器的关键词队列。

关键词流入: 当一批新的关键词流入时，如果某个关键词已存在于队列中，其计数器加一；如果是新关键词，则将其加入队列尾部，计数器置为一。
输出与移除: 每处理完一批关键词后，系统会从队列头部（即最早进入的关键词）开始，输出最多 $K$ 个关键词及其当前的计数值。被输出的关键词将从队列中被彻底移除。

输入:

第一行为一个整数 $K$ ，代表仪表盘的显示容量。
从第二行开始，每行为一批用空格分隔的关键词字符串。

输出:

对应每一批输入，输出一行结果。
格式为 关键词1 计数值1 关键词2 计数值2 ...。
如果当前队列为空，则输出 null。

解题思路

这是一个典型的模拟题，核心在于设计一个能同时满足“先进先出”（FIFO）顺序和“快速查找更新”需求的数据结构。

数据结构选择：
- 为了快速查找一个关键词是否存在并更新其计数，我们需要一个哈希表结构，如 C++ 的 unordered_map、Java 的 HashMap 或 Python 的 dict。
- 为了维护关键词进入的先后顺序，我们需要一个队列结构，如 C++ 的 list/deque、Java 的 LinkedList 或 Python 的 deque。
- 语言特定优化：
  - 在 Java 中，LinkedHashMap 是最理想的选择，因为它本身就是一个哈希表，同时又通过内部链表维护了元素的插入顺序。
  - 在 Python 3.7+ 中，标准的 dict 类型也默认保留插入顺序，因此一个 dict 就可以同时满足两个需求。
  - 在 C++ 中，最高效的方式是组合使用 std::list 和 std::map：
    - std::list<pair<string, int>>: 用一个链表来存储关键词和计数的配对，保证“先进先出”的顺序。
    - std::map<string, list<pair<string, int>>::iterator>: 用一个哈希表来存储每个关键词对应的链表节点的迭代器。这样，当一个已存在的关键词再次出现时，可以通过哈希表在 $\mathcal{O}(\log N)$ 时间内直接定位到链表中的节点并更新其计数值，而无需遍历链表。
算法流程：
- 读取显示容量 $K$ 。
- 使用 while 循环持续读取输入的每一行。注意：即使某行为空，也代表一次处理周期，需要触发输出逻辑。
- 处理每批关键词：
  - 对当前行进行分割，得到所有关键词。
  - 遍历这些关键词：
    - 如果关键词已存在于哈希表中，将其计数值加一。
    - 如果关键词是新的，在哈希表中记录其计数值为 1，并将其添加到队列结构的尾部。
- 生成输出：
  - 检查处理后队列是否为空。若是，则输出 null。
  - 若非空，确定本次要输出的数量：count_to_output = min(K, current_queue_size)。
  - 如果 count_to_output 为 0（例如当 $K=0$ 时），也应输出 null。
  - 循环 count_to_output 次：
    - 从队列头部取出一个关键词。
    - 根据这个关键词从哈希表中获取它的计数值。
    - 将 关键词计数值 拼接到结果字符串。
    - 关键：将这个关键词从哈希表和队列中都彻底删除。
  - 输出拼接好的结果字符串。

代码

cpp
java
python

#include <iostream>
#include <string>
#include <vector>
#include <sstream>
#include <list>
#include <map>
#include <algorithm>

using namespace std;

int main() {
    ios::sync_with_stdio(false);
    cin.tie(NULL);

    int k;
    cin >> k;
    string line;
    getline(cin, line); // 消耗换行符

    // 使用 list 维护插入顺序 (FIFO)，存储关键词和计数的 pair
    list<pair<string, int>> keyword_list;
    // 使用 map 快速查找关键词在 list 中的位置
    map<string, list<pair<string, int>>::iterator> keyword_map;

    while (getline(cin, line)) {
        stringstream ss(line);
        string keyword;

        while (ss >> keyword) {
            if (keyword_map.count(keyword)) {
                // 关键词已存在，通过迭代器直接更新 list 中的计数值
                keyword_map[keyword]->second++;
            } else {
                // 新关键词，加入 list 尾部，并在 map 中存储其迭代器
                keyword_list.push_back({keyword, 1});
                keyword_map[keyword] = prev(keyword_list.end());
            }
        }

        int output_count = min((size_t)k, keyword_list.size());

        if (output_count == 0) {
            cout << "null\n";
        } else {
            for (int i = 0; i < output_count; ++i) {
                const auto& front_item = keyword_list.front();
                cout << front_item.first << " " << front_item.second << (i == output_count - 1 ? "" : " ");
                
                // 从 map 和 list 中彻底移除该元素
                keyword_map.erase(front_item.first);
                keyword_list.pop_front();
            }
            cout << "\n";
        }
    }

    return 0;
}

import java.util.Scanner;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.ArrayList;
import java.util.List;

public class Main {
    public static void main(String[] args) {
        Scanner sc = new Scanner(System.in);
        int k = Integer.parseInt(sc.nextLine());

        Map<String, Integer> hotTopics = new LinkedHashMap<>();

        while (sc.hasNextLine()) {
            String line = sc.nextLine();
            
            // 只有非空行才进行关键词处理
            if (line != null && !line.isEmpty()) {
                String[] keywords = line.split("\\s+");
                for (String keyword : keywords) {
                    hotTopics.put(keyword, hotTopics.getOrDefault(keyword, 0) + 1);
                }
            }
            
            int outputCount = Math.min(k, hotTopics.size());

            if (outputCount == 0) {
                System.out.println("null");
            } else {
                List<String> output = new ArrayList<>();
                List<String> toRemove = new ArrayList<>();
                
                for (Map.Entry<String, Integer> entry : hotTopics.entrySet()) {
                    if (output.size() >= outputCount) break;
                    output.add(entry.getKey() + " " + entry.getValue());
                    toRemove.add(entry.getKey());
                }

                for (String key : toRemove) {
                    hotTopics.remove(key);
                }

                System.out.println(String.join(" ", output));
            }
        }
    }
}

import sys

def main():
    k_line = sys.stdin.readline()
    if not k_line.strip():
        return
    k = int(k_line)
    
    # 在 Python 3.7+ 中，dict 默认保持插入顺序
    hot_topics = {}

    for line in sys.stdin:
        line = line.strip()
        if line: # 只有非空行才进行关键词处理
            keywords = line.split()
            for keyword in keywords:
                hot_topics[keyword] = hot_topics.get(keyword, 0) + 1
        
        output_count = min(k, len(hot_topics))
        
        if output_count == 0:
            print("null")
        else:
            output = []
            keys_to_remove = []
            
            # 获取将要被移除的键
            for key in hot_topics.keys():
                if len(keys_to_remove) >= output_count:
                    break
                keys_to_remove.append(key)

            # 生成输出并移除
            for key in keys_to_remove:
                value = hot_topics.pop(key)
                output.append(f"{key} {value}")

            print(" ".join(output))

if __name__ == "__main__":
    main()

算法及复杂度

算法：模拟、哈希表、队列
时间复杂度： $\mathcal{O}(W)$ ，其中 $W$ 是输入中所有关键词的总数。对于每个关键词，我们在哈希表中的插入、查找、更新和删除操作的平均时间复杂度都是 $\mathcal{O}(1)$ 。
空间复杂度： $\mathcal{O}(U)$ ，其中 $U$ 是在任意时刻队列中所包含的独立关键词的最大数量。