题解 | #字符编码#

解题思路

这是一道使用Huffman编码思想的问题，主要思路如下：

问题分析：
- 给定一个字符串，需要进行二进制编码
- 相同的字符必须使用相同的编码
- 要求编码后的总长度最短
解决方案：
- 统计每个字符的出现次数
- 使用优先队列（小顶堆）合并最小的两个长度
- 类似Huffman树的构建过程
关键点：
- 先对字符串排序，便于统计连续相同字符
- 使用优先队列维护最小的两个长度
- 每次合并后将新长度加入队列

代码

cpp
java
python

#include <iostream>
#include <string>
#include <queue>
#include <algorithm>
using namespace std;

int getMinEncodingLength(string s) {
    // 先排序，便于统计
    sort(s.begin(), s.end());
    
    // 使用小顶堆存储每组相同字符的长度
    priority_queue<int, vector<int>, greater<int>> pq;
    
    // 统计连续相同字符的长度
    for (int i = 0; i < s.length();) {
        int j = i;
        while (j < s.length() && s[j] == s[i]) {
            j++;
        }
        pq.push(j - i);  // 将该字符出现的次数加入队列
        i = j;
    }
    
    // 类似Huffman树的构建过程
    int totalLength = 0;
    while (pq.size() > 1) {
        int a = pq.top(); pq.pop();
        int b = pq.top(); pq.pop();
        totalLength += a + b;  // 合并两个最小的长度
        pq.push(a + b);       // 将合并后的长度放回队列
    }
    
    return totalLength;
}

int main() {
    string s;
    while (cin >> s) {
        cout << getMinEncodingLength(s) << endl;
    }
    return 0;
}

import java.util.*;

public class Main {
    public static int getMinEncodingLength(String s) {
        // 转换为字符数组并排序
        char[] chars = s.toCharArray();
        Arrays.sort(chars);
        
        // 使用优先队列（小顶堆）存储每组相同字符的长度
        PriorityQueue<Integer> pq = new PriorityQueue<>();
        
        // 统计连续相同字符的长度
        for (int i = 0; i < chars.length;) {
            int j = i;
            while (j < chars.length && chars[j] == chars[i]) {
                j++;
            }
            pq.offer(j - i);  // 将该字符出现的次数加入队列
            i = j;
        }
        
        // 类似Huffman树的构建过程
        int totalLength = 0;
        while (pq.size() > 1) {
            int a = pq.poll();
            int b = pq.poll();
            totalLength += a + b;  // 合并两个最小的长度
            pq.offer(a + b);      // 将合并后的长度放回队列
        }
        
        return totalLength;
    }
    
    public static void main(String[] args) {
        Scanner sc = new Scanner(System.in);
        while (sc.hasNext()) {
            String s = sc.next();
            System.out.println(getMinEncodingLength(s));
        }
    }
}

from heapq import heappush, heappop

def get_min_encoding_length(s: str) -> int:
    # 先排序，便于统计
    s = sorted(s)
    
    # 使用小顶堆存储每组相同字符的长度
    heap = []
    
    # 统计连续相同字符的长度
    i = 0
    while i < len(s):
        j = i
        while j < len(s) and s[j] == s[i]:
            j += 1
        heappush(heap, j - i)  # 将该字符出现的次数加入堆
        i = j
    
    # 类似Huffman树的构建过程
    total_length = 0
    while len(heap) > 1:
        a = heappop(heap)
        b = heappop(heap)
        total_length += a + b  # 合并两个最小的长度
        heappush(heap, a + b)  # 将合并后的长度放回堆
    
    return total_length

# 处理输入
while True:
    try:
        s = input()
        print(get_min_encoding_length(s))
    except EOFError:
        break

算法及复杂度

算法：贪心（Huffman编码思想）
时间复杂度： $\mathcal{O}(n\log n)$ - 排序需要 $\mathcal{O}(n\log n)$ ，堆操作需要 $\mathcal{O}(k\log k)$ ， $k$ 为不同字符数
空间复杂度： $\mathcal{O}(k)$ - $k$ 为不同字符数，需要优先队列存储