一种简单的泛型符号表的API:

一种有序的泛型符号表的API


1无序链表中的顺序查找

1.1原理

get()的实现即为遍历链表,用equals()方法比较需被查找的键和每个结点中的键。如果匹配成功就返回相应的值,否则返回null。
put()的实现也是遍历链表,用equals()方法比较需被查找的键和每个结点中的键。如果匹配成功就用更新和该键相关联的值,否则我们就用给定的键值对创建一个新的结点并将其插入到链表的开头。

1.2代码实现

package searching;

import edu.princeton.cs.algs4.Queue;
import java.util.Scanner;

public class SequentialSearchST<Key, Value> {
    private int n;           // number of key-value pairs
    private Node first;      // the linked list of key-value pairs

    private class Node {
        private Key key;
        private Value val;
        private Node next;

        public Node(Key key, Value val, Node next)  {
            this.key  = key;
            this.val  = val;
            this.next = next;
        }
    }
    public SequentialSearchST() {
    }
    public int size() {
        return n;
    }
    public boolean isEmpty() {
        return size() == 0;
    }
    public boolean contains(Key key) {
        if (key == null) throw new IllegalArgumentException("argument to contains() is null");
        return get(key) != null;
    }
    public Value get(Key key) {
        if (key == null) throw new IllegalArgumentException("argument to get() is null"); 
        for (Node x = first; x != null; x = x.next) {
            if (key.equals(x.key))
                return x.val;
        }
        return null;
    }
    public void put(Key key, Value val) {
        if (key == null) throw new IllegalArgumentException("first argument to put() is null"); 
        if (val == null) {
            delete(key);
            return;
        }

        for (Node x = first; x != null; x = x.next) {
            if (key.equals(x.key)) {
                x.val = val;
                return;
            }
        }
        first = new Node(key, val, first);
        n++;
    }
    public void delete(Key key) {
        if (key == null) throw new IllegalArgumentException("argument to delete() is null"); 
        first = delete(first, key);
    }
    // delete key in linked list beginning at Node x
    // warning: function call stack too large if table is large
    private Node delete(Node x, Key key) {
        if (x == null) return null;
        if (key.equals(x.key)) {
            n--;
            return x.next;
        }
        x.next = delete(x.next, key);
        return x;
    }
    public Iterable<Key> keys()  {
        Queue<Key> queue = new Queue<>();
        for (Node x = first; x != null; x = x.next)
            queue.enqueue(x.key);
        return queue;
    }
    public static void main(String[] args) {
        SequentialSearchST<String, Integer> st = new SequentialSearchST<>();
        // 测试数据:S E A R C H E X A M P L E
        Scanner sc=new Scanner(System.in);
        String ss=sc.nextLine();
        String[] keys=ss.split("\\s+");
        for (int i = 0; i<keys.length; i++) {
            st.put(keys[i], i);
        }
        for (String s : st.keys())
            System.out.println(s + " " + st.get(s));
    }
}

2有序数组中的二分查找

2.1原理

使用一对平行的数组,一个存储键一个存储值。核心是rank()放法,返回表中给定键的键的数量。在查找时,先将被查找的键和子数组中的中间键进行比较。如果被查找的键小于中间键,我们就在左子数组中继续查找,如果大于我们就在右子数组中继续查找,否则中间键就是要找的键。
rank()满足:
1.如果表中存在该键,rank()应该返回该键的位置,也就是表中小于它的键的数量;2.如果表中不存在该键,rank()还是应该返回表中小于它的键的数量。

2.2代码实现

package searching;

import edu.princeton.cs.algs4.Queue;
import java.util.Scanner;

public class BinarySearchST<Key extends Comparable<Key>,Value> {
    private Key[] keys;
    private Value[] vals;
    private int N;
    public BinarySearchST(int capacity){
        keys = (Key[]) new Comparable[capacity];
        vals = (Value[]) new Object[capacity];
    }
    public void put(Key key,Value val){//查找键,找到则更新值,否则创建新的元素
        int i=rank(key);//i的值可能为0,N,1~N-1;
        if(i<N && keys[i].compareTo(key)==0){
            vals[i]=val;
            return;
        }
        for(int j=N;j>i;j--){
            keys[j]=keys[j-1];
            vals[j]=vals[j-1];
        }
        keys[i]=key;
        vals[i]=val;
        N++;
    }
    public Value get(Key key){
        if(isEmpty()) return null;
        int i=rank(key);
        if(i<N && keys[i].compareTo(key)==0){
            return vals[i];
        }
        else return null;
    }
    public void delete(Key key){
        if(isEmpty()) return;
        int i=rank(key);//i的值可能为0,N,1~N-1;
        if(i==N || keys[i].compareTo(key)!=0){//key不在表中
            return;
        }
        for(int j=i;j<N-1;j++){
            keys[j]=keys[j+1];
            vals[j]=vals[j+1];
        }
        N--;
        keys[N]=null;//避免对象游离
        vals[N]=null;
    }
    boolean contains(Key key){
        return get(key) != null;
    }
    public boolean isEmpty() {
        return size() == 0;
    }
    public int size(){
        return N;
    }
    public Key min(){
        return keys[0];
    }
    public Key max(){
        return keys[N-1];
    }
    public Key floor(Key key){//小于等于key的最大键
        int i=rank(key);//i的值可能为0,N,1~N-1;
        if (i < N && key.compareTo(keys[i]) == 0){//key在表中
            return keys[i];
        }
        //key不在表中
        if(i==0) return null;
        else return keys[i-1];
    }
    public Key ceiling(Key key){//大于等于key的最小键
        int i=rank(key);
        return keys[i];
    }
    public int rank(Key key){//排名:表中小于给定键的键的数量
        int lo=0,hi=N-1;
        while(lo<=hi){
            int mid=lo+(hi-lo)/2;
            int cmp=key.compareTo(keys[mid]);
            if(cmp<0) hi=mid-1;
            else if(cmp>0) lo=mid+1;
            else return mid;//命中查找时
        }
        return lo;//未命中时,lo>hi时循环退出,此时lo即为排名
    }
    //rank()的递归实现
    /*public int rank(Key key,int lo,int hi){ if(lo>hi) return lo; int mid=lo+(hi-lo)/2; int cmp=key.compareTo(keys[mid]); if(cmp<0){ return rank(key,lo,mid-1); }else if (cmp>0){ return rank(key,mid+1,hi); }else { return mid; } }*/
    public Key select(int k){//返回排名为k的键
        return keys[k];
    }
    public void deleteMin() {
        delete(min());
    }
    public void deleteMax() {
        delete(max());
    }
    public int size(Key lo, Key hi) {
        if (lo.compareTo(hi) > 0) return 0;
        if (contains(hi)) return rank(hi) - rank(lo) + 1;
        else              return rank(hi) - rank(lo);
    }
    public Iterable<Key> keys(Key lo, Key hi) {
        Queue<Key> queue = new Queue<>();
        if (lo.compareTo(hi) > 0) return queue;
        for (int i = rank(lo); i < rank(hi); i++)
            queue.enqueue(keys[i]);
        if (contains(hi)) queue.enqueue(keys[rank(hi)]);
        return queue;
    }
    public Iterable<Key> keys() {//表中所有键的集合
        return keys(min(), max());
    }
    public static void main(String[] args) {
        BinarySearchST<String, Integer> st = new BinarySearchST<>(100);
        // 测试数据:S E A R C H E X A M P L E
        Scanner sc=new Scanner(System.in);
        String ss=sc.nextLine();
        String[] keys=ss.split("\\s+");
        for (int i = 0; i<keys.length; i++) {
            st.put(keys[i], i);
        }
        for (String s : st.keys())
           System.out.println(s + " " + st.get(s));
    }
}

3二叉查找树

3.1原理

查找

插入

floor(向下取整)小于等于key的最大键


select查找排名为k的键

rank返回给定键的排名

删除最小键


删除指定键

在删除结点x后用它的后继结点填补它的位置。

keys范围查找

参考中序遍历的方法,先(递归地)查找根结点的左子树中在该范围内的结点,再查找根结点,然后(递归地)查找根结点的右子树中在该范围内的结点。

3.2代码实现

package searching;

import edu.princeton.cs.algs4.Queue;
import java.util.Scanner;

public class BST<Key extends Comparable<Key>, Value> {
    private Node root;//二叉查找树的根结点

    private class Node{
        private Key key;//键
        private Value val;//值
        private Node left,right;//指向子树的链接
        private int N;//以该结点为根的子树中的结点数

        public Node(Key key,Value value,int N){
            this.key=key;
            this.val=value;
            this.N=N;
        }
    }
    public boolean isEmpty() {
        return size() == 0;
    }

    public int size(){
        return size(root);
    }
    private int size(Node x){
        if(x==null) return 0;
        else return x.N;
    }

    public Value get(Key key){
        return get(root,key);
    }
    private Value get(Node x,Key key){
        //在以x为根结点的子树中查找并返回key所对应的值
        //如果找不到则返回null
        if(x==null) return null;
        int cmp=key.compareTo(x.key);
        if(cmp<0) return get(x.left,key);
        else if(cmp>0) return get(x.right,key);
        else return x.val;
    }

    public void put(Key key,Value val){
        //查找key,找到则更新它的值,否则为它创造一个新的结点
        root=put(root,key,val);
    }
    private Node put(Node x,Key key,Value val){
        //如果key存在于以x为根结点的子树中则更新它的值
        //否则将以key和val为键值对的新结点插入到该子树中
        if(x==null) return new Node(key,val,1);
        int cmp=key.compareTo(x.key);
        if(cmp<0) x.left=put(x.left,key,val);
        else if(cmp>0) x.right=put(x.right,key,val);
        else x.val=val;
        x.N=size(x.left)+size(x.right)+1;
        return x;
    }

    public Key min(){
        return min(root).key;
    }
    private Node min(Node x){
        if(x.left==null) return x;
        return min(x.left);
    }

    public Key max() {
        return max(root).key;
    }
    private Node max(Node x) {
        if (x.right == null) return x;
        else                 return max(x.right);
    }

    public Key floor(Key key){
        Node x=floor(root,key);
        if(x==null) return null;
        else return x.key;
    }
    private Node floor(Node x,Key key){
        if(x==null) return null;
        int cmp=key.compareTo(x.key);
        if(cmp==0) return x;
        if(cmp<0) return floor(x.left,key);
        Node t=floor(x.right,key);
        if(t!=null) return t;
        else return x;
    }

    public Key ceiling(Key key){
        Node x=ceiling(root,key);
        if(x==null) return null;
        else return x.key;
    }
    private Node ceiling(Node x,Key key){
        if(x==null) return null;
        int cmp=key.compareTo(x.key);
        if(cmp==0) return x;
        if(cmp>0) return ceiling(x.right,key);
        Node t=ceiling(x.left,key);
        if(t!=null) return t;
        else return x;
    }

    public Key select(int k){
        return select(root, k).key;
    }
    private Node select(Node x,int k){
        //返回排名为k的结点
        if(x==null) return null;
        int t=size(x.left);
        if(t>k) return select(x.left,k);
        else if(t<k) return select(x.right,k-t-1);
        else return x;
    }

    public int rank(Key key){
        return rank(key,root);
    }
    private int rank(Key key,Node x){
        //返回以x为根结点的子树中小于键key的键的数量
        if(x==null) return 0;
        int cmp=key.compareTo(x.key);
        if(cmp<0) return rank(key,x.left);
        else if(cmp>0) return size(x.left)+1+rank(key,x.right);
        else return size(x.left);
    }

    public void deleteMin(){
        root=deleteMin(root);
    }
    private Node deleteMin(Node x){
        if(x.left==null) return x.right;
        x.left=deleteMin(x.left);
        x.N=size(x.left)+size(x.right)+1;
        return x;
    }

    public void deleteMax() {
        root = deleteMax(root);
    }
    private Node deleteMax(Node x) {
        if (x.right == null) return x.left;
        x.right = deleteMax(x.right);
        x.N= size(x.left) + size(x.right) + 1;
        return x;
    }

    public void delete(Key key){
        root=delete(root,key);
    }
    private Node delete(Node x,Key key){
        if(x==null) return null;
        int cmp=key.compareTo(x.key);
        if(cmp<0) x.left=delete(x.left,key);
        else if(cmp>0) x.right=delete(x.right,key);
        else{
            if(x.right==null) return x.left;
            if(x.left==null) return x.right;
            Node t=x;
            x=min(t.right);
            x.right=deleteMin(t.right);
            x.left=t.left;
        }
        x.N=size(x.left)+size(x.right)+1;
        return x;
    }

    public Iterable<Key> keys(){
        if (isEmpty()) return new Queue<>();
        return keys(min(),max());
    }
    public Iterable<Key> keys(Key lo,Key hi){
        Queue<Key> queue=new Queue<>();
        keys(root,queue,lo,hi);
        return queue;
    }
    private void keys(Node x, Queue<Key> queue, Key lo, Key hi){
        if(x==null) return;
        int cmplo=lo.compareTo(x.key);
        int cmphi=hi.compareTo(x.key);
        if(cmplo<0) keys(x.left,queue,lo,hi);//查找左子树
        if(cmplo<=0 && cmphi>=0) queue.enqueue(x.key);
        if (cmplo>0) keys(x.right,queue,lo,hi);//查找右子树
    }

    public boolean contains(Key key) {
        return get(key) != null;
    }
    public int size(Key lo, Key hi) {
        if (lo.compareTo(hi) > 0) return 0;
        if (contains(hi)) return rank(hi) - rank(lo) + 1;
        else              return rank(hi) - rank(lo);
    }

    public static void main(String[] args) {
        BinarySearchST<String, Integer> st = new BinarySearchST<>(100);
        // 测试数据:S E A R C H E X A M P L E
        Scanner sc=new Scanner(System.in);
        String ss=sc.nextLine();
        String[] keys=ss.split("\\s+");
        for (int i = 0; i<keys.length; i++) {
            st.put(keys[i], i);
        }
        for (String s : st.keys()) {
            System.out.println(s + " " + st.get(s));
        }
    }
}

4平衡查找树(红黑树)

5散列表

如果所有的键都是小整数,可以用一个数组来实现无序的符号表,将键作为数组的索引而数组中键i处所存储的就是它对应的值,这样可以快速访问任意键的值。使用散列表分为两步:
第一步是用散列函数将被查找的键转化为一数组的一个索引。理想情况下不同的键都会转化为不同的索引值。这只是理想情况,所以需要面对两个或多个键会散列到形同的索引值的情况。
第二步处理碰撞冲突:拉链法和线性探测法。

散列函数

正整数:除留余数法。
选择大小为素数M的数组,对于任意正整数k,计算k除以M的余数(k%M)。
浮点数:将键表示为二进制数再使用除留余数法。
字符串:

int hash = 0;
for (int i = 0; i < s.length(); i++)
hash = (R * hash + s.charAt(i)) % M;

如果R比任何字符的值都大,这种计算相当于将字符串单作一个N位的R进制数,将它除以M并取余。

将hashCode的返回值转化为一个数组索引

5.1基于拉链法的散列表

原理

代码实现

package searching;

import edu.princeton.cs.algs4.Queue;
import java.util.Scanner;

public class SeparateChainingHashST<Key,Value> {
    private int N;//键值对总数
    private int M;//散列表的大小
    private SequentialSearchST<Key,Value>[] st;//存放链表对象的数组
    public SeparateChainingHashST(){
        this(997);
    }
    public SeparateChainingHashST(int M){
        //创建M条链表
        this.M=M;
        st=(SequentialSearchST<Key, Value>[]) new SequentialSearchST[M];
        for(int i=0;i<M;i++){
            st[i]=new SequentialSearchST<>();
        }
    }
    public int size(){
        return N;
    }
    public boolean isEmpty() {
        return size() == 0;
    }
    public boolean contains(Key key) {
        return get(key) != null;
    }
    private int hash(Key key){
        return (key.hashCode() & 0x7fffffff)%M;
    }
    public Value get(Key key){
        return st[hash(key)].get(key);
    }
    public void put(Key key,Value val){
        int i = hash(key);
        if (!st[i].contains(key)) N++;
        st[i].put(key, val);
    }
    public void delete(Key key) {
        int i = hash(key);
        if (st[i].contains(key)) N--;
        st[i].delete(key);
    }
    public Iterable<Key> keys(){
        Queue<Key> queue = new Queue<>();
        for(int i=0;i<M;i++){
            for (Key key:st[i].keys()) {
                queue.enqueue(key);
            }
        }
        return queue;
    }
    public static void main(String[] args) {
        SeparateChainingHashST<String, Integer> st = new SeparateChainingHashST<>();
        // 测试数据:S E A R C H E X A M P L E
        Scanner sc=new Scanner(System.in);
        String ss=sc.nextLine();
        String[] keys=ss.split("\\s+");
        for (int i = 0; i<keys.length; i++) {
            st.put(keys[i], i);
        }
        for (String s : st.keys())
            System.out.println(s + " " + st.get(s));
    }
}

5.2基于线性探测法的散列表

原理


删除

直接将键所在位置设为null不行,这会使得在此位置后的元素无法被找到,因此,需要将簇中被删除的右侧的所有键重新插入散列表。

代码实现

package searching;

import edu.princeton.cs.algs4.Queue;
import java.util.Scanner;

public class LinearProbingHashST<Key,Value>{
    private int N;//符号表中的键值对总数
    private int M;//线性探测表的大小,M>N
    private Key[] keys;
    private Value[] vals;
    public LinearProbingHashST(){
     this(4);
    }
    public LinearProbingHashST(int cap){
        M = cap;
        N = 0;
        keys=(Key[]) new Object[M];
        vals=(Value[]) new Object[M];
    }
    private int hash(Key key){
        return (key.hashCode() & 0x7fffffff)%M;
    }
    public void put(Key key,Value val){
        if(N>=M/2) resize(2*M);//将M加倍
        int i;
        for(i=hash(key);keys[i]!=null;i=(i+1)%M){
            if(keys[i].equals(key)){
                vals[i]=val;
                return;
            }
        }
        keys[i]=key;
        vals[i]=val;
        N++;
    }
    public Value get(Key key){
        for(int i=hash(key);keys[i]!=null;i=(i+1)%M){
            if(keys[i].equals(key)){
                return vals[i];
            }
        }
        return null;
    }
    private void resize(int cap){
        LinearProbingHashST<Key,Value> t;
        t=new LinearProbingHashST<>(cap);
        for(int i=0;i<M;i++){
            if(keys[i]!=null){
                t.put(keys[i],vals[i]);
            }
        }
        keys=t.keys;
        vals=t.vals;
        M=t.M;
    }
    public int size() {
        return N;
    }
    public boolean isEmpty() {
        return size() == 0;
    }
    public boolean contains(Key key) {
        return get(key) != null;
    }
    public void delete(Key key) {
        if (!contains(key)) return;

        // find position i of key
        int i = hash(key);
        while (!key.equals(keys[i])) {
            i = (i + 1) % M;
        }

        // delete key and associated value
        keys[i] = null;
        vals[i] = null;

        // rehash all keys in same cluster
        i = (i + 1) % M;
        while (keys[i] != null) {
            // delete keys[i] an vals[i] and reinsert
            Key   keyToRehash = keys[i];
            Value valToRehash = vals[i];
            keys[i] = null;
            vals[i] = null;
            N--;
            put(keyToRehash, valToRehash);
            i = (i + 1) % M;
        }
        N--;
        // halves size of array if it's 12.5% full or less
        if (N > 0 && N <= M/8) resize(M/2);
    }
    public Iterable<Key> keys() {
        Queue<Key> queue = new Queue<>();
        for (int i = 0; i <M; i++)
            if (keys[i] != null) queue.enqueue(keys[i]);
        return queue;
    }
    public static void main(String[] args) {
        LinearProbingHashST<String, Integer> st = new LinearProbingHashST<>();
        // 测试数据:S E A R C H E X A M P L E
        Scanner sc=new Scanner(System.in);
        String ss=sc.nextLine();
        String[] keys=ss.split("\\s+");
        for (int i = 0; i<keys.length; i++) {
            st.put(keys[i], i);
        }
        for (String s : st.keys())
            System.out.println(s + " " + st.get(s));
    }
}

5.3散列表优缺点

6各种符号表的实现对比