import pandas as pd
from math import log

def calcInfoEnt(path='dataSet.csv'):
    """Compute the information (Shannon) entropy of the class labels in a CSV file.

    The file is read without a header row and its last column is taken as
    the class label of each sample. The entropy is
    ``-sum(p_k * log2(p_k))``, where ``p_k`` is the count of class ``k``
    divided by the total number of rows.

    Args:
        path: CSV file to read. Defaults to ``'dataSet.csv'``.

    Returns:
        The entropy in bits. The value is also printed to stdout.
    """
    datas = pd.read_csv(path, header=None)
    # Number of samples in each class; the resulting Series is indexed by
    # the class label itself, not by position.
    class_counts = datas.iloc[:, -1].value_counts()
    dataNum = len(datas)  # total number of samples in the data set
    infoEnt = 0.0
    # Iterate over the counts directly. Indexing the Series with 0..n-1
    # would be a label lookup, which fails (or picks the wrong class) when
    # the labels are integers that are not exactly 0..n-1.
    for count in class_counts:
        CkD = count / dataNum
        infoEnt += CkD * log(CkD, 2)
    # The entropy formula has a leading minus sign, hence -infoEnt.
    print(-infoEnt)
    return -infoEnt

# Runs at module load: computes the entropy of the labels in dataSet.csv
# (calcInfoEnt also prints it) and keeps the result in a module-level name.
infoEnt = calcInfoEnt()