import pandas as pd
from math import log
def calcInfoEnt(path='dataSet.csv'):
    """Compute the base-2 information (Shannon) entropy of a CSV's labels.

    The file is read without a header row and its last column is treated
    as the class label. The result is ``-sum(p_k * log2(p_k))``, where
    ``p_k`` is the number of rows with label ``k`` divided by the total
    number of rows.

    Args:
        path: CSV file to read. Defaults to ``'dataSet.csv'``, the file
            the function originally had hard-coded.

    Returns:
        The information entropy in bits. The value is also printed.
    """
    datas = pd.read_csv(path, header=None)
    # Number of samples in each class; the Series index holds label values.
    labels = datas.iloc[:, -1].value_counts()
    dataNum = len(datas)  # total number of samples in the data set
    infoEnt = 0.0
    # Iterate over the counts themselves. Indexing ``labels[k]`` with a
    # positional integer is a *label* lookup on this Series, so it fails
    # (or reads the wrong class) whenever the labels are integers.
    for count in labels.values:
        CkD = count / dataNum
        infoEnt += CkD * log(CkD, 2)
    # The entropy formula carries a leading minus sign.
    print(-infoEnt)
    return -infoEnt
# Runs at module level: calls calcInfoEnt() and keeps the returned
# entropy in the module-level variable ``infoEnt``.
infoEnt = calcInfoEnt()


# Web-page footer residue (not code): Beijing public network security filing no. 11010502036488