利用jieba库统计《全唐诗》中写诗最勤奋的10位诗人。

#CalQuantangshiV2.py
from collections import Counter

import jieba

def getText(path="quantangshi.txt"):
    """Read a UTF-8 text file and return it lowercased with noise blanked out.

    Each ASCII punctuation character and digit listed in ``noise`` below,
    plus the full-width brackets, is replaced by a single space. Every
    other character passes through unchanged.

    Args:
        path: File to read. Defaults to "quantangshi.txt", resolved against
            the current working directory.

    Returns:
        The cleaned text as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    noise = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~1234567890]】【'
    # The context manager closes the handle even if read() raises.
    with open(path, "r", encoding="utf-8") as f:
        txt = f.read()
    # A single translate() pass maps every noise character to a space,
    # instead of rescanning the whole text once per character.
    return txt.lower().translate(str.maketrans(noise, " " * len(noise)))


# Tokens removed from the tally before ranking.
# NOTE(review): presumably hand-picked so that the top ten are the intended
# poet names for this particular corpus — confirm before reusing elsewhere.
excloudes = {
    "何处", "一作", "万里", "今日", "二首", "春风", "白云", "不知",
    "千里", "不可", "长安", "不见", "国学", "原典", "集部", "全唐诗",
    "故人", "无人", "不得", "明月", "齐己", "人间", "惆怅", "使君",
    "秋风", "悠悠", "相思", "如何", "青山", "白日", "何人", "相逢",
    "皎然", "江南", "乐章", "少年", "寂寞", "平生", "黄金", "司空",
    "山中", "何事", "贯休",
}

# Segment the cleaned text into a list of tokens.
words = jieba.lcut(getText())

# Count every token except single-character ones.
counts = Counter(word for word in words if len(word) != 1)

# pop() with a default tolerates an excluded word that never occurs in the
# text; `del counts[word]` would raise KeyError in that case.
for word in excloudes:
    counts.pop(word, None)

# (word, count) pairs ordered from most to least frequent.
items = counts.most_common()

# Slicing prints at most ten rows and cannot index past the end when fewer
# than ten distinct words remain.
for word, count in items[:10]:
    print("{0:<10}{1:>5}".format(word, count))

结果显示
白居易 2696
杜甫 1195
李白 964
刘禹锡 765
陆龟蒙 598
孟郊 596
元稹 595
韩愈 590
李商隐 562
韦应物 556