使用 Python 分析达里奥的热文《The Changing World Order》第七章《US-China Relations and Wars》中的词频。

#CalUSChinaRelationsV1.py
def getText():
    """Read USChinaRelations.txt and return its text with punctuation blanked.

    Every character in the punctuation list below is replaced by a single
    space, so a later whitespace split yields bare words. The apostrophe is
    not in the list and is therefore left in place.

    Returns:
        str: the file content, decoded as UTF-8, with the listed punctuation
        characters replaced by spaces.

    Raises:
        OSError: if the file cannot be opened (e.g. it does not exist).
    """
    punctuation = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~'
    # A context manager closes the file handle deterministically; the
    # original open(...).read() left closing to the garbage collector.
    with open("USChinaRelations.txt", "r", encoding="utf-8") as fh:
        txt = fh.read()
    # One translate() pass instead of one replace() per punctuation character.
    return txt.translate(str.maketrans(punctuation, " " * len(punctuation)))
    
# Load the chapter text; getText() has already replaced punctuation with spaces.
USChinaTxt = getText()

# Stop words that are deleted from the word tally before ranking.
# Matching is case-sensitive: both "the" and "The" are listed explicitly.
excloudes = {
    "the", "and", "to", "of", "that", "is", "in", "are",
    "it", "be", "will", "they", "for", "have", "which", "other",
    "them", "their", "more", "with", "because", "these", "as", "what",
    "most", "by", "on", "we", "power", "would", "has", "how",
    "or", "not", "than", "there", "its", "can", "so", "from",
    "The", "at", "being", "much", "each", "both", "this", "about",
    "if", "do", "those", "one", "when",
}

# Split the cleaned text into whitespace-separated tokens.
words = USChinaTxt.split()

# Tally how often each token occurs, folding "United States" into "US".
counts = {}
skip_next = False
for idx, word in enumerate(words):
    if skip_next:
        # This token is the "States" half of a pair already counted as "US".
        skip_next = False
        continue
    if len(word) == 1:
        # Single characters carry no meaning here.
        continue
    if word == "United" and idx + 1 < len(words) and words[idx + 1] == "States":
        # split() never yields a token containing a space, so comparing a
        # token against "United States" can never match; the phrase has to be
        # recognised as two adjacent tokens instead.
        rword = "US"
        skip_next = True
    else:
        rword = word
    counts[rword] = counts.get(rword, 0) + 1

# Remove stop words. pop() with a default tolerates stop words that never
# occur in the text, where `del counts[word]` would raise KeyError.
for word in excloudes:
    counts.pop(word, None)


# Turn the tally into a list of (word, count) tuples ordered by descending
# count, then print the 30 most frequent entries.
items = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
# Slicing copes with fewer than 30 distinct words, where indexing
# items[i] for i in range(30) would raise IndexError.
for word, count in items[:30]:
    print("{0:<10}{1:>5}".format(word, count))
        
    

去掉各种没用的词,结果分析如下
Chinese 109
China 95
US 85
United 71
States 62
countries 60
war 57
Americans 48
currency 46
you 45
people 38
fight 37
debt 36
As 33
all 33
important 33
wars 33
In 31
who 31
American 31
big 30
leaders 30
That 30
best 30
For 30
now 30
should 29
world 29
like 28
money 28

由此可见,中国和美国是文章中最常提及的词,wars 和 money 也被反复提及。