通过 Python 生成词云的代码

记录一下相关代码：先用 jieba 对文本分词，再用 wordcloud 按蒙版图片生成词云。
#coding:utf-8

# 导入需要模块
import jieba
from PIL import Image 
import numpy as np 
import matplotlib.pyplot as plt 
from wordcloud import WordCloud, STOPWORDS 
import io

# Disabled minimal example. It lives inside a bare string literal, so it is
# never executed. It builds a word cloud directly from D:\test.txt with a white
# background and a fixed 800x600 canvas (no segmentation, mask or stop words),
# shows it with matplotlib and saves it as test.png.
'''
text = open(r'D:\test.txt','r').read()

wc = WordCloud(
        background_color="white", max_font_size=300,
        width=800, height=600, margin=2 )

wc.generate(text)

plt.imshow(wc)
plt.axis("off")
plt.show()

wc.to_file('test.png')
'''


# Read the source text. The file is decoded as UTF-8 explicitly, and the
# `with` block closes the handle as soon as the content has been read
# (a bare io.open(...).read() leaves the handle open).
with io.open(r'D:\dong.txt', 'r', encoding='utf-8') as text_file:
    text = text_file.read()
print(u'加载txt文件成功')

# Segment the text with jieba (cut_all=False, i.e. not full mode) and join the
# tokens with single spaces so that whitespace-based tokenizers can tell the
# individual words apart.
wordlist_after_jieba = jieba.cut(text, cut_all=False)
wl_space_split = " ".join(wordlist_after_jieba)
print(u'分词成功')

# Load the mask picture and convert it to a NumPy array; the array is later
# passed to WordCloud as its `mask` argument.
# NOTE: the path is absolute (D:\mask.png). Without the backslash,
# 'D:mask.png' is a drive-relative path that resolves against the current
# directory on drive D:, unlike the absolute D:\ path used for the text file.
# np.array() copies the pixel data, so the image file can be closed right away.
with Image.open(r'D:\mask.png') as mask_image:
    mask = np.array(mask_image)
print(u'加载图片成功！')


# Words that must not be shown in the cloud: wordcloud's built-in STOPWORDS
# plus our own additions. More words can be added the same way,
# e.g. stopwords.add("收到").
stopwords = set(STOPWORDS) | {"此用户没有填写评价"}


# Create the word cloud object and set its parameters.
# NOTE: once `mask` is used, `width` / `height` must not be relied on, because
# the output size is already determined by the mask image.
wc = WordCloud(
    # A font must be given explicitly; without one the Chinese words come out
    # garbled.
    font_path=r'C:\Windows\Fonts\SIMYOU.ttf',
    # Canvas background colour.
    background_color="black",
    mask=mask,
    stopwords=stopwords,
    max_words=300,
    min_font_size=10,
    max_font_size=200,
    # Fixed integer seed for the random state, so repeated runs give the same
    # layout and colours (it is a seed, not a count of colour schemes).
    random_state=30,
)


# Generate the cloud from the space-separated jieba tokens, not from the raw
# text: WordCloud tokenizes its input itself and cannot segment unspaced
# Chinese, so passing `text` would throw away the jieba segmentation result.
wc.generate(wl_space_split)

# Save the rendered image under the given file name.
wc.to_file(u"三体3死神永生词云.png")


def _show_without_axes(image, **imshow_kwargs):
    """Draw *image* on the current matplotlib figure and hide the axes."""
    plt.imshow(image, interpolation='bilinear', **imshow_kwargs)
    plt.axis("off")


# Display the word cloud with matplotlib on the first figure.
_show_without_axes(wc)

# Open a second figure and display the mask image in grayscale.
plt.figure()
_show_without_axes(mask, cmap=plt.cm.gray)

# Show both figures, then report success.
plt.show()
print(u'生成词云成功!')