# HTTP: fetch only the response header information
# requests: check the encoding of Traditional Chinese text
# Local file version: guess the text encoding of a file by trial decoding.
# Candidate codecs are tried in list order; the first one that decodes the
# bytes without raising UnicodeDecodeError is printed and the search stops.
# NOTE: a clean decode only shows the bytes are valid in that codec, so the
# printed name is a best guess and depends on the order of the list.
encodetype = [
    'big5', 'big5hkscs', 'cp950', 'gb2312', 'gbk', 'gb18030', 'hz',
    'utf_16', 'utf_16_be', 'utf_16_le', 'utf_7', 'utf_8',
]
# Read the raw bytes inside a context manager so the file handle is closed
# deterministically as soon as the read finishes.
with open('traditional_simplified.csv', 'rb') as csvfile:
    rawdata = csvfile.read()
for encodeid in encodetype:
    try:
        # Keep the decoded text; it stays bound to the winning codec's output.
        rawdatafile = rawdata.decode(encodeid)
    except UnicodeDecodeError:
        print('not this encoding')
    else:
        # First codec that decodes cleanly: report it and stop searching.
        print(encodeid)
        break
# URL version: guess the text encoding of a web page by trial decoding.
# Candidate codecs are tried in list order; the first one that decodes the
# response body without raising UnicodeDecodeError is printed and the search
# stops.
# NOTE: a clean decode only shows the bytes are valid in that codec, so the
# printed name is a best guess and depends on the order of the list.
# `import urllib` alone does not load the `request` submodule, so it is
# imported explicitly (this still binds the name `urllib`).
import urllib.request

encodetype = [
    'big5', 'big5hkscs', 'cp950', 'gb2312', 'gbk', 'gb18030', 'hz',
    'utf_16', 'utf_16_be', 'utf_16_le', 'utf_7', 'utf_8',
]
# Fetch the page body inside a context manager so the HTTP response (and its
# underlying connection) is closed once the bytes have been read.
with urllib.request.urlopen('https://tw.yahoo.com') as response:
    rawdata = response.read()
for encodeid in encodetype:
    try:
        # Keep the decoded text; it stays bound to the winning codec's output.
        rawdatafile = rawdata.decode(encodeid)
    except UnicodeDecodeError:
        print('not this encoding')
    else:
        # First codec that decodes cleanly: report it and stop searching.
        print(encodeid)
        break