import requests
from bs4 import BeautifulSoup
import bs4


def getHTMLText(url):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection problem, timeout, invalid URL, or an HTTP error status).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # Use the encoding detected from the body rather than the one
        # announced in the headers, so non-ASCII text decodes correctly.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: callers treat "" as "no page available".
        return ""


def fillUnivList(ulist, html):
    """Append one ``[col0, col1, col2]`` entry to *ulist* per table row.

    The first ``<tbody>`` in *html* is scanned; for every child tag the
    ``.string`` of its first three ``<td>`` cells is collected.

    Args:
        ulist: List that is extended in place.
        html: HTML source to parse. May be empty.

    Returns:
        None. *ulist* is left untouched when *html* has no ``<tbody>``.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find("tbody")
    if tbody is None:
        # Empty download or a page without a table: nothing to collect.
        return
    for tr in tbody.children:
        # .children also yields whitespace text nodes; only tags are rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr("td")
        if len(tds) < 3:
            # Not a data row (e.g. a header or spacer row).
            continue
        ulist.append([tds[0].string, tds[1].string, tds[2].string])


def printUnivList(ulist, num):
    """Print a header line followed by at most *num* entries of *ulist*.

    Args:
        ulist: Sequence of rows; the first three items of each row are shown.
        num: Maximum number of rows to print. Fewer are printed when
            *ulist* is shorter; none when *num* is zero or negative.
    """
    row_fmt = "{:^10}\t{:^6}\t{:^10}"
    print(row_fmt.format("排名", "学校名称", "总分"))
    # max() keeps a negative num from turning into a "drop the tail" slice.
    for u in ulist[:max(num, 0)]:
        # A cell is None when its <td> had nested markup; None cannot be
        # formatted with an alignment spec, so show it as blank instead.
        cells = ["" if cell is None else cell for cell in u[:3]]
        print(row_fmt.format(*cells))


def main():
    """Fetch the ranking page, parse its table, and print the first 20 rows."""
    uinfo = []
    url = 'http://www.zuihaodaxue.com/zuihaodaxuepaiming2016.html'
    html = getHTMLText(url)
    fillUnivList(uinfo, html)
    printUnivList(uinfo, 20)


if __name__ == "__main__":
    main()
结合这门课第一次敲的爬虫,没有 Python 语法基础,主要是老师说什么我就怎么敲,没有什么技术含量。就是不知道该怎么敲,一直报错。