"""
Scrape the Douban Movie Top 250 chart - complete example code.
"""
import codecs
import xlwings as xw
import requests
from bs4 import BeautifulSoup
# First page of the chart; parse_html() appends the "next page" href to it.
DOWNLOAD_URL = 'http://movie.douban.com/top250/'

# Module-level accumulators filled by parse_html() and read by showExcel().
# They are parallel lists: index i of each one describes the same movie.
movie_name_list, director_name_list, score_list = [], [], []
def download_page(url, timeout=10):
    """Fetch *url* and return the raw response body.

    The request is sent with a desktop-browser ``User-Agent`` header.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status, so an error page is never passed on as if it were the
            requested content.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly here instead of letting the parser choke on an error page.
    response.raise_for_status()
    return response.content
def parse_html(html):
    """Extract the movies on one chart page and locate the next page.

    For every ``<li>`` inside the ``<ol class="grid_view">`` list, the text
    of the first ``span.title``, of the first ``<p>`` in the ``div.bd`` block
    and of ``span.rating_num`` is appended to the module-level lists
    ``movie_name_list``, ``director_name_list`` and ``score_list``.

    Args:
        html: Markup of one chart page (``str`` or ``bytes``).

    Returns:
        A ``(movie_name_list, next_url)`` tuple. ``movie_name_list`` is the
        module-level list itself, not a copy. ``next_url`` is
        ``DOWNLOAD_URL`` joined with the href of the "next" link, or ``None``
        when the page has no such link.

    Raises:
        ValueError: If the page does not contain the movie list at all.
    """
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(html, 'html.parser')
    movie_list_soup = soup.find('ol', attrs={'class': 'grid_view'})
    if movie_list_soup is None:
        raise ValueError('movie list <ol class="grid_view"> not found in page')

    for movie_li in movie_list_soup.find_all('li'):
        detail = movie_li.find('div', attrs={'class': 'hd'})
        director_detail = movie_li.find('div', attrs={'class': 'bd'})
        score_detail = movie_li.find('div', attrs={'class': 'star'})

        movie_name = detail.find('span', attrs={'class': 'title'}).getText()
        # The first paragraph of the info block; no attribute filter is
        # needed to select it.
        director_name = director_detail.find('p').getText()
        score = score_detail.find('span', attrs={'class': 'rating_num'}).getText()

        director_name_list.append(director_name)
        movie_name_list.append(movie_name)
        score_list.append(score)

    # The pager span may be absent, or present without a link inside it;
    # both cases mean there is nothing further to fetch.
    next_span = soup.find('span', attrs={'class': 'next'})
    next_page = next_span.find('a') if next_span is not None else None

    if next_page is not None:
        return movie_name_list, DOWNLOAD_URL + next_page['href']

    return movie_name_list, None
def showExcel():
    """Open the workbook in a visible Excel window and fill in the results.

    Starts a visible Excel instance with alerts disabled, opens the workbook
    at the hard-coded path and writes a header row followed by one row per
    collected movie (name, details, score) into sheet ``sheet1``, starting at
    cell A1. The data comes from the module-level lists ``movie_name_list``,
    ``director_name_list`` and ``score_list``.

    The workbook is neither saved nor closed and the Excel instance is left
    running; this function only displays the data.
    """
    app = xw.App(visible=True, add_book=False)
    app.display_alerts = False

    filepath = r'D://Desktop/myexcel.xlsx'
    wb = app.books.open(filepath)
    sht = wb.sheets['sheet1']

    # Build the whole table first: header, then one row per movie. The three
    # lists are filled in lockstep, so zip() pairs up matching entries.
    rows = [["电影名称", "详细信息", "豆瓣评分"]]
    rows.extend(
        [name, detail, score]
        for name, detail, score in zip(
            movie_name_list, director_name_list, score_list
        )
    )

    # Assigning a nested list to the top-left cell writes the full block in a
    # single call instead of one round trip to Excel per cell.
    sht.range('A1').value = rows
def main():
    """Crawl the chart page by page, starting at ``DOWNLOAD_URL``.

    Each page is downloaded and handed to ``parse_html``, which returns the
    address of the following page; the loop stops once that address is falsy.
    """
    next_url = DOWNLOAD_URL
    while next_url:
        page = download_page(next_url)
        # Only the follow-up address is needed from the parser's result.
        _, next_url = parse_html(page)
        
if __name__ == '__main__':
    # Crawl first so the module-level result lists are filled, then display
    # them in Excel.
    main()
    showExcel()