目前公司正在探索柴油加油相关的项目,需要我爬取部分省份的加油站数据,用于项目评估。我在百度和 CSDN 上检索后,没有找到一份完整且令人满意的代码,于是结合网上的示例并加以修改,整理出以下代码,省时省力,复制粘贴即可使用。

import json
from urllib import request
from urllib.parse import quote

import xlwt
import pandas as pd

# Apply for a key on the AMap (Gaode) developer platform and paste it here.
amap_web_key = ''
# Keyword-search endpoint.
poi_search_url = "http://restapi.amap.com/v3/place/text"
#poi_boundary_url = "https://ditu.amap.com/detail/get/detail"
#from transCoordinateSystem import gcj02_to_wgs84

# Code of the province to crawl; replace with the one you need.
cityname = '130000'

# The workbook is an excerpt of AMap's cityCode table holding the codes of
# every city under that province; only the column at index 1 is read.
# A raw string keeps the backslashes of the Windows path literal instead of
# relying on "\c" and "\h" being unrecognised escape sequences.
df = pd.read_excel(r"E:\cityNew\hebei.xlsx", usecols=[1],
                   names=None)
df_li = df.values.tolist()
# Each row is a one-element list; the codes are kept as strings.
nanning_areas = [str(s_li[0]) for s_li in df_li]
#nanning_areas = ['370100','370200','370300','370400','370500','370600','370700','370800','370900','371000','371100','371300','371400','371500','371600','371700']


# Search keywords; one output workbook is written per keyword.
classes = ['加油站']

def getpois(cityname, keywords):
    """Collect every POI for one city and one keyword, page by page.

    Args:
        cityname: City identifier forwarded to ``getpoi_page``.
        keywords: Search keyword forwarded to ``getpoi_page``.

    Returns:
        A list with the POI dicts of all pages, in the order received.
    """
    poilist = []
    page = 1
    while True:  # keep requesting pages until the service has nothing left
        raw = getpoi_page(cityname, keywords, page)
        print(raw)
        result = json.loads(raw)

        # A failed request (bad key, quota exhausted, ...) is reported with a
        # status other than "1" and may carry no 'count' field at all, so it
        # must be detected before 'count' is looked at.  Stop paging and keep
        # whatever was collected so far instead of dying with a KeyError.
        if str(result.get('status')) != '1':
            print('AMap request failed for city ' + str(cityname) + ', page '
                  + str(page) + ': ' + str(result.get('info')))
            break

        # An empty page also ends the loop; otherwise a response with a
        # non-zero count but no POIs would make this loop run forever.
        if result.get('count') == '0' or not result.get('pois'):
            break

        hand(poilist, result)
        page += 1
    return poilist


def _cell_text(value, default=''):
    """Return *value* in a form that can be written into a worksheet cell.

    ``None`` and the empty list become *default*; other lists/tuples are
    joined with ';'; everything else is returned unchanged.
    """
    if value is None or value == []:
        return default
    if isinstance(value, (list, tuple)):
        return ';'.join(str(item) for item in value)
    return value


def write_to_excel(poilist, cityname, classfield):
    """Write the collected POIs to ``<cityname>_<classfield>.xls``.

    Args:
        poilist: List of POI dicts; each must provide 'id', 'name', 'type',
            'pname', 'cityname', 'adname' and 'location'. 'address' is
            optional.
        cityname: Used only as the prefix of the output file name.
        classfield: Used as the sheet name and as the file-name suffix.

    Raises:
        KeyError: If a POI lacks one of the required fields.
    """
    # A Workbook object corresponds to one Excel file.
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = book.add_sheet(classfield, cell_overwrite_ok=True)

    # First row: column titles.
    headers = ('id', '名字', '类型', '省', '市', '县/区', '地址', '经纬度')
    for col, title in enumerate(headers):
        sheet.write(0, col, title)

    for row, poi in enumerate(poilist, start=1):
        # 'type' is a ';'-separated hierarchy; the third level is exported.
        # Fall back to the last level when fewer than three are present
        # rather than failing with IndexError.
        levels = str(_cell_text(poi['type'])).split(';')
        sub_type = levels[2] if len(levels) > 2 else levels[-1]

        # NOTE: the POI's own city goes into the row. It is deliberately not
        # stored in `cityname`, which must keep the caller's value for the
        # output file name.
        record = (
            poi['id'],
            poi['name'],
            sub_type,
            poi['pname'],
            poi['cityname'],
            poi['adname'],
            # A missing or empty address is exported as the text "null".
            _cell_text(poi.get('address'), 'null'),
            # Written exactly as received; no coordinate conversion is done.
            poi['location'],
        )
        for col, value in enumerate(record):
            # The service may deliver an empty field as [] (presumably; see
            # the AMap response format), which the worksheet cannot store.
            sheet.write(row, col, _cell_text(value))

    # Finally, save everything to the target Excel file.
    book.save('{}_{}.xls'.format(cityname, classfield))


def hand(poilist, result):
    """Append every entry of ``result['pois']`` to ``poilist`` in place.

    ``result`` is the already-parsed response dict; nothing is returned.
    """
    poilist.extend(result['pois'])


def getpoi_page(cityname, keywords, page, timeout=30):
    """Fetch one page of keyword-search results and return the raw body.

    Args:
        cityname: City identifier sent as the ``city`` query parameter.
        keywords: Search keyword; URL-quoted before sending.
        page: 1-based page number to request (20 results per page).
        timeout: Seconds to wait for the service before giving up, so a
            stalled connection cannot hang the whole crawl.

    Returns:
        The response body decoded as UTF-8 text.

    Raises:
        urllib.error.URLError: If the request fails or times out.
    """
    # Parameter order and values match what the service has been receiving.
    query = (
        "?key=" + amap_web_key
        + '&extensions=all'
        + '&keywords=' + quote(keywords)
        # str() lets callers pass the city code as an int as well.
        + '&city=' + quote(str(cityname))
        + '&citylimit=true'
        + '&offset=20'
        + '&page=' + str(page)
        + '&output=json'
    )
    with request.urlopen(poi_search_url + query, timeout=timeout) as resp:
        return resp.read().decode('utf-8')


# Driver: for each keyword, crawl every city code, merge the results and
# write them into a single workbook.
for category in classes:
    merged = []
    for adcode in nanning_areas:
        found = getpois(adcode, category)
        print('当前城区:{}, 分类:{}, 总的有{}条数据'.format(adcode, category, len(found)))
        merged += found
    print("所有城区的数据汇总,总数为:{}".format(len(merged)))

    write_to_excel(merged, cityname, category)

    print('================分类:{}写入成功'.format(category))