目前公司正在探索柴油加油相关的项目,需要我爬取部分省份的加油站数据,用于项目评估。我通过百度搜索并浏览 CSDN 后,发现目前没有一份完整且令人满意的代码,于是结合网上的代码并加以修改,整理出以下代码:省时省力,复制粘贴后填入自己申请的 key 和省份代码即可使用。
"""Crawl POIs (gas stations by default) from the Amap keyword-search API.

For every area code listed in an Excel workbook, the script pages through
the keyword-search endpoint, collects the returned POIs and writes them all
into a single ``.xls`` workbook named ``<province code>_<keyword>.xls``.
"""
import json
from urllib import request
from urllib.parse import quote

import pandas as pd
import xlwt

# Amap Web-service key: apply for one on the Amap developer platform and
# paste it here.
amap_web_key = ''

# Keyword-search endpoint.
poi_search_url = "http://restapi.amap.com/v3/place/text"

# Province code to crawl; it is also used as the prefix of the output file.
cityname = '130000'

# Workbook holding the area codes to query (an excerpt of the cityCode table
# published by Amap, restricted to the cities of the province above).
# Raw string: the path contains backslashes that must not be read as escapes.
area_codes_path = r"E:\cityNew\hebei.xlsx"

# Search keywords; one output workbook is produced per keyword.
classes = ['加油站']

# Seconds to wait for one HTTP response before giving up, so that a stalled
# connection cannot hang the whole crawl.
request_timeout = 30

# Placeholder written to a cell whose value is missing or not writable.
_MISSING_CELL = "null"


def load_area_codes(path):
    """Return the area codes stored in the workbook at *path* as strings.

    Only the column at index 1 is read; every cell of that column is
    converted with ``str``.
    """
    frame = pd.read_excel(path, usecols=[1], names=None)
    return [str(row[0]) for row in frame.values.tolist()]


def getpois(cityname, keywords):
    """Fetch every result page for *keywords* inside the area *cityname*.

    Pages are requested one after another, starting at page 1, until the
    service reports a count of ``'0'`` or returns a page without POIs.

    Returns:
        The list of POI dictionaries accumulated over all pages.

    Raises:
        RuntimeError: if a response does not carry status ``'1'``. The
            original code failed with a bare ``KeyError`` on ``'count'`` for
            such responses; the message now includes the service's own
            ``info`` / ``infocode`` fields.
    """
    page = 1
    poilist = []
    while True:
        raw = getpoi_page(cityname, keywords, page)
        print(raw)
        result = json.loads(raw)
        # NOTE(review): per the Amap API docs '1' means success -- confirm.
        if result.get('status') != '1':
            raise RuntimeError(
                "Amap request failed for area " + str(cityname)
                + ", page " + str(page)
                + ": info=" + str(result.get('info'))
                + ", infocode=" + str(result.get('infocode'))
            )
        # An empty page also ends the loop, so the crawl cannot spin forever
        # if the count field stays non-zero.
        if result.get('count') == '0' or not result.get('pois'):
            break
        hand(poilist, result)
        page += 1
    return poilist


def _cell_text(value):
    """Return *value* if it can be written to a cell, else a placeholder.

    Strings and numbers are passed through unchanged; anything else
    (``None``, lists, dicts) becomes ``"null"``.
    """
    # NOTE(review): the API is reported to send an empty list for blank
    # fields, which the sheet writer rejects -- confirm against live data.
    if isinstance(value, (str, int, float)):
        return value
    return _MISSING_CELL


def _third_level_type(types):
    """Return the third ``;``-separated level of *types*.

    Falls back to the last available level when there are fewer than three,
    and to the placeholder when *types* is not a string.
    """
    if not isinstance(types, str):
        return _MISSING_CELL
    levels = types.split(';')
    return levels[2] if len(levels) > 2 else levels[-1]


def write_to_excel(poilist, cityname, classfield):
    """Write *poilist* to ``<cityname>_<classfield>.xls``.

    The sheet is named *classfield*. Row 0 holds the column titles and each
    following row holds one POI: id, name, third-level type, province, city,
    district, address and the ``location`` value exactly as returned by the
    service. Missing or non-writable values are written as ``"null"``.

    The file name is built from the *cityname* argument. The original code
    rebound that name inside the row loop, so the file ended up named after
    the city of the last POI instead.

    NOTE(review): the ``.xls`` format holds at most 65,536 rows per sheet;
    larger result sets would need to be split.
    """
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = book.add_sheet(classfield, cell_overwrite_ok=True)

    # Header row.
    headers = ('id', '名字', '类型', '省', '市', '县/区', '地址', '经纬度')
    for col, title in enumerate(headers):
        sheet.write(0, col, title)

    # One row per POI, starting right below the header.
    for row, poi in enumerate(poilist, start=1):
        cells = (
            _cell_text(poi.get('id')),
            _cell_text(poi.get('name')),
            _third_level_type(poi.get('type')),
            _cell_text(poi.get('pname')),
            _cell_text(poi.get('cityname')),
            _cell_text(poi.get('adname')),
            _cell_text(poi.get('address')),
            # Coordinates are stored unconverted, as one "location" value.
            _cell_text(poi.get('location')),
        )
        for col, value in enumerate(cells):
            sheet.write(row, col, value)

    book.save(cityname + "_" + classfield + '.xls')


def hand(poilist, result):
    """Append every POI of the parsed response *result* to *poilist*."""
    poilist.extend(result['pois'])


def getpoi_page(cityname, keywords, page):
    """Request one result page and return the response body as text.

    The query asks for extended details (``extensions=all``), restricts the
    results to the given area (``citylimit=true``), uses 20 records per page
    and JSON output. The body is decoded as UTF-8.
    """
    req_url = (
        poi_search_url
        + "?key=" + amap_web_key
        + '&extensions=all&keywords=' + quote(keywords)
        + '&city=' + quote(cityname)
        + '&citylimit=true'
        + '&offset=20'
        + '&page=' + str(page)
        + '&output=json'
    )
    with request.urlopen(req_url, timeout=request_timeout) as response:
        return response.read().decode('utf-8')


def main():
    """Crawl every configured keyword over all area codes and save the results."""
    nanning_areas = load_area_codes(area_codes_path)
    for clas in classes:
        classes_all_pois = []
        for area in nanning_areas:
            pois_area = getpois(area, clas)
            print('当前城区:' + str(area) + ', 分类:' + str(clas) + ", 总的有" + str(len(pois_area)) + "条数据")
            classes_all_pois.extend(pois_area)
        print("所有城区的数据汇总,总数为:" + str(len(classes_all_pois)))
        write_to_excel(classes_all_pois, cityname, clas)
        print('================分类:' + str(clas) + "写入成功")


if __name__ == "__main__":
    main()