目前公司正在探索柴油加油相关的项目,需要我爬取某些省份的加油站数据,用于项目评估。我在百度和 CSDN 上查找后,没有找到一份完整且令人满意的代码,于是结合网上的资料并自行修改,整理出以下代码:省时省力,复制粘贴即可使用。
import json
from urllib import request
from urllib.parse import quote
import xlwt
import pandas as pd
# Amap (Gaode) Web-service API key. Apply for one on the Amap developer
# platform and paste it here; requests fail while this is empty.
amap_web_key = ''

# Keyword-search endpoint of the Amap place API. HTTPS is used so the API key
# is not sent in clear text.
poi_search_url = "https://restapi.amap.com/v3/place/text"
#poi_boundary_url = "https://ditu.amap.com/detail/get/detail"
#from transCoordinateSystem import gcj02_to_wgs84

# Code of the province being crawled; it is only used to name the output file.
cityname = '130000'

# The workbook is an excerpt of the cityCode table published by Amap that
# contains every city code under the province above. Only the second column
# (index 1) is read. The path is a raw string so the backslashes are never
# interpreted as escape sequences.
df = pd.read_excel(r"E:\cityNew\hebei.xlsx", usecols=[1],
                   names=None)
df_li = df.values.tolist()

# One code string per row of the workbook.
# NOTE(review): if the column contains blank cells pandas may load it as
# float, which would turn codes into strings like '130100.0' - confirm the
# sheet has no gaps.
nanning_areas = [str(s_li[0]) for s_li in df_li]
# Example of a hard-coded alternative to reading the workbook:
#nanning_areas = ['370100','370200','370300','370400','370500','370600','370700','370800','370900','371000','371100','371300','371400','371500','371600','371700']

# Search keywords (POI categories) to crawl; one output file is written per entry.
classes = ['加油站']
def getpois(cityname, keywords):
    """Collect every POI matching ``keywords`` inside ``cityname``.

    Pages are requested one after another through ``getpoi_page`` until the
    service reports a count of ``'0'`` or returns a page without any POIs.

    Args:
        cityname: City code or name passed to the search API.
        keywords: Search keyword (POI category).

    Returns:
        A list with the POI dicts of all pages, in the order received.

    Raises:
        RuntimeError: If the service answers with ``status == '0'``. The
            original code failed with a bare ``KeyError`` on such responses
            because they carry no ``count`` field.
    """
    poilist = []
    page = 1
    while True:
        raw = getpoi_page(cityname, keywords, page)
        print(raw)  # echo the raw response, as the original script did
        result = json.loads(raw)

        # Surface API-level failures (bad key, quota, ...) with the message
        # supplied by the service instead of an opaque KeyError.
        if str(result.get('status')) == '0':
            raise RuntimeError(
                'Amap place search failed: ' + str(result.get('info')))

        if str(result.get('count', '0')) == '0':
            break
        # A page without POIs means there is nothing more to fetch; stopping
        # here prevents an endless loop if the count stays non-zero.
        if not result.get('pois'):
            break

        hand(poilist, result)
        page += 1
    return poilist
# Column titles of the exported sheet, in column order.
_POI_HEADERS = ('id', '名字', '类型', '省', '市', '县/区', '地址', '经纬度')


def _poi_cell(poi, key, default=''):
    """Return ``poi[key]`` if it is a plain scalar, otherwise ``default``.

    NOTE(review): the API appears to return an empty list for blank fields,
    which the spreadsheet writer cannot store - confirm against real data.
    """
    value = poi.get(key, default)
    if isinstance(value, (str, int, float)):
        return value
    return default


def _poi_row(poi):
    """Flatten one POI dict into the cell values of one sheet row."""
    type_field = poi.get('type')
    parts = type_field.split(';') if isinstance(type_field, str) else []
    if len(parts) > 2:
        # The third ';'-separated segment is the one the sheet reports.
        category = parts[2]
    elif parts:
        # Shorter type strings: fall back to the last segment available
        # instead of raising IndexError.
        category = parts[-1]
    else:
        category = ''
    return (
        _poi_cell(poi, 'id'),
        _poi_cell(poi, 'name'),
        category,
        _poi_cell(poi, 'pname'),
        _poi_cell(poi, 'cityname'),
        _poi_cell(poi, 'adname'),
        _poi_cell(poi, 'address', 'null'),  # "null" marks a missing address
        _poi_cell(poi, 'location'),
    )


def write_to_excel(poilist, cityname, classfield):
    """Write the POIs to ``<cityname>_<classfield>.xls`` in the working directory.

    The workbook has a single sheet named ``classfield`` with one header row
    followed by one row per POI (id, name, type, province, city, district,
    address, location).

    Args:
        poilist: List of POI dicts as returned by the search API.
        cityname: Label used as the first part of the output file name.
        classfield: Category name; used as the sheet name and as the second
            part of the output file name.

    Note:
        The original loop reassigned ``cityname`` from each POI, so the file
        ended up named after the last POI's city. The argument is now used
        as passed.
    """
    # A Workbook object corresponds to one Excel file.
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = book.add_sheet(classfield, cell_overwrite_ok=True)

    # First row: column titles.
    for col, title in enumerate(_POI_HEADERS):
        sheet.write(0, col, title)

    # One row per POI, starting right below the header.
    for row, poi in enumerate(poilist, start=1):
        for col, value in enumerate(_poi_row(poi)):
            sheet.write(row, col, value)

    # Finally persist everything to the target file.
    book.save(cityname + "_" + classfield + '.xls')
def hand(poilist, result):
    """Append the POIs of one parsed response page to ``poilist`` in place.

    Args:
        poilist: List that accumulates POI dicts; it is mutated.
        result: Parsed JSON response (a dict). A missing or empty ``'pois'``
            entry adds nothing instead of raising ``KeyError``.
    """
    poilist.extend(result.get('pois') or [])
def getpoi_page(cityname, keywords, page):
    """Fetch a single result page from the keyword-search endpoint.

    Args:
        cityname: City code or name to search in; results are limited to it
            through ``citylimit=true``.
        keywords: Search keyword.
        page: 1-based page number; each page requests 20 records.

    Returns:
        The response body decoded as UTF-8 text (JSON was requested).

    Raises:
        urllib.error.URLError: On network failures, including the timeout.
    """
    # Local import keeps this block self-contained; the rest of the file
    # only imports ``quote`` from urllib.parse.
    from urllib.parse import urlencode

    params = {
        'key': amap_web_key,
        'extensions': 'all',
        'keywords': keywords,
        'city': cityname,
        'citylimit': 'true',
        'offset': 20,
        'page': page,
        'output': 'json',
    }
    # urlencode escapes every value (the key included); quote_via=quote keeps
    # percent-encoding rather than '+' for spaces.
    req_url = poi_search_url + "?" + urlencode(params, quote_via=quote)

    # The timeout keeps a stalled connection from blocking the crawl forever.
    with request.urlopen(req_url, timeout=30) as f:
        return f.read().decode('utf-8')
# Driver: for every category, crawl each area code in turn, merge the results
# and export them to one spreadsheet named after the province code.
for clas in classes:
    classes_all_pois = []
    for area in nanning_areas:
        pois_area = getpois(area, clas)
        print('当前城区:' + str(area) + ', 分类:' + str(clas) + ", 总的有" + str(len(pois_area)) + "条数据")
        classes_all_pois.extend(pois_area)
    # All areas of this category are done: report the total and write the file.
    print("所有城区的数据汇总,总数为:" + str(len(classes_all_pois)))
    write_to_excel(classes_all_pois, cityname, clas)
    print('================分类:' + str(clas) + "写入成功")
京公网安备 11010502036488号