京东商品页面的爬取

选取一个商品页面

import requests
# Fetch a JD.com product page and print its HTML.
url = "https://item.jd.com/100011333796.html#crumb-wrap"
# Override the default User-Agent header so the request looks like it
# comes from a browser.
kv = {'user-agent': 'Mozilla/5.0'}
try:
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url, headers=kv, timeout=10)
    r.raise_for_status()
    # Use the encoding detected from the body when decoding r.text.
    r.encoding = r.apparent_encoding
    print(r.text)
except requests.RequestException:
    # Covers connection errors, timeouts and the HTTPError raised by
    # raise_for_status(); unrelated errors (e.g. Ctrl-C) still propagate.
    print("爬取失败")

百度(360)关键词提交

baidu

import requests
# Submit a search keyword to Baidu and report the final request URL and
# the length of the returned page.
keyword = "Python"
# Baidu reads the search term from the "wd" query parameter.
kv = {'wd': keyword}
try:
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get("http://www.baidu.com/s", params=kv, timeout=10)
    # Show the URL that was actually requested (with the encoded query string).
    print(r.request.url)
    r.raise_for_status()
    print(len(r.text))
except requests.RequestException:
    # Covers connection errors, timeouts and the HTTPError raised by
    # raise_for_status(); unrelated errors (e.g. Ctrl-C) still propagate.
    print("爬取失败")

360

import requests
# Submit a search keyword to 360 search (so.com) and report the final
# request URL and the length of the returned page.
keyword = "Python"
# so.com reads the search term from the "q" query parameter.
kv = {'q': keyword}
try:
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get("http://www.so.com/s", params=kv, timeout=10)
    # Show the URL that was actually requested (with the encoded query string).
    print(r.request.url)
    r.raise_for_status()
    print(len(r.text))
except requests.RequestException:
    # Covers connection errors, timeouts and the HTTPError raised by
    # raise_for_status(); unrelated errors (e.g. Ctrl-C) still propagate.
    print("爬取失败")

网络图片的爬取与存储

爬取csdn上的某张图片

import requests
import os
# Download one image from CSDN's image host and store it on disk,
# skipping the download when the target file already exists.
kv = {'user-agent': 'Mozilla/5.0'}
url = "https://imgconvert.csdnimg.cn/aHR0cHM6Ly9tbWJpei5xcGljLmNuL21tYml6X2pwZy8xaFJlSGFxYWZhZTN5bVlZanN2TWRmRnB4YUZiY3VNRFBOVXNPNzg0NWZST0V1cmVTNGdWWmliYXYyWTIzYlI1WXZmUWRJNjJVTWhLNWJyTWRpYXRDSnJnLzY0MA?x-oss-process=image/format,png"
root = "E://python爬虫学习//"
# The file extension is whatever follows the last comma in the URL
# ("png" for this URL).
path = root + '不想奋斗.' + url.split(',')[-1]
try:
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(root, exist_ok=True)
    if not os.path.exists(path):
        # A timeout keeps the script from hanging on a stalled connection.
        r = requests.get(url, headers=kv, timeout=10)
        r.raise_for_status()
        # r.content is the raw response bytes, so no text-encoding
        # detection is needed for an image.
        with open(path, 'wb') as f:
            f.write(r.content)
        # Report success only after the with-block has closed the file.
        print("文件保存成功")
    else:
        print("文件已存在")
except (requests.RequestException, OSError):
    # Network/HTTP failures or filesystem errors (directory creation,
    # opening or writing the file); other errors still propagate.
    print("爬取失败")

IP地址归属地的自动查询

手动查询网址:https://www.ip138.com/
以中北大学网站的 IP 地址为例,查询其归属地

import requests
# Query ip138.com for the location of an IP address and print a slice of
# the returned HTML.
kv = {'user-agent': 'Mozilla/5.0'}
# The lookup URL is assembled as url1 + <ip> + url2.
url1 = "https://www.ip138.com/iplookup.asp?ip="
url2 = "&action=2"
ip = '202.207.177.39'
try:
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url1 + ip + url2, headers=kv, timeout=10)
    r.raise_for_status()
    # Use the encoding detected from the body when decoding r.text.
    r.encoding = r.apparent_encoding
    # Print only characters 1000-1799 of the page instead of the whole
    # document.
    print(r.text[1000:1800])
except requests.RequestException:
    # Covers connection errors, timeouts and the HTTPError raised by
    # raise_for_status(); unrelated errors (e.g. Ctrl-C) still propagate.
    print("爬取失败")