哈哈,今天的话题有点那什么了哈。咱们应该秉承学习技术的角度来看,那么就开始今天的话题吧。
思路来源
今天很偶然的一个机会,看了下美拍网视频,于是就激发了我的好奇心,就写了一个简单的测试用例。神奇地发现这一招竟然是管用的。那还等什么咯。
————————————————
url: https://www.meipai.com
第一步,拿到 href="/media/1132146237"
拼接url https://www.meipai.com/media/1132146237
第二步,找到视频真实链接 src:获取 data-video 属性并解密
http://mvvideo11.meitudata.com/5d42a22f5d0a4sb4jt0lyk9825_H264_1_8fddcb689e74d0.mp4
下载、保存
import requests, re
from bs4 import BeautifulSoup
import base64
# 解密美拍视频真实地址
def decode(encoded_string):
    """Recover the Base64-decoded payload from an obfuscated ``data-video`` value.

    Layout, as implemented below:

    * The first four characters are a hexadecimal number written in reverse
      order; everything after them is the payload.
    * The decimal representation of that number supplies single-digit
      parameters. Digits 0 and 1 are the offset and length of a junk run
      counted from the start of the payload. Digits 2 and 3 are the offset
      and length of a second junk run, with the offset measured from the end
      of the payload (after the first run has been removed).
    * Once both junk runs are removed, the remainder is Base64 text.

    Args:
        encoded_string: The obfuscated text (4-character header + payload).

    Returns:
        bytes: The Base64-decoded payload.

    Raises:
        ValueError: If the header is not valid hexadecimal, or the cleaned
            payload is not valid Base64 (``binascii.Error`` is a subclass).
        IndexError: If the header yields fewer than four decimal digits.
    """

    def _strip_run(text, start, length):
        """Remove the junk run located at ``text[start:start + length]``."""
        junk = text[start:start + length]
        # count=1 is essential: only the run sitting at ``start`` is junk.
        # Replacing every occurrence would also delete legitimate payload
        # characters that happen to equal the junk text and corrupt the
        # Base64 data.
        return text[:start] + text[start:].replace(junk, "", 1)

    header, payload = encoded_string[:4], encoded_string[4:]
    digits = str(int(header[::-1], 16))
    pre, tail = list(digits[:2]), list(digits[2:])

    payload = _strip_run(payload, int(pre[0]), int(pre[1]))

    # The second offset is counted from the end of the partially cleaned
    # payload, so convert it to an index from the start first.
    tail_start = len(payload) - int(tail[0]) - int(tail[1])
    payload = _strip_run(payload, tail_start, int(tail[1]))

    return base64.b64decode(payload)
#解析一级页面
def first_parse(content):
    """Select the video entries from the first-level (home) page markup.

    Args:
        content: HTML text of the page.

    Returns:
        The result of running the CSS selector
        ``.home-hot .pr .content-l-p`` over the document parsed with the
        ``lxml`` parser (empty when nothing matches).
    """
    return BeautifulSoup(content, 'lxml').select('.home-hot .pr .content-l-p')
#解析二级页面
def second_parse(context1):
    """Extract and decode the video address from a second-level (detail) page.

    Args:
        context1: HTML text of the detail page.

    Returns:
        str: The ``data-video`` attribute of the first element matching
        ``.detail-content #detailVideo``, passed through ``decode`` and then
        decoded as UTF-8.

    Raises:
        IndexError: If no element matches the selector.
    """
    page = BeautifulSoup(context1, 'lxml')
    players = page.select('.detail-content #detailVideo')
    encoded = players[0]['data-video']
    raw_bytes = decode(encoded)
    return raw_bytes.decode('utf8')
#运行主函数
def main():
    """Download the first video found on the Meipai home page to ``aaa.mp4``.

    Steps: fetch the home page, collect the entries returned by
    ``first_parse``, open the first entry's detail page, resolve the real
    video address with ``second_parse`` and save the video to ``aaa.mp4`` in
    the current working directory. Nothing is downloaded when the home page
    yields no entries.

    NOTE(review): the original paste lost its indentation; this assumes the
    early exit sat inside the loop, i.e. only the first entry is processed.

    Raises:
        requests.HTTPError: If the video download returns an error status.
        IndexError: Propagated from ``second_parse`` when the detail page has
            no video element.
    """
    url = 'https://www.meipai.com'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
    }

    home_page = requests.get(url=url, headers=headers)
    for oa in first_parse(home_page.text):
        detail_page = requests.get(url=url + oa['href'], headers=headers)
        video_href = second_parse(detail_page.text)

        # Stream the video to disk chunk by chunk instead of loading the
        # whole body into memory; the ``with`` block releases the connection.
        with requests.get(url=video_href, headers=headers, stream=True) as video:
            # Do not save an HTTP error page under a .mp4 name.
            video.raise_for_status()
            with open('aaa.mp4', 'wb') as f:
                for chunk in video.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)

        # Only the first entry is downloaded. Returning (rather than calling
        # the site-provided ``exit()``) keeps main() safe to call from other
        # code without terminating the interpreter.
        return


if __name__ == '__main__':
    main()