python 爬取B站原视频的实例代码

python学习网 2020-09-10 18:54:01

这篇文章主要介绍了 python 爬取B站原视频的实例代码,帮助大家更好地理解和使用 python 爬虫,感兴趣的朋友可以了解一下。

B站原视频爬取,我就不多说直接上代码。直接运行就好。
B站把视频和音频分开存放,下载后需要把两者合并起来才能使用,这一点需要分析接口才能看出来。另外,登录这一块是比较难的。

  1 import os
  2 import re
  3 import argparse
  4 import subprocess
  5 import prettytable
  6 from DecryptLogin import login
  7  
  8  
  9 '''B站类'''
 10 class Bilibili():
 11 def __init__(self, username, password, **kwargs):
 12 self.username = username
 13 self.password = password
 14 self.session = Bilibili.login(username, password)
 15 self.headers = {
 16 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'
 17 }
 18 self.user_info_url = 'http://api.bilibili.com/x/space/acc/info'
 19 self.submit_videos_url = 'http://space.bilibili.com/ajax/member/getSubmitVideos'
 20 self.view_url = 'http://api.bilibili.com/x/web-interface/view'
 21 self.video_player_url = 'http://api.bilibili.com/x/player/playurl'
 22 '''运行主程序'''
 23 def run(self):
 24 while True:
 25 userid = input('请输入目标用户ID(例:345993405)(我的一个LOL好友凯撒可以关注他一下 谢谢) ——> ')
 26 user_info = self.__getUserInfo(userid)
 27 tb = prettytable.PrettyTable()
 28 tb.field_names = list(user_info.keys())
 29 tb.add_row(list(user_info.values()))
 30 print('获取的用户信息如下:')
 31 print(tb)
 32 is_download = input('是否下载该用户的所有视频(y/n, 默认: y) ——> ')
 33 if is_download == 'y' or is_download == 'yes' or not is_download:
 34 self.__downloadVideos(userid)
 35 '''根据userid获得该用户基本信息'''
 36 def __getUserInfo(self, userid):
 37 params = {'mid': userid, 'jsonp': 'jsonp'}
 38 res = self.session.get(self.user_info_url, params=params, headers=self.headers)
 39 res_json = res.json()
 40 user_info = {
 41 '用户名': res_json['data']['name'],
 42 '性别': res_json['data']['sex'],
 43 '个性签名': res_json['data']['sign'],
 44 '用户等级': res_json['data']['level'],
 45 '生日': res_json['data']['birthday']
 46 }
 47 return user_info
 48 '''下载目标用户的所有视频'''
 49 def __downloadVideos(self, userid):
 50 if not os.path.exists(userid):
 51 os.mkdir(userid)
 52 # 非会员用户只能下载到高清1080P
 53 quality = [('16', '流畅 360P'),
 54 ('32', '清晰 480P'),
 55 ('64', '高清 720P'),
 56 ('74', '高清 720P60'),
 57 ('80', '高清 1080P'),
 58 ('112', '高清 1080P+'),
 59 ('116', '高清 1080P60')][-3]
 60 # 获得用户的视频基本信息
 61 video_info = {'aids': [], 'cid_parts': [], 'titles': [], 'links': [], 'down_flags': []}
 62 params = {'mid': userid, 'pagesize': 30, 'tid': 0, 'page': 1, 'order': 'pubdate'}
 63 while True:
 64 res = self.session.get(self.submit_videos_url, headers=self.headers, params=params)
 65 res_json = res.json()
 66 for item in res_json['data']['vlist']:
 67 video_info['aids'].append(item['aid'])
 68 if len(video_info['aids']) < int(res_json['data']['count']):
 69 params['page'] += 1
 70 else:
 71 break
 72 for aid in video_info['aids']:
 73 params = {'aid': aid}
 74 res = self.session.get(self.view_url, headers=self.headers, params=params)
 75 cid_part = []
 76 for page in res.json()['data']['pages']:
 77 cid_part.append([page['cid'], page['part']])
 78 video_info['cid_parts'].append(cid_part)
 79 title = res.json()['data']['title']
 80 title = re.sub(r"[‘'\/\\\:\*\?\"\<\>\|\s']", ' ', title)
 81 video_info['titles'].append(title)
 82 print('共获取到用户ID<%s>的<%d>个视频...' % (userid, len(video_info['titles'])))
 83 for idx in range(len(video_info['titles'])):
 84 aid = video_info['aids'][idx]
 85 cid_part = video_info['cid_parts'][idx]
 86 link = []
 87 down_flag = False
 88 for cid, part in cid_part:
 89 params = {'avid': aid, 'cid': cid, 'qn': quality, 'otype': 'json', 'fnver': 0, 'fnval': 16}
 90 res = self.session.get(self.video_player_url, params=params, headers=self.headers)
 91 res_json = res.json()
 92 if 'dash' in res_json['data']:
 93 down_flag = True
 94 v, a = res_json['data']['dash']['video'][0], res_json['data']['dash']['audio'][0]
 95 link_v = [v['baseUrl']]
 96 link_a = [a['baseUrl']]
 97 if v['backup_url']:
 98 for item in v['backup_url']:
 99 link_v.append(item)
100 if a['backup_url']:
101 for item in a['backup_url']:
102 link_a.append(item)
103 link = [link_v, link_a]
104 else:
105 link = [res_json['data']['durl'][-1]['url']]
106 if res_json['data']['durl'][-1]['backup_url']:
107 for item in res_json['data']['durl'][-1]['backup_url']:
108 link.append(item)
109 video_info['links'].append(link)
110 video_info['down_flags'].append(down_flag)
111 # 开始下载
112 out_pipe_quiet = subprocess.PIPE
113 out_pipe = None
114 aria2c_path = os.path.join(os.getcwd(), 'tools/aria2c')
115 ffmpeg_path = os.path.join(os.getcwd(), 'tools/ffmpeg')
116 for idx in range(len(video_info['titles'])):
117 title = video_info['titles'][idx]
118 aid = video_info['aids'][idx]
119 down_flag = video_info['down_flags'][idx]
120 print('正在下载视频<%s>...' % title)
121 if down_flag:
122 link_v, link_a = video_info['links'][idx]
123 # --视频
124 url = '"{}"'.format('" "'.join(link_v))
125 command = '{} -c -k 1M -x {} -d "{}" -o "{}" --referer="https://www.bilibili.com/video/av{}" {} {}'
126 command = command.format(aria2c_path, len(link_v), userid, title+'.flv', aid, "", url)
127 print(command)
128 process = subprocess.Popen(command, stdout=out_pipe, stderr=out_pipe, shell=True)
129 process.wait()
130 # --音频
131 url = '"{}"'.format('" "'.join(link_a))
132 command = '{} -c -k 1M -x {} -d "{}" -o "{}" --referer="https://www.bilibili.com/video/av{}" {} {}'
133 command = command.format(aria2c_path, len(link_v), userid, title+'.aac', aid, "", url)
134 print(command)
135  
136 process = subprocess.Popen(command, stdout=out_pipe, stderr=out_pipe, shell=True)
137 process.wait()
138 # --合并
139 command = '{} -i "{}" -i "{}" -c copy -f mp4 -y "{}"'
140 command = command.format(ffmpeg_path, os.path.join(userid, title+'.flv'), os.path.join(userid, title+'.aac'), os.path.join(userid, title+'.mp4'))
141 print(command)
142  
143 process = subprocess.Popen(command, stdout=out_pipe, stderr=out_pipe_quiet, shell=True)
144 process.wait()
145 os.remove(os.path.join(userid, title+'.flv'))
146 os.remove(os.path.join(userid, title+'.aac'))
147 else:
148 link = video_info['links'][idx]
149 url = '"{}"'.format('" "'.join(link))
150 command = '{} -c -k 1M -x {} -d "{}" -o "{}" --referer="https://www.bilibili.com/video/av{}" {} {}'
151 command = command.format(aria2c_path, len(link), userid, title+'.flv', aid, "", url)
152 process = subprocess.Popen(command, stdout=out_pipe, stderr=out_pipe, shell=True)
153 process.wait()
154 os.rename(os.path.join(userid, title+'.flv'), os.path.join(userid, title+'.mp4'))
155 print('所有视频下载完成, 该用户所有视频保存在<%s>文件夹中...' % (userid))
156 '''借助大佬开源的库来登录B站'''
157 @staticmethod
158 def login(username, password):
159 _, session = login.Login().bilibili(username, password)
160 return session
161  
162  
163 '''run'''
if __name__ == '__main__':
    # Script entry point: read the account credentials from the command
    # line, log in, and start the interactive download loop.
    parser = argparse.ArgumentParser(description='下载B站指定用户的所有视频(仅支持Windows下使用)')
    parser.add_argument('--username', dest='username', help='xxx', type=str, required=True)
    parser.add_argument('--password', dest='password', help='xxxx', type=str, required=True)
    # Parse the real command line (sys.argv). The original fed a hard-coded
    # placeholder list to parse_args, which ignored whatever the user passed
    # even though both flags are declared as required.
    args = parser.parse_args()
    bili = Bilibili(args.username, args.password)
    bili.run()

把账号密码填上就行。以上就是python 爬取B站原视频的实例代码的详细内容

福利:如果你的学习时间不是很紧张,并且又想快速地提高Python开发技术、Python爬虫、Python大数据分析、人工智能等技术,这里给大家分享一套系统教学资源,加一下我建的Python技术学习群:九三七六六七五零九,一起学习。群里有相关开发工具和学习教程!

阅读(2376) 评论(0)