Wrapper over youtube-dl for searching and batch downloading bilibili videos.
The patch below modifies youtube-dl's bilibili.py extractor: it adds an optional
`?p=<page>` capture to _VALID_URL for multi-part video selection, matches the
corresponding cid from the page, switches to the api.bilibili.com/x/player/playurl
endpoint (requesting DASH via fnval=16), extracts separate DASH video/audio formats
with Referer/Origin/Sec-Fetch headers, and retains the old interface.bilibili.com
durl API as a fallback. Note: it also leaves debug print() calls and commented-out
code in place.

--- bilibili.py.orig 2020-09-06 12:43:02.935724487 -0400
+++ bilibili.py.modified 2020-09-05 23:30:38.708500829 -0400
@@ -34,7 +34,7 @@
anime/(?P<anime_id>\d+)/play\#
)(?P<id_bv>\d+)|
video/[bB][vV](?P<id>[^/?#&]+)
- )
+ )(?:/?\?p=(?P<page>\d+))?
'''
_TESTS = [{
@@ -125,18 +125,30 @@
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') or mobj.group('id_bv')
+ print("video_id: ", video_id)
anime_id = mobj.group('anime_id')
+ page = mobj.group('page') or 1
webpage = self._download_webpage(url, video_id)
if 'anime/' not in url:
- cid = self._search_regex(
- r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
- default=None
- ) or compat_parse_qs(self._search_regex(
- [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
- r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
- r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
- webpage, 'player parameters'))['cid'][0]
+ mobj = re.findall(r'cid(?:["\']:|=)(\d+)', webpage)
+ seen_mobj = set()
+ mobj = [c for c in mobj if int(c) > 12 and not (c in seen_mobj or seen_mobj.add(c))]
+ print(mobj)
+ cid = mobj[int(page) - 1] or \
+ compat_parse_qs(self._search_regex([
+ r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
+ r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
+ r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
+ webpage, 'player parameters'))['cid'][0]
+ # cid = self._search_regex(
+ # r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
+ # default=None
+ # ) or compat_parse_qs(self._search_regex(
+ # [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
+ # r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
+ # r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
+ # webpage, 'player parameters'))['cid'][0]
else:
if 'no_bangumi_tip' not in smuggled_data:
self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dl with %s' % (
@@ -162,49 +174,154 @@
entries = []
- RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
- for num, rendition in enumerate(RENDITIONS, start=1):
- payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
- sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
-
- video_info = self._download_json(
- 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
- video_id, note='Downloading video info page',
- headers=headers, fatal=num == len(RENDITIONS))
+ payload = 'bvid=%s&cid=%s' % (video_id, cid)
+ video_info = self._download_json(
+ 'https://api.bilibili.com/x/player/playurl?%s&qn=116&type=&otype=json&fnver=0&fnval=16&fourk=1' % payload,
+ video_id, note='Downloading video info page',
+ headers=headers, fatal=True)
+
+ print("video_info: ", video_info)
+
+ video_info = video_info['data']
+
+ def find_num_segments(video_list, qualities):
+ max_segment_num = 0
+ for quality in qualities:
+ segment_num = sum(v.get('id') == quality for v in video_list)
+ if segment_num > max_segment_num:
+ max_segment_num = segment_num
+ return max_segment_num
+
+ def sort_segments(video_info, qualities):
+ videos = {v['codecid']: [] for v in video_info['dash']['video']}
+ audios = {a['codecid']: [] for a in video_info['dash']['audio']}
+
+ for v in video_info['dash']['video']:
+ videos[v['codecid']].append(v)
+ for a in video_info['dash']['audio']:
+ audios[a['codecid']].append(a)
+ for qualities in videos.values():
+ qualities.sort(key=lambda x: x['id'], reverse=True)
+ for qualities in audios.values():
+ qualities.sort(key=lambda x: x['id'], reverse=True)
+ videos = list(i[1] for i in sorted(videos.items()))
+ audios = list(i[1] for i in sorted(audios.items()))
+
+ return videos, audios
+
+ if 'durl' in video_info:
+ # Use old API
+ print("durl")
+ RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
+ for num, rendition in enumerate(RENDITIONS, start=1):
+ payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
+ sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
+
+ video_info = self._download_json(
+ 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
+ video_id, note='Downloading video info page',
+ headers=headers, fatal=num == len(RENDITIONS))
- if not video_info:
- continue
-
- if 'durl' not in video_info:
- if num < len(RENDITIONS):
+ if not video_info:
continue
- self._report_error(video_info)
- for idx, durl in enumerate(video_info['durl']):
- formats = [{
- 'url': durl['url'],
- 'filesize': int_or_none(durl['size']),
- }]
- for backup_url in durl.get('backup_url', []):
+ if 'durl' not in video_info:
+ if num < len(RENDITIONS):
+ continue
+ self._report_error(video_info)
+
+ for idx, durl in enumerate(video_info['durl']):
+ formats = [{
+ 'url': durl['url'],
+ 'filesize': int_or_none(durl['size']),
+ }]
+ for backup_url in durl.get('backup_url', []):
+ formats.append({
+ 'url': backup_url,
+ # backup URLs have lower priorities
+ 'preference': -2 if 'hd.mp4' in backup_url else -3,
+ })
+
+ for a_format in formats:
+ a_format.setdefault('http_headers', {}).update({
+ 'Referer': "https://www.bilibili.com/",
+ 'Origin': "https://www.bilibili.com",
+ })
+
+ self._sort_formats(formats)
+
+ entries.append({
+ 'id': '%s_part%s' % (video_id, idx),
+ 'duration': float_or_none(durl.get('length'), 1000),
+ 'formats': formats,
+ })
+ break
+ print("video_info_old: ", video_info)
+ elif 'dash' in video_info:
+ qualities = sorted(video_info['accept_quality'], reverse=True)
+ print("qualities: ", qualities)
+
+ # video_segment_num = find_num_segments(video_info['dash']['video'], qualities)
+ # audio_segment_num = find_num_segments(video_info['dash']['audio'], qualities)
+
+ # videos, audios = sort_segments(video_info, qualities)
+ formats = []
+
+ # Video
+ for v in video_info['dash']['video']:
+ formats.append({
+ 'url': v['baseUrl'],
+ 'vcodec': v['codecs'],
+ 'acodec': 'none',
+ 'width': v['width'],
+ 'height': v['height'],
+ 'quality': v['id'] + 1 if 'hev' in v['codecs'] else v['id']
+ })
+ if v.get('backupUrl', None) is not None:
formats.append({
- 'url': backup_url,
+ 'url': v['backupUrl'],
# backup URLs have lower priorities
- 'preference': -2 if 'hd.mp4' in backup_url else -3,
+ 'acodec': 'none',
+ 'quality': v['id'] + 1 if 'hev' in v['codecs'] else v['id'],
+ 'preference': -3,
})
- for a_format in formats:
- a_format.setdefault('http_headers', {}).update({
- 'Referer': url,
+ # Audio
+ for a in video_info['dash']['audio']:
+ formats.append({
+ 'url': a['baseUrl'],
+ 'vcodec': 'none',
+ 'acodec': a['codecs'],
+ 'quality': a['id'] - 30200
+ })
+ if a.get('backupUrl', None) is not None:
+ formats.append({
+ 'url': a['backupUrl'],
+ # backup URLs have lower priorities
+ 'vcodec': 'none',
+ 'quality': a['id'] - 30200,
+ 'preference': -3,
})
- self._sort_formats(formats)
-
- entries.append({
- 'id': '%s_part%s' % (video_id, idx),
- 'duration': float_or_none(durl.get('length'), 1000),
- 'formats': formats,
+ for a_format in formats:
+ a_format.setdefault('http_headers', {}).update({
+ 'Referer': "https://www.bilibili.com/",
+ 'Origin': "https://www.bilibili.com",
+ 'Sec-Fetch-Site': "cross-site",
+ 'Sec-Fetch-Mode': "cors",
+ 'Sec-Fetch-Dest': "empty",
})
- break
+
+ self._sort_formats(formats)
+
+ entries.append({
+ 'id': '%s' % (video_id),
+ 'duration': float_or_none(video_info.get('timelength'), 1000),
+ 'formats': formats,
+ })
+
+ else:
+ self._report_error(video_info)
title = self._html_search_regex(
('<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
@@ -239,8 +356,10 @@
info['uploader'] = self._html_search_meta(
'author', webpage, 'uploader', default=None)
+ print("entries: \n")
for entry in entries:
entry.update(info)
+ print(entry)
if len(entries) == 1:
return entries[0]