You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
249 lines
11 KiB
249 lines
11 KiB
--- bilibili.py.orig 2020-09-06 12:43:02.935724487 -0400
|
|
+++ bilibili.py.modified 2020-09-05 23:30:38.708500829 -0400
|
|
@@ -34,7 +34,7 @@
|
|
anime/(?P<anime_id>\d+)/play\#
|
|
)(?P<id_bv>\d+)|
|
|
video/[bB][vV](?P<id>[^/?#&]+)
|
|
- )
|
|
+ )(?:/?\?p=(?P<page>\d+))?
|
|
'''
|
|
|
|
_TESTS = [{
|
|
@@ -125,18 +125,30 @@
|
|
|
|
mobj = re.match(self._VALID_URL, url)
|
|
video_id = mobj.group('id') or mobj.group('id_bv')
|
|
+ print("video_id: ", video_id)
|
|
anime_id = mobj.group('anime_id')
|
|
+ page = mobj.group('page') or 1
|
|
webpage = self._download_webpage(url, video_id)
|
|
|
|
if 'anime/' not in url:
|
|
- cid = self._search_regex(
|
|
- r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
|
|
- default=None
|
|
- ) or compat_parse_qs(self._search_regex(
|
|
- [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
|
- r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
|
|
- r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
|
- webpage, 'player parameters'))['cid'][0]
|
|
+ mobj = re.findall(r'cid(?:["\']:|=)(\d+)', webpage)
|
|
+ seen_mobj = set()
|
|
+ mobj = [c for c in mobj if int(c) > 12 and not (c in seen_mobj or seen_mobj.add(c))]
|
|
+ print(mobj)
|
|
+ cid = mobj[int(page) - 1] or \
|
|
+ compat_parse_qs(self._search_regex([
|
|
+ r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
|
+ r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
|
|
+ r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
|
+ webpage, 'player parameters'))['cid'][0]
|
|
+ # cid = self._search_regex(
|
|
+ # r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
|
|
+ # default=None
|
|
+ # ) or compat_parse_qs(self._search_regex(
|
|
+ # [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
|
+ # r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
|
|
+ # r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
|
+ # webpage, 'player parameters'))['cid'][0]
|
|
else:
|
|
if 'no_bangumi_tip' not in smuggled_data:
|
|
self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dl with %s' % (
|
|
@@ -162,49 +174,154 @@
|
|
|
|
entries = []
|
|
|
|
- RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
|
|
- for num, rendition in enumerate(RENDITIONS, start=1):
|
|
- payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
|
|
- sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
|
|
-
|
|
- video_info = self._download_json(
|
|
- 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
|
|
- video_id, note='Downloading video info page',
|
|
- headers=headers, fatal=num == len(RENDITIONS))
|
|
+ payload = 'bvid=%s&cid=%s' % (video_id, cid)
|
|
+ video_info = self._download_json(
|
|
+ 'https://api.bilibili.com/x/player/playurl?%s&qn=116&type=&otype=json&fnver=0&fnval=16&fourk=1' % payload,
|
|
+ video_id, note='Downloading video info page',
|
|
+ headers=headers, fatal=True)
|
|
+
|
|
+ print("video_info: ", video_info)
|
|
+
|
|
+ video_info = video_info['data']
|
|
+
|
|
+ def find_num_segments(video_list, qualities):
|
|
+ max_segment_num = 0
|
|
+ for quality in qualities:
|
|
+ segment_num = sum(v.get('id') == quality for v in video_list)
|
|
+ if segment_num > max_segment_num:
|
|
+ max_segment_num = segment_num
|
|
+ return max_segment_num
|
|
+
|
|
+ def sort_segments(video_info, qualities):
|
|
+ videos = {v['codecid']: [] for v in video_info['dash']['video']}
|
|
+ audios = {a['codecid']: [] for a in video_info['dash']['audio']}
|
|
+
|
|
+ for v in video_info['dash']['video']:
|
|
+ videos[v['codecid']].append(v)
|
|
+ for a in video_info['dash']['audio']:
|
|
+ audios[a['codecid']].append(a)
|
|
+ for qualities in videos.values():
|
|
+ qualities.sort(key=lambda x: x['id'], reverse=True)
|
|
+ for qualities in audios.values():
|
|
+ qualities.sort(key=lambda x: x['id'], reverse=True)
|
|
+ videos = list(i[1] for i in sorted(videos.items()))
|
|
+ audios = list(i[1] for i in sorted(audios.items()))
|
|
+
|
|
+ return videos, audios
|
|
+
|
|
+ if 'durl' in video_info:
|
|
+ # Use old API
|
|
+ print("durl")
|
|
+ RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
|
|
+ for num, rendition in enumerate(RENDITIONS, start=1):
|
|
+ payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
|
|
+ sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
|
|
+
|
|
+ video_info = self._download_json(
|
|
+ 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
|
|
+ video_id, note='Downloading video info page',
|
|
+ headers=headers, fatal=num == len(RENDITIONS))
|
|
|
|
- if not video_info:
|
|
- continue
|
|
-
|
|
- if 'durl' not in video_info:
|
|
- if num < len(RENDITIONS):
|
|
+ if not video_info:
|
|
continue
|
|
- self._report_error(video_info)
|
|
|
|
- for idx, durl in enumerate(video_info['durl']):
|
|
- formats = [{
|
|
- 'url': durl['url'],
|
|
- 'filesize': int_or_none(durl['size']),
|
|
- }]
|
|
- for backup_url in durl.get('backup_url', []):
|
|
+ if 'durl' not in video_info:
|
|
+ if num < len(RENDITIONS):
|
|
+ continue
|
|
+ self._report_error(video_info)
|
|
+
|
|
+ for idx, durl in enumerate(video_info['durl']):
|
|
+ formats = [{
|
|
+ 'url': durl['url'],
|
|
+ 'filesize': int_or_none(durl['size']),
|
|
+ }]
|
|
+ for backup_url in durl.get('backup_url', []):
|
|
+ formats.append({
|
|
+ 'url': backup_url,
|
|
+ # backup URLs have lower priorities
|
|
+ 'preference': -2 if 'hd.mp4' in backup_url else -3,
|
|
+ })
|
|
+
|
|
+ for a_format in formats:
|
|
+ a_format.setdefault('http_headers', {}).update({
|
|
+ 'Referer': "https://www.bilibili.com/",
|
|
+ 'Origin': "https://www.bilibili.com",
|
|
+ })
|
|
+
|
|
+ self._sort_formats(formats)
|
|
+
|
|
+ entries.append({
|
|
+ 'id': '%s_part%s' % (video_id, idx),
|
|
+ 'duration': float_or_none(durl.get('length'), 1000),
|
|
+ 'formats': formats,
|
|
+ })
|
|
+ break
|
|
+ print("video_info_old: ", video_info)
|
|
+ elif 'dash' in video_info:
|
|
+ qualities = sorted(video_info['accept_quality'], reverse=True)
|
|
+ print("qualities: ", qualities)
|
|
+
|
|
+ # video_segment_num = find_num_segments(video_info['dash']['video'], qualities)
|
|
+ # audio_segment_num = find_num_segments(video_info['dash']['audio'], qualities)
|
|
+
|
|
+ # videos, audios = sort_segments(video_info, qualities)
|
|
+ formats = []
|
|
+
|
|
+ # Video
|
|
+ for v in video_info['dash']['video']:
|
|
+ formats.append({
|
|
+ 'url': v['baseUrl'],
|
|
+ 'vcodec': v['codecs'],
|
|
+ 'acodec': 'none',
|
|
+ 'width': v['width'],
|
|
+ 'height': v['height'],
|
|
+ 'quality': v['id'] + 1 if 'hev' in v['codecs'] else v['id']
|
|
+ })
|
|
+ if v.get('backupUrl', None) is not None:
|
|
formats.append({
|
|
- 'url': backup_url,
|
|
+ 'url': v['backupUrl'],
|
|
# backup URLs have lower priorities
|
|
- 'preference': -2 if 'hd.mp4' in backup_url else -3,
|
|
+ 'acodec': 'none',
|
|
+ 'quality': v['id'] + 1 if 'hev' in v['codecs'] else v['id'],
|
|
+ 'preference': -3,
|
|
})
|
|
|
|
- for a_format in formats:
|
|
- a_format.setdefault('http_headers', {}).update({
|
|
- 'Referer': url,
|
|
+ # Audio
|
|
+ for a in video_info['dash']['audio']:
|
|
+ formats.append({
|
|
+ 'url': a['baseUrl'],
|
|
+ 'vcodec': 'none',
|
|
+ 'acodec': a['codecs'],
|
|
+ 'quality': a['id'] - 30200
|
|
+ })
|
|
+ if a.get('backupUrl', None) is not None:
|
|
+ formats.append({
|
|
+ 'url': a['backupUrl'],
|
|
+ # backup URLs have lower priorities
|
|
+ 'vcodec': 'none',
|
|
+ 'quality': a['id'] - 30200,
|
|
+ 'preference': -3,
|
|
})
|
|
|
|
- self._sort_formats(formats)
|
|
-
|
|
- entries.append({
|
|
- 'id': '%s_part%s' % (video_id, idx),
|
|
- 'duration': float_or_none(durl.get('length'), 1000),
|
|
- 'formats': formats,
|
|
+ for a_format in formats:
|
|
+ a_format.setdefault('http_headers', {}).update({
|
|
+ 'Referer': "https://www.bilibili.com/",
|
|
+ 'Origin': "https://www.bilibili.com",
|
|
+ 'Sec-Fetch-Site': "cross-site",
|
|
+ 'Sec-Fetch-Mode': "cors",
|
|
+ 'Sec-Fetch-Dest': "empty",
|
|
})
|
|
- break
|
|
+
|
|
+ self._sort_formats(formats)
|
|
+
|
|
+ entries.append({
|
|
+ 'id': '%s' % (video_id),
|
|
+ 'duration': float_or_none(video_info.get('timelength'), 1000),
|
|
+ 'formats': formats,
|
|
+ })
|
|
+
|
|
+ else:
|
|
+ self._report_error(video_info)
|
|
|
|
title = self._html_search_regex(
|
|
('<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
|
|
@@ -239,8 +356,10 @@
|
|
info['uploader'] = self._html_search_meta(
|
|
'author', webpage, 'uploader', default=None)
|
|
|
|
+ print("entries: \n")
|
|
for entry in entries:
|
|
entry.update(info)
|
|
+ print(entry)
|
|
|
|
if len(entries) == 1:
|
|
return entries[0]
|
|
|