mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-12-10 07:22:17 +01:00
[ie/web.archive:youtube] Fix extractor (#15234)
Closes #15233 Authored by: seproDev
This commit is contained in:
@@ -704,6 +704,24 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
|||||||
'thumbnail': 'https://web.archive.org/web/20160108040020if_/https://i.ytimg.com/vi/SQCom7wjGDs/maxresdefault.jpg',
|
'thumbnail': 'https://web.archive.org/web/20160108040020if_/https://i.ytimg.com/vi/SQCom7wjGDs/maxresdefault.jpg',
|
||||||
'upload_date': '20160107',
|
'upload_date': '20160107',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# dmuxed formats
|
||||||
|
'url': 'https://web.archive.org/web/20240922160632/https://www.youtube.com/watch?v=z7hzvTL3k1k',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'z7hzvTL3k1k',
|
||||||
|
'ext': 'webm',
|
||||||
|
'title': 'Praise the Lord and Pass the Ammunition (BARRXN REMIX)',
|
||||||
|
'description': 'md5:45dbf2c71c23b0734c8dfb82dd1e94b6',
|
||||||
|
'uploader': 'Barrxn',
|
||||||
|
'uploader_id': 'TheRockstar6086',
|
||||||
|
'uploader_url': 'https://www.youtube.com/user/TheRockstar6086',
|
||||||
|
'channel_id': 'UCjJPGUTtvR9uizmawn2ThqA',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCjJPGUTtvR9uizmawn2ThqA',
|
||||||
|
'duration': 125,
|
||||||
|
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||||
|
'upload_date': '20201207',
|
||||||
|
},
|
||||||
|
'params': {'format': 'bv'},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
|
'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -1060,6 +1078,19 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
|||||||
capture_dates.extend([self._OLDEST_CAPTURE_DATE, self._NEWEST_CAPTURE_DATE])
|
capture_dates.extend([self._OLDEST_CAPTURE_DATE, self._NEWEST_CAPTURE_DATE])
|
||||||
return orderedSet(filter(None, capture_dates))
|
return orderedSet(filter(None, capture_dates))
|
||||||
|
|
||||||
|
def _parse_fmt(self, fmt, extra_info=None):
|
||||||
|
format_id = traverse_obj(fmt, ('url', {parse_qs}, 'itag', 0))
|
||||||
|
return {
|
||||||
|
'format_id': format_id,
|
||||||
|
**self._FORMATS.get(format_id, {}),
|
||||||
|
**traverse_obj(fmt, {
|
||||||
|
'url': ('url', {lambda x: f'https://web.archive.org/web/2id_/{x}'}),
|
||||||
|
'ext': ('ext', {str}),
|
||||||
|
'filesize': ('url', {parse_qs}, 'clen', 0, {int_or_none}),
|
||||||
|
}),
|
||||||
|
**(extra_info or {}),
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, url_date, url_date_2 = self._match_valid_url(url).group('id', 'date', 'date2')
|
video_id, url_date, url_date_2 = self._match_valid_url(url).group('id', 'date', 'date2')
|
||||||
url_date = url_date or url_date_2
|
url_date = url_date or url_date_2
|
||||||
@@ -1090,17 +1121,14 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
|||||||
info['thumbnails'] = self._extract_thumbnails(video_id)
|
info['thumbnails'] = self._extract_thumbnails(video_id)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for fmt in traverse_obj(video_info, ('formats', lambda _, v: url_or_none(v['url']))):
|
if video_info.get('dmux'):
|
||||||
format_id = traverse_obj(fmt, ('url', {parse_qs}, 'itag', 0))
|
for vf in traverse_obj(video_info, ('formats', 'video', lambda _, v: url_or_none(v['url']))):
|
||||||
formats.append({
|
formats.append(self._parse_fmt(vf, {'acodec': 'none'}))
|
||||||
'format_id': format_id,
|
for af in traverse_obj(video_info, ('formats', 'audio', lambda _, v: url_or_none(v['url']))):
|
||||||
**self._FORMATS.get(format_id, {}),
|
formats.append(self._parse_fmt(af, {'vcodec': 'none'}))
|
||||||
**traverse_obj(fmt, {
|
else:
|
||||||
'url': ('url', {lambda x: f'https://web.archive.org/web/2id_/{x}'}),
|
for fmt in traverse_obj(video_info, ('formats', lambda _, v: url_or_none(v['url']))):
|
||||||
'ext': ('ext', {str}),
|
formats.append(self._parse_fmt(fmt))
|
||||||
'filesize': ('url', {parse_qs}, 'clen', 0, {int_or_none}),
|
|
||||||
}),
|
|
||||||
})
|
|
||||||
info['formats'] = formats
|
info['formats'] = formats
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|||||||
Reference in New Issue
Block a user