diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py
index 3746c58fb7..02c39beb68 100644
--- a/yt_dlp/extractor/archiveorg.py
+++ b/yt_dlp/extractor/archiveorg.py
@@ -704,6 +704,24 @@ class YoutubeWebArchiveIE(InfoExtractor):
                 'thumbnail': 'https://web.archive.org/web/20160108040020if_/https://i.ytimg.com/vi/SQCom7wjGDs/maxresdefault.jpg',
                 'upload_date': '20160107',
             },
+        }, {
+            # dmuxed formats
+            'url': 'https://web.archive.org/web/20240922160632/https://www.youtube.com/watch?v=z7hzvTL3k1k',
+            'info_dict': {
+                'id': 'z7hzvTL3k1k',
+                'ext': 'webm',
+                'title': 'Praise the Lord and Pass the Ammunition (BARRXN REMIX)',
+                'description': 'md5:45dbf2c71c23b0734c8dfb82dd1e94b6',
+                'uploader': 'Barrxn',
+                'uploader_id': 'TheRockstar6086',
+                'uploader_url': 'https://www.youtube.com/user/TheRockstar6086',
+                'channel_id': 'UCjJPGUTtvR9uizmawn2ThqA',
+                'channel_url': 'https://www.youtube.com/channel/UCjJPGUTtvR9uizmawn2ThqA',
+                'duration': 125,
+                'thumbnail': r're:https?://.*\.(jpg|webp)',
+                'upload_date': '20201207',
+            },
+            'params': {'format': 'bv'},
         }, {
             'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
             'only_matching': True,
@@ -1060,6 +1078,25 @@ class YoutubeWebArchiveIE(InfoExtractor):
         capture_dates.extend([self._OLDEST_CAPTURE_DATE, self._NEWEST_CAPTURE_DATE])
         return orderedSet(filter(None, capture_dates))
 
+    def _parse_fmt(self, fmt, extra_info=None):
+        """Build a format dict from a single format entry of video_info.
+
+        The "itag" query parameter of fmt['url'] becomes the format_id (and the
+        lookup key into self._FORMATS); "clen" becomes the filesize. Keys in
+        extra_info, if given, are merged last and override everything else.
+        """
+        format_id = traverse_obj(fmt, ('url', {parse_qs}, 'itag', 0))
+        return {
+            'format_id': format_id,
+            **self._FORMATS.get(format_id, {}),
+            **traverse_obj(fmt, {
+                'url': ('url', {lambda x: f'https://web.archive.org/web/2id_/{x}'}),
+                'ext': ('ext', {str}),
+                'filesize': ('url', {parse_qs}, 'clen', 0, {int_or_none}),
+            }),
+            **(extra_info or {}),
+        }
+
     def _real_extract(self, url):
         video_id, url_date, url_date_2 = self._match_valid_url(url).group('id', 'date', 'date2')
         url_date = url_date or url_date_2
@@ -1090,17 +1127,15 @@ class YoutubeWebArchiveIE(InfoExtractor):
         info['thumbnails'] = self._extract_thumbnails(video_id)
 
         formats = []
-        for fmt in traverse_obj(video_info, ('formats', lambda _, v: url_or_none(v['url']))):
-            format_id = traverse_obj(fmt, ('url', {parse_qs}, 'itag', 0))
-            formats.append({
-                'format_id': format_id,
-                **self._FORMATS.get(format_id, {}),
-                **traverse_obj(fmt, {
-                    'url': ('url', {lambda x: f'https://web.archive.org/web/2id_/{x}'}),
-                    'ext': ('ext', {str}),
-                    'filesize': ('url', {parse_qs}, 'clen', 0, {int_or_none}),
-                }),
-            })
+        if video_info.get('dmux'):
+            # 'formats' is split into 'video' and 'audio' lists; mark entries video-only/audio-only
+            for vf in traverse_obj(video_info, ('formats', 'video', lambda _, v: url_or_none(v['url']))):
+                formats.append(self._parse_fmt(vf, {'acodec': 'none'}))
+            for af in traverse_obj(video_info, ('formats', 'audio', lambda _, v: url_or_none(v['url']))):
+                formats.append(self._parse_fmt(af, {'vcodec': 'none'}))
+        else:
+            for fmt in traverse_obj(video_info, ('formats', lambda _, v: url_or_none(v['url']))):
+                formats.append(self._parse_fmt(fmt))
         info['formats'] = formats
 
         return info