From af285016d2b14c4445109283e7c590b31542de88 Mon Sep 17 00:00:00 2001
From: Haytam001
Date: Sun, 16 Nov 2025 12:02:13 +0100
Subject: [PATCH] [ie/yfanefa] Add extractor (#15032)

Closes #14974
Authored by: Haytam001
---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/yfanefa.py     | 67 +++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)
 create mode 100644 yt_dlp/extractor/yfanefa.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index f4eb00f059..848b608717 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -2523,6 +2523,7 @@ from .yappy import (
     YappyIE,
     YappyProfileIE,
 )
+from .yfanefa import YfanefaIE
 from .yle_areena import YleAreenaIE
 from .youjizz import YouJizzIE
 from .youku import (
diff --git a/yt_dlp/extractor/yfanefa.py b/yt_dlp/extractor/yfanefa.py
new file mode 100644
index 0000000000..f1c340982a
--- /dev/null
+++ b/yt_dlp/extractor/yfanefa.py
@@ -0,0 +1,67 @@
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    join_nonempty,
+    remove_end,
+    url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class YfanefaIE(InfoExtractor):
+    IE_NAME = 'yfanefa'
+    _VALID_URL = r'https?://(?:www\.)?yfanefa\.com/(?P<id>[^?#]+)'
+    _TESTS = [{
+        'url': 'https://www.yfanefa.com/record/2717',
+        'info_dict': {
+            'id': 'record-2717',
+            'ext': 'mp4',
+            'title': 'THE HALLAMSHIRE RIFLES LEAVING SHEFFIELD, 1914',
+            'duration': 5239,
+            'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
+        },
+    }, {
+        'url': 'https://www.yfanefa.com/news/53',
+        'info_dict': {
+            'id': 'news-53',
+            'ext': 'mp4',
+            'title': 'Memory Bank: Bradford Launch',
+            'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
+        },
+    }, {
+        'url': 'https://www.yfanefa.com/evaluating_nature_matters',
+        'info_dict': {
+            'id': 'evaluating_nature_matters',
+            'ext': 'mp4',
+            'title': 'Evaluating Nature Matters',
+            'thumbnail': r're:https://media\.yfanefa\.com/storage/v1/file/',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        player_data = self._search_json(
+            r'iwPlayer\.options\["[\w.]+"\]\s*=', webpage, 'player options', video_id)
+
+        formats = []
+        video_url = join_nonempty(player_data['url'], player_data.get('signature'), delim='')
+        if determine_ext(video_url) == 'm3u8':
+            formats = self._extract_m3u8_formats(
+                video_url, video_id, 'mp4', m3u8_id='hls')
+        else:
+            formats = [{'url': video_url, 'ext': 'mp4'}]
+
+        return {
+            'id': video_id.strip('/').replace('/', '-'),
+            'title':
+                self._og_search_title(webpage, default=None)
+                or remove_end(self._html_extract_title(webpage), ' | Yorkshire Film Archive'),
+            'formats': formats,
+            **traverse_obj(player_data, {
+                'thumbnail': ('preview', {url_or_none}),
+                'duration': ('duration', {int_or_none}),
+            }),
+        }