diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 2566a791c..c331bab78 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -454,7 +454,10 @@ from .curiositystream import ( CuriosityStreamIE, CuriosityStreamSeriesIE, ) -from .cwtv import CWTVIE +from .cwtv import ( + CWTVIE, + CWTVMovieIE, +) from .cybrary import ( CybraryCourseIE, CybraryIE, diff --git a/yt_dlp/extractor/cwtv.py b/yt_dlp/extractor/cwtv.py index 24a8d676a..cdb29fcee 100644 --- a/yt_dlp/extractor/cwtv.py +++ b/yt_dlp/extractor/cwtv.py @@ -1,9 +1,12 @@ +import re + from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, parse_age_limit, parse_iso8601, + parse_qs, smuggle_url, str_or_none, update_url_query, @@ -12,7 +15,8 @@ from ..utils.traversal import traverse_obj class CWTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})' + IE_NAME = 'cwtv' + _VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch|guid)=(?P[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})' _TESTS = [{ 'url': 'https://www.cwtv.com/shows/continuum/a-stitch-in-time/?play=9149a1e1-4cb2-46d7-81b2-47d35bbd332b', 'info_dict': { @@ -90,6 +94,9 @@ class CWTVIE(InfoExtractor): }, { 'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?watch=6b15e985-9345-4f60-baf8-56e96be57c63', 'only_matching': True, + }, { + 'url': 'http://www.cwtv.com/movies/play/?guid=0a8e8b5b-1356-41d5-9a6a-4eda1a6feb6c', + 'only_matching': True, }] def _real_extract(self, url): @@ -124,3 +131,50 @@ class CWTVIE(InfoExtractor): 'ie_key': 'ThePlatform', 'thumbnail': video_data.get('large_thumbnail'), } + + +class CWTVMovieIE(InfoExtractor): + IE_NAME = 'cwtv:movie' + _VALID_URL = r'https?://(?:www\.)?cwtv\.com/shows/(?P[\w-]+)/?\?(?:[^#]+&)?viewContext=Movies' + _TESTS = [{ + 'url': 'https://www.cwtv.com/shows/the-crush/?viewContext=Movies+Swimlane', + 'info_dict': { + 'id': '0a8e8b5b-1356-41d5-9a6a-4eda1a6feb6c', + 'ext': 'mp4', + 'title': 'The Crush', + 'upload_date': '20241112', + 'description': 'md5:1549acd90dff4a8273acd7284458363e', + 'chapters': 'count:9', + 'timestamp': 1731398400, + 'age_limit': 16, + 'duration': 5337, + 'series': 'The Crush', + 'season': 'Season 1', + 'uploader': 'CWTV', + 'season_number': 1, + 'episode': 'Episode 1', + 'episode_number': 1, + 'thumbnail': r're:https?://.+\.jpe?g', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }] + _UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}' + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + app_url = ( + self._html_search_meta('al:ios:url', webpage, default=None) + or self._html_search_meta('al:android:url', webpage, default=None)) + video_id = ( + traverse_obj(parse_qs(app_url), ('video_id', 0, {lambda x: re.fullmatch(self._UUID_RE, x)}, 0)) + or self._search_regex([ + rf'CWTV\.Site\.curPlayingGUID\s*=\s*["\']({self._UUID_RE})', + rf'CWTV\.Site\.viewInAppURL\s*=\s*["\']/shows/[\w-]+/watch-in-app/\?play=({self._UUID_RE})', + ], webpage, 'video ID')) + + return self.url_result( + f'https://www.cwtv.com/shows/{display_id}/{display_id}/?play={video_id}', CWTVIE, video_id)