From 247c6cfa11d53355b642ba544815d64f1ec8e914 Mon Sep 17 00:00:00 2001 From: bastimeyer Date: Wed, 28 Feb 2024 04:17:27 +0100 Subject: [PATCH] plugins.bloomberg: refactor and fix data regex --- src/streamlink/plugins/bloomberg.py | 42 +++++++++++++++-------------- tests/plugins/test_bloomberg.py | 21 +++++++++++---- 2 files changed, 38 insertions(+), 25 deletions(-) diff --git a/src/streamlink/plugins/bloomberg.py b/src/streamlink/plugins/bloomberg.py index 2beedc66..b7aeb6df 100644 --- a/src/streamlink/plugins/bloomberg.py +++ b/src/streamlink/plugins/bloomberg.py @@ -16,14 +16,14 @@ from streamlink.stream.hls import HLSStream log = logging.getLogger(__name__) -@pluginmatcher(re.compile(r""" - https?://(?:www\.)?bloomberg\.com/ - (?: - (?P<live>live)(?:/(?P<channel>[^/]+))? - | - news/videos/[^/]+/[^/]+ - ) -""", re.VERBOSE)) +@pluginmatcher( + name="live", + pattern=re.compile(r"https?://(?:www\.)?bloomberg\.com/live(?:/(?P<channel>[^/]+))?"), +) +@pluginmatcher( + name="vod", + pattern=re.compile(r"https?://(?:www\.)?bloomberg\.com/news/videos/[^/]+/[^/]+"), +) class Bloomberg(Plugin): LIVE_API_URL = "https://cdn.gotraffic.net/projector/latest/assets/config/config.min.json?v=1" VOD_API_URL = "https://www.bloomberg.com/api/embed?id={0}" @@ -106,21 +106,23 @@ class Bloomberg(Plugin): def _get_streams(self): del self.session.http.headers["Accept-Encoding"] - try: - data = self.session.http.get(self.url, schema=validate.Schema( - validate.parse_html(), - validate.xml_xpath_string(".//script[contains(text(),'window.__PRELOADED_STATE__')][1]/text()"), - str, - validate.regex(re.compile(r"^\s*window\.__PRELOADED_STATE__\s*=\s*({.+})\s*;?\s*$", re.DOTALL)), - validate.get(1), - validate.parse_json(), - )) - except PluginError: + data = self.session.http.get(self.url, schema=validate.Schema( + validate.parse_html(), + validate.xml_xpath_string(".//script[contains(text(),'window.__PRELOADED_STATE__')][1]/text()"), + validate.none_or_all( 
re.compile(r"\bwindow\.__PRELOADED_STATE__\s*=\s*(?P<json>{.+?})\s*;(?:\s|$)"), + validate.none_or_all( + validate.get("json"), + validate.parse_json(), + ), + ), + )) + if not data: log.error("Could not find JSON data. Invalid URL or bot protection...") return - if self.match.group("live"): - streams = self._get_live_streams(data, self.match.group("channel") or self.DEFAULT_CHANNEL) + if self.matches["live"]: + streams = self._get_live_streams(data, self.match["channel"] or self.DEFAULT_CHANNEL) else: streams = self._get_vod_streams(data) diff --git a/tests/plugins/test_bloomberg.py b/tests/plugins/test_bloomberg.py index 5c113ccf..35f09dbb 100644 --- a/tests/plugins/test_bloomberg.py +++ b/tests/plugins/test_bloomberg.py @@ -6,11 +6,22 @@ class TestPluginCanHandleUrlBloomberg(PluginCanHandleUrl): __plugin__ = Bloomberg should_match_groups = [ - ("https://www.bloomberg.com/live", {"live": "live"}), - ("https://www.bloomberg.com/live/", {"live": "live"}), - ("https://www.bloomberg.com/live/europe", {"live": "live", "channel": "europe"}), - ("https://www.bloomberg.com/live/europe/", {"live": "live", "channel": "europe"}), - ("https://www.bloomberg.com/news/videos/2022-08-10/-bloomberg-surveillance-early-edition-full-08-10-22", {}), + ( + ("live", "https://www.bloomberg.com/live"), + {}, + ), + ( + ("live", "https://www.bloomberg.com/live/europe"), + {"channel": "europe"}, + ), + ( + ("live", "https://www.bloomberg.com/live/us"), + {"channel": "us"}, + ), + ( + ("vod", "https://www.bloomberg.com/news/videos/2022-08-10/-bloomberg-surveillance-early-edition-full-08-10-22"), + {}, + ), ] should_not_match = [