plugins.bloomberg: refactor and fix data regex

This commit is contained in:
bastimeyer 2024-02-28 04:17:27 +01:00 committed by Sebastian Meyer
parent e077e2fccd
commit 247c6cfa11
2 changed files with 38 additions and 25 deletions

View File

@@ -16,14 +16,14 @@ from streamlink.stream.hls import HLSStream
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@pluginmatcher(re.compile(r""" @pluginmatcher(
https?://(?:www\.)?bloomberg\.com/ name="live",
(?: pattern=re.compile(r"https?://(?:www\.)?bloomberg\.com/live(?:/(?P<channel>[^/]+))?"),
(?P<live>live)(?:/(?P<channel>[^/]+))? )
| @pluginmatcher(
news/videos/[^/]+/[^/]+ name="vod",
) pattern=re.compile(r"https?://(?:www\.)?bloomberg\.com/news/videos/[^/]+/[^/]+"),
""", re.VERBOSE)) )
class Bloomberg(Plugin): class Bloomberg(Plugin):
LIVE_API_URL = "https://cdn.gotraffic.net/projector/latest/assets/config/config.min.json?v=1" LIVE_API_URL = "https://cdn.gotraffic.net/projector/latest/assets/config/config.min.json?v=1"
VOD_API_URL = "https://www.bloomberg.com/api/embed?id={0}" VOD_API_URL = "https://www.bloomberg.com/api/embed?id={0}"
@@ -106,21 +106,23 @@ class Bloomberg(Plugin):
def _get_streams(self): def _get_streams(self):
del self.session.http.headers["Accept-Encoding"] del self.session.http.headers["Accept-Encoding"]
try: data = self.session.http.get(self.url, schema=validate.Schema(
data = self.session.http.get(self.url, schema=validate.Schema( validate.parse_html(),
validate.parse_html(), validate.xml_xpath_string(".//script[contains(text(),'window.__PRELOADED_STATE__')][1]/text()"),
validate.xml_xpath_string(".//script[contains(text(),'window.__PRELOADED_STATE__')][1]/text()"), validate.none_or_all(
str, re.compile(r"\bwindow\.__PRELOADED_STATE__\s*=\s*(?P<json>{.+?})\s*;(?:\s|$)"),
validate.regex(re.compile(r"^\s*window\.__PRELOADED_STATE__\s*=\s*({.+})\s*;?\s*$", re.DOTALL)), validate.none_or_all(
validate.get(1), validate.get("json"),
validate.parse_json(), validate.parse_json(),
)) ),
except PluginError: ),
))
if not data:
log.error("Could not find JSON data. Invalid URL or bot protection...") log.error("Could not find JSON data. Invalid URL or bot protection...")
return return
if self.match.group("live"): if self.matches["live"]:
streams = self._get_live_streams(data, self.match.group("channel") or self.DEFAULT_CHANNEL) streams = self._get_live_streams(data, self.match["channel"] or self.DEFAULT_CHANNEL)
else: else:
streams = self._get_vod_streams(data) streams = self._get_vod_streams(data)

View File

@@ -6,11 +6,22 @@ class TestPluginCanHandleUrlBloomberg(PluginCanHandleUrl):
__plugin__ = Bloomberg __plugin__ = Bloomberg
should_match_groups = [ should_match_groups = [
("https://www.bloomberg.com/live", {"live": "live"}), (
("https://www.bloomberg.com/live/", {"live": "live"}), ("live", "https://www.bloomberg.com/live"),
("https://www.bloomberg.com/live/europe", {"live": "live", "channel": "europe"}), {},
("https://www.bloomberg.com/live/europe/", {"live": "live", "channel": "europe"}), ),
("https://www.bloomberg.com/news/videos/2022-08-10/-bloomberg-surveillance-early-edition-full-08-10-22", {}), (
("live", "https://www.bloomberg.com/live/europe"),
{"channel": "europe"},
),
(
("live", "https://www.bloomberg.com/live/us"),
{"channel": "us"},
),
(
("vod", "https://www.bloomberg.com/news/videos/2022-08-10/-bloomberg-surveillance-early-edition-full-08-10-22"),
{},
),
] ]
should_not_match = [ should_not_match = [