streamlink/src/streamlink/plugins/bloomberg.py

184 lines
6.7 KiB
Python

import logging
import re
from functools import partial
from streamlink.plugin import Plugin
from streamlink.plugin.api import validate, useragents
from streamlink.stream import HDSStream, HLSStream, HTTPStream
from streamlink.utils import parse_json, update_scheme
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
class Bloomberg(Plugin):
    """Plugin that resolves Bloomberg live channels and news videos to streams.

    Two kinds of page URL are accepted (see ``_url_re``):

    * ``/news/videos/<date>/<slug>``: on-demand video; stream URLs come from
      the embed API at ``VOD_API_URL``.
    * ``/live/<channel>``: live channel; the channel is mapped to a
      ``liveId`` via the page's preloaded state and the stream URLs are
      scraped from the player's JavaScript bundle.
    """

    # Embed API endpoint; ``{0}`` is replaced with the video id.
    VOD_API_URL = 'https://www.bloomberg.com/api/embed?id={0}'
    # Player bootstrap script that references the live player's app bundle.
    PLAYER_URL = 'https://cdn.gotraffic.net/projector/latest/bplayer.js'
    # NOTE: not referenced by any code in this class; kept so that anything
    # outside this class that reads it keeps working.
    CHANNEL_MAP = {
        'audio': 'BBG_RADIO',
        'live/europe': 'EU',
        'live/us': 'US',
        'live/asia': 'ASIA',
        'live/stream': 'EVENT',
        'live/emea': 'EMEA_EVENT',
        'live/asia_stream': 'ASIA_EVENT'
    }

    # The channel capture is non-greedy and anchored at the end of the URL so
    # that an optional trailing slash is consumed by ``/?`` instead of ending
    # up inside the ``channel`` group (a greedy ``.+`` would swallow it).
    _url_re = re.compile(r'''
        https?://(?:www\.)?bloomberg\.com/
        (?:
            news/videos/[^/]+/[^/]+|
            live/(?P<channel>.+?)/?$
        )
    ''', re.VERBOSE)
    # Locates the live player's app bundle URL inside the bootstrap script.
    _live_player_re = re.compile(r'{APP_BUNDLE:"(?P<live_player_url>.+?/app\.js)"')
    # Callable that quotes bare ``key:`` names so a JS object literal can be
    # parsed as JSON.
    _js_to_json_re = partial(re.compile(r'(\w+):(["\']|\d?\.?\d+,|true|false|\[|{)').sub, r'"\1":\2')
    # Finds the (backslash-escaped) ``data-bmmr-id`` attribute on a VOD page.
    _video_id_re = re.compile(r'data-bmmr-id=\\"(?P<video_id>.+?)\\"')
    # Extracts the numeric suffix of ``..._<digits>.mp4`` as the bitrate.
    _mp4_bitrate_re = re.compile(r'.*_(?P<bitrate>[0-9]+)\.mp4')
    # Captures the object assigned to ``window.__PRELOADED_STATE__``.
    _preload_state_re = re.compile(r'window\.__PRELOADED_STATE__\s*=\s*({.*});', re.DOTALL)
    # Captures the object exported by module 12 of the player bundle.
    _live_stream_info_re = re.compile(r'12:.*t\.exports=({.*})},{}],13:', re.DOTALL)

    # Turns the JS object literal from the player bundle into
    # ``{live_id: {'cdns': [url, ...]}}``.
    _live_streams_schema = validate.Schema(
        validate.transform(_js_to_json_re),
        # JSON requires a leading zero on fractional numbers (``:.5`` -> ``:0.5``).
        validate.transform(lambda x: x.replace(':.', ':0.')),
        validate.transform(parse_json),
        validate.Schema({
            validate.text: {
                validate.optional('cdns'): validate.all(
                    [
                        validate.Schema(
                            {
                                'streams': validate.all([
                                    validate.Schema(
                                        # Restore the second slash when the URL
                                        # has only one after the scheme.
                                        {'url': validate.transform(lambda x: re.sub(r'(https?:/)([^/])', r'\1/\2', x))},
                                        validate.get('url'),
                                        validate.url()
                                    )
                                ]),
                            },
                            validate.get('streams')
                        )
                    ],
                    # Flatten the per-CDN lists of URLs into a single list.
                    validate.transform(lambda x: [i for y in x for i in y])
                )
            }
        },
        )
    )

    # Reduces the preloaded page state to ``{channel_id: live_id}``.
    _channel_list_schema = validate.Schema(
        validate.transform(parse_json),
        {"live": {"channels": {"byChannelId": {
            validate.text: validate.all({"liveId": validate.text}, validate.get("liveId"))
        }}}},
        validate.get("live"),
        validate.get("channels"),
        validate.get("byChannelId"),
    )

    # Collapses the embed API response into a de-duplicated list of URLs.
    # The list is built from a set, so its order is unspecified.
    _vod_api_schema = validate.Schema(
        {
            'secureStreams': validate.all([
                validate.Schema(
                    {'url': validate.url()},
                    validate.get('url')
                )
            ]),
            'streams': validate.all([
                validate.Schema(
                    {'url': validate.url()},
                    validate.get('url')
                )
            ]),
            'contentLoc': validate.url(),
        },
        validate.transform(lambda x: list(set(x['secureStreams'] + x['streams'] + [x['contentLoc']])))
    )

    @classmethod
    def can_handle_url(cls, url):
        """Return a match object if *url* is a supported Bloomberg URL, else None."""
        # Look the pattern up on ``cls`` so a subclass override is honoured.
        return cls._url_re.match(url)

    def _get_live_streams(self):
        """Return the list of stream URLs for the live channel in ``self.url``.

        Returns an empty list when any step of the lookup fails: the page has
        no preloaded state, the channel has no ``liveId``, the player bundle
        URL cannot be found, or the bundle has no stream info.
        """
        # Get channel id
        match = self._url_re.match(self.url)
        channel = match.group('channel')

        res = self.session.http.get(self.url, headers={
            "authority": "www.bloomberg.com",
            "upgrade-insecure-requests": "1",
            "dnt": "1",
            "accept": ";".join([
                "text/html,application/xhtml+xml,application/xml",
                "q=0.9,image/webp,image/apng,*/*",
                "q=0.8,application/signed-exchange",
                "v=b3"
            ])
        })
        # Only report the bot check; the lookup below is still attempted.
        if "Are you a robot?" in res.text:
            log.error("Are you a robot?")

        match = self._preload_state_re.search(res.text)
        if match is None:
            return []

        live_ids = self._channel_list_schema.validate(match.group(1))
        live_id = live_ids.get(channel)
        if not live_id:
            log.error("Could not find liveId for channel '{0}'".format(channel))
            return []
        log.debug("Found liveId = {0}".format(live_id))

        # Retrieve live player URL
        res = self.session.http.get(self.PLAYER_URL)
        match = self._live_player_re.search(res.text)
        if match is None:
            return []
        live_player_url = update_scheme(self.url, match.group('live_player_url'))

        # Extract streams from the live player page
        log.debug("Player URL: {0}".format(live_player_url))
        res = self.session.http.get(live_player_url)
        match = self._live_stream_info_re.search(res.text)
        if match is None:
            return []

        stream_info = self._live_streams_schema.validate(match.group(1))
        return stream_info.get(live_id, {}).get('cdns', [])

    def _get_vod_streams(self):
        """Return the list of stream URLs for the news video in ``self.url``.

        Returns an empty list when the page carries no video id.
        """
        # Retrieve URL page and search for video ID
        res = self.session.http.get(self.url)
        match = self._video_id_re.search(res.text)
        if match is None:
            return []

        video_id = match.group('video_id')
        res = self.session.http.get(self.VOD_API_URL.format(video_id))
        return self.session.http.json(res, schema=self._vod_api_schema)

    def _get_streams(self):
        """Yield ``(name, stream)`` pairs for the page in ``self.url``.

        Each discovered URL is handled by exactly one branch, chosen by the
        first marker found in it: ``.f4m`` (HDS manifest), ``.m3u8`` (HLS
        variant playlist), then ``.mp4`` (direct HTTP stream). URLs with none
        of these markers are skipped.
        """
        self.session.http.headers.update({"User-Agent": useragents.CHROME})
        if '/news/videos/' in self.url:
            # VOD
            streams = self._get_vod_streams()
        else:
            # Live
            streams = self._get_live_streams()

        for video_url in streams:
            log.debug("Found stream: {0}".format(video_url))
            if '.f4m' in video_url:
                for stream in HDSStream.parse_manifest(self.session, video_url).items():
                    yield stream
            elif '.m3u8' in video_url:
                for stream in HLSStream.parse_variant_playlist(self.session, video_url).items():
                    yield stream
            # ``elif`` so that a manifest URL which also contains ".mp4" is
            # not additionally offered as a plain HTTP stream.
            elif '.mp4' in video_url:
                match = self._mp4_bitrate_re.match(video_url)
                if match is not None:
                    bitrate = match.group('bitrate') + 'k'
                else:
                    bitrate = 'vod'
                yield bitrate, HTTPStream(self.session, video_url)
# Module-level export of the plugin class.
# NOTE(review): presumably the name Streamlink's plugin loader looks up; confirm.
__plugin__ = Bloomberg