streamlink/src/streamlink/stream/hls.py

788 lines
30 KiB
Python

import logging
import re
import struct
from concurrent.futures import Future
from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Union
from urllib.parse import urlparse
# noinspection PyPackageRequirements
from Crypto.Cipher import AES
# noinspection PyPackageRequirements
from Crypto.Util.Padding import unpad
from requests import Response
from requests.exceptions import ChunkedEncodingError, ConnectionError, ContentDecodingError
from streamlink.buffers import RingBuffer
from streamlink.exceptions import StreamError
from streamlink.stream.ffmpegmux import FFMPEGMuxer, MuxedStream
from streamlink.stream.filtered import FilteredStream
from streamlink.stream.hls_playlist import M3U8, ByteRange, Key, Map, Media, Segment, load as load_hls_playlist
from streamlink.stream.http import HTTPStream
from streamlink.stream.segmented import SegmentedStreamReader, SegmentedStreamWorker, SegmentedStreamWriter
from streamlink.utils.cache import LRUCache
from streamlink.utils.formatter import Formatter
log = logging.getLogger(__name__)
class Sequence(NamedTuple):
    """An HLS playlist segment paired with its absolute media sequence number."""

    # Absolute media sequence number (playlist's EXT-X-MEDIA-SEQUENCE + index)
    num: int
    # The parsed playlist segment
    segment: Segment
class ByteRangeOffset:
    """Resolves EXT-X-BYTERANGE values into absolute HTTP Range boundaries.

    Keeps track of the end position of the previously resolved segment so that
    byteranges without an explicit offset can be chained onto their predecessor.
    """

    # Media sequence number of the segment whose byterange was resolved last
    sequence: Optional[int] = None
    # Byte position where the directly-following segment is expected to start
    offset: Optional[int] = None

    @staticmethod
    def _calc_end(start: int, size: int) -> int:
        """Return the inclusive end position of a range of ``size`` bytes at ``start``."""
        return start + max(size - 1, 0)

    def cached(self, sequence: int, byterange: ByteRange) -> Tuple[int, int]:
        """Resolve a segment byterange, remembering the follow-up offset.

        Falls back to the cached offset when the playlist omits an explicit one,
        but only if this segment directly follows the previously resolved one.

        :raises StreamError: if no offset is given and none can be chained
        """
        explicit = byterange.offset
        if explicit is not None:
            begin = explicit
        elif self.offset is not None and self.sequence == sequence - 1:
            begin = self.offset
        else:
            raise StreamError("Missing BYTERANGE offset")

        end = self._calc_end(begin, byterange.range)
        # remember where the next contiguous segment would start
        self.sequence = sequence
        self.offset = end + 1

        return begin, end

    def uncached(self, byterange: ByteRange) -> Tuple[int, int]:
        """Resolve a byterange which must carry an explicit offset (e.g. an EXT-X-MAP).

        :raises StreamError: if the byterange has no offset
        """
        begin = byterange.offset
        if begin is None:
            raise StreamError("Missing BYTERANGE offset")

        return begin, self._calc_end(begin, byterange.range)
class HLSStreamWriter(SegmentedStreamWriter):
    """Writer which downloads HLS segments, optionally decrypts them, and feeds the reader's buffer."""

    # Chunk size used when streaming plaintext segment data into the ring buffer
    WRITE_CHUNK_SIZE = 8192

    reader: "HLSStreamReader"
    stream: "HLSStream"

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        options = self.session.options

        # tracks BYTERANGE offsets across consecutive segments
        self.byterange: ByteRangeOffset = ByteRangeOffset()
        # caches EXT-X-MAP (initialization section) request futures by URI
        self.map_cache: LRUCache[str, Future] = LRUCache(self.threads)
        # most recently fetched decryption key data and the URI it came from
        self.key_data: Union[bytes, bytearray, memoryview] = b""
        self.key_uri: Optional[str] = None
        self.key_uri_override = options.get("hls-segment-key-uri")
        self.stream_data = options.get("hls-segment-stream-data")

        # optional pattern matching segment names which should be filtered out
        self.ignore_names: Optional[re.Pattern] = None
        ignore_names = {*options.get("hls-segment-ignore-names")}
        if ignore_names:
            segments = "|".join(map(re.escape, ignore_names))
            # noinspection RegExpUnnecessaryNonCapturingGroup
            self.ignore_names = re.compile(rf"(?:{segments})\.ts", re.IGNORECASE)

    @staticmethod
    def num_to_iv(n: int) -> bytes:
        """Derive a 16-byte AES IV from the media sequence number (8 zero bytes + big-endian int64)."""
        return struct.pack(">8xq", n)

    def create_decryptor(self, key: Key, num: int):
        """Create an AES-128-CBC decryptor for the segment with sequence number ``num``.

        Fetches and caches the key data, honoring the ``hls-segment-key-uri`` override template.

        :raises StreamError: on unsupported encryption methods or a missing key URI
        """
        if key.method != "AES-128":
            raise StreamError(f"Unable to decrypt cipher {key.method}")

        if not self.key_uri_override and not key.uri:
            raise StreamError("Missing URI for decryption key")

        if not self.key_uri_override:
            key_uri = key.uri
        else:
            # expand the user-supplied URI template from the playlist key's URI parts
            p = urlparse(key.uri)
            formatter = Formatter({
                "url": lambda: key.uri,
                "scheme": lambda: p.scheme,
                "netloc": lambda: p.netloc,
                "path": lambda: p.path,
                "query": lambda: p.query,
            })
            key_uri = formatter.format(self.key_uri_override)

        # only re-fetch the key when its URI has changed
        if key_uri and self.key_uri != key_uri:
            res = self.session.http.get(
                key_uri,
                exception=StreamError,
                retries=self.retries,
                **self.reader.request_params,
            )
            res.encoding = "binary/octet-stream"
            self.key_data = res.content
            self.key_uri = key_uri

        iv = key.iv or self.num_to_iv(num)

        # Pad IV if needed
        iv = b"\x00" * (16 - len(iv)) + iv

        return AES.new(self.key_data, AES.MODE_CBC, iv)

    def create_request_params(self, num: int, segment: Union[Segment, Map], is_map: bool):
        """Build per-request params, adding an HTTP Range header for byterange segments/maps."""
        request_params = dict(self.reader.request_params)
        headers = request_params.pop("headers", {})

        if segment.byterange:
            if is_map:
                # maps must always carry an explicit offset
                bytes_start, bytes_end = self.byterange.uncached(segment.byterange)
            else:
                # regular segments may chain onto the previous segment's end offset
                bytes_start, bytes_end = self.byterange.cached(num, segment.byterange)
            headers["Range"] = f"bytes={bytes_start}-{bytes_end}"

        request_params["headers"] = headers

        return request_params

    def put(self, sequence: Sequence):
        """Queue download futures for a sequence: its map first (if any), then the segment itself."""
        if self.closed:
            return

        if sequence is None:
            self.queue(None, None)
            return

        # always queue the segment's map first if it exists
        if sequence.segment.map is not None:
            cached_map_future = self.map_cache.get(sequence.segment.map.uri)
            # use cached map request if not a stream discontinuity
            # don't fetch multiple times when map request of previous segment is still pending
            if cached_map_future is not None and not sequence.segment.discontinuity:
                future = cached_map_future
            else:
                future = self.executor.submit(self.fetch_map, sequence)
                self.map_cache.set(sequence.segment.map.uri, future)
            self.queue(sequence, future, True)

        # regular segment request
        future = self.executor.submit(self.fetch, sequence)
        self.queue(sequence, future, False)

    def fetch(self, sequence: Sequence) -> Optional[Response]:
        """Download a regular segment; returns ``None`` (after logging) on failure."""
        try:
            return self._fetch(
                sequence.segment.uri,
                stream=self.stream_data,
                **self.create_request_params(sequence.num, sequence.segment, False),
            )
        except StreamError as err:
            log.error(f"Failed to fetch segment {sequence.num}: {err}")

    def fetch_map(self, sequence: Sequence) -> Optional[Response]:
        """Download a segment's initialization section; returns ``None`` (after logging) on failure."""
        _map: Map = sequence.segment.map  # type: ignore[assignment]  # map is not None
        try:
            return self._fetch(
                _map.uri,
                stream=False,
                **self.create_request_params(sequence.num, _map, True),
            )
        except StreamError as err:
            log.error(f"Failed to fetch map for segment {sequence.num}: {err}")

    def _fetch(self, url: str, **request_params) -> Optional[Response]:
        """Perform the actual HTTP GET, honoring the writer's timeout/retry settings."""
        if self.closed or not self.retries:  # pragma: no cover
            return None

        return self.session.http.get(
            url,
            timeout=self.timeout,
            retries=self.retries,
            exception=StreamError,
            **request_params,
        )

    def should_filter_sequence(self, sequence: Sequence) -> bool:
        """Return whether this segment matches the user's ignore-names pattern."""
        return self.ignore_names is not None and self.ignore_names.search(sequence.segment.uri) is not None

    def write(self, sequence: Sequence, result: Response, *data):
        """Write a downloaded segment to the buffer, or discard it when filtered.

        Also manages pausing/resuming the reader while segments are being filtered out.
        """
        if not self.should_filter_sequence(sequence):
            log.debug(f"Writing segment {sequence.num} to output")

            written_once = self.reader.buffer.written_once
            try:
                return self._write(sequence, result, *data)
            finally:
                is_paused = self.reader.is_paused()

                # Depending on the filtering implementation, the segment's discontinuity attribute can be missing.
                # Also check if the output will be resumed after data has already been written to the buffer before.
                if sequence.segment.discontinuity or is_paused and written_once:
                    log.warning(
                        "Encountered a stream discontinuity. This is unsupported and will result in incoherent output data.",
                    )

                # unblock reader thread after writing data to the buffer
                if is_paused:
                    log.info("Resuming stream output")
                    self.reader.resume()

        else:
            log.debug(f"Discarding segment {sequence.num}")

            # Read and discard any remaining HTTP response data in the response connection.
            # Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
            result.raw.drain_conn()

            # block reader thread if filtering out segments
            if not self.reader.is_paused():
                log.info("Filtering out segments and pausing stream output")
                self.reader.pause()

    def _write(self, sequence: Sequence, result: Response, is_map: bool):
        """Decrypt (if needed) and forward segment data to the reader's buffer."""
        if sequence.segment.key and sequence.segment.key.method != "NONE":
            try:
                decryptor = self.create_decryptor(sequence.segment.key, sequence.num)
            except (StreamError, ValueError) as err:
                log.error(f"Failed to create decryptor: {err}")
                self.close()
                return

            try:
                # Unlike plaintext segments, encrypted segments can't be written to the buffer in small chunks
                # because of the byte padding at the end of the decrypted data, which means that decrypting in
                # smaller chunks is unnecessary if the entire segment needs to be kept in memory anyway, unless
                # we defer the buffer writes by one read call and apply the unpad call only to the last read call.
                encrypted_chunk = result.content
                decrypted_chunk = decryptor.decrypt(encrypted_chunk)
                chunk = unpad(decrypted_chunk, AES.block_size, style="pkcs7")
                self.reader.buffer.write(chunk)
            except (ChunkedEncodingError, ContentDecodingError, ConnectionError) as err:
                log.error(f"Download of segment {sequence.num} failed: {err}")
                return
            except ValueError as err:
                log.error(f"Error while decrypting segment {sequence.num}: {err}")
                return
        else:
            try:
                for chunk in result.iter_content(self.WRITE_CHUNK_SIZE):
                    self.reader.buffer.write(chunk)
            except (ChunkedEncodingError, ContentDecodingError, ConnectionError) as err:
                log.error(f"Download of segment {sequence.num} failed: {err}")
                return

        if is_map:
            log.debug(f"Segment initialization {sequence.num} complete")
        else:
            log.debug(f"Segment {sequence.num} complete")
class HLSStreamWorker(SegmentedStreamWorker):
    """Worker which reloads the media playlist and yields segment sequences to the writer."""

    reader: "HLSStreamReader"
    writer: "HLSStreamWriter"
    stream: "HLSStream"

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        # whether the last reload produced a different set of sequence numbers
        self.playlist_changed = False
        # last sequence number of an ended (EXT-X-ENDLIST) playlist, if any
        self.playlist_end: Optional[int] = None
        # next media sequence number to be queued (-1 until initialized)
        self.playlist_sequence: int = -1
        self.playlist_sequences: List[Sequence] = []
        # seconds to wait between playlist reloads; recalculated on each reload
        self.playlist_reload_time: float = 6
        self.playlist_reload_time_override = self.session.options.get("hls-playlist-reload-time")
        self.playlist_reload_retries = self.session.options.get("hls-playlist-reload-attempts")
        self.live_edge = self.session.options.get("hls-live-edge")
        self.duration_offset_start = int(self.stream.start_offset + (self.session.options.get("hls-start-offset") or 0))
        self.duration_limit = self.stream.duration or (
            int(self.session.options.get("hls-duration")) if self.session.options.get("hls-duration") else None)
        self.hls_live_restart = self.stream.force_restart or self.session.options.get("hls-live-restart")

        # normalize the reload-time override: a numeric string >= 2 becomes a float,
        # anything other than the "segment"/"live-edge" keywords is disabled (0)
        if str(self.playlist_reload_time_override).isnumeric() and float(self.playlist_reload_time_override) >= 2:
            self.playlist_reload_time_override = float(self.playlist_reload_time_override)
        elif self.playlist_reload_time_override not in ["segment", "live-edge"]:
            self.playlist_reload_time_override = 0

    def _fetch_playlist(self) -> Response:
        """Download the media playlist with the reader's request params."""
        res = self.session.http.get(
            self.stream.url,
            exception=StreamError,
            retries=self.playlist_reload_retries,
            **self.reader.request_params,
        )
        res.encoding = "utf-8"
        return res

    # TODO: rename to _parse_playlist
    def _reload_playlist(self, *args, **kwargs):
        return load_hls_playlist(*args, **kwargs)

    def reload_playlist(self):
        """Fetch and parse the playlist, then update sequencing state.

        :raises StreamError: on parse errors, master playlists, or I-frame-only playlists
        """
        if self.closed:  # pragma: no cover
            return

        # wait until the buffer has free space before requesting more segments
        self.reader.buffer.wait_free()
        log.debug("Reloading playlist")
        res = self._fetch_playlist()

        try:
            playlist = self._reload_playlist(res)
        except ValueError as err:
            raise StreamError(err) from err

        if playlist.is_master:
            raise StreamError(f"Attempted to play a variant playlist, use 'hls://{self.stream.url}' instead")

        if playlist.iframes_only:
            raise StreamError("Streams containing I-frames only are not playable")

        media_sequence = playlist.media_sequence or 0
        sequences = [Sequence(media_sequence + i, s)
                     for i, s in enumerate(playlist.segments)]

        self.playlist_reload_time = self._playlist_reload_time(playlist, sequences)

        if sequences:
            self.process_sequences(playlist, sequences)

    def _playlist_reload_time(self, playlist: M3U8, sequences: List[Sequence]) -> float:
        """Pick the reload interval: override first, then target duration, then live-edge sum."""
        if self.playlist_reload_time_override == "segment" and sequences:
            return sequences[-1].segment.duration
        if self.playlist_reload_time_override == "live-edge" and sequences:
            return sum(s.segment.duration for s in sequences[-max(1, self.live_edge - 1):])
        if type(self.playlist_reload_time_override) is float and self.playlist_reload_time_override > 0:
            return self.playlist_reload_time_override
        if playlist.target_duration:
            return playlist.target_duration
        if sequences:
            return sum(s.segment.duration for s in sequences[-max(1, self.live_edge - 1):])

        # keep the previous value when nothing else applies
        return self.playlist_reload_time

    def process_sequences(self, playlist: M3U8, sequences: List[Sequence]) -> None:
        """Update playlist state from a non-empty list of sequences."""
        first_sequence, last_sequence = sequences[0], sequences[-1]

        if first_sequence.segment.key and first_sequence.segment.key.method != "NONE":
            log.debug("Segments in this playlist are encrypted")

        self.playlist_changed = ([s.num for s in self.playlist_sequences] != [s.num for s in sequences])
        self.playlist_sequences = sequences

        if not self.playlist_changed:
            # reload faster (but at most every second) when the playlist is stale
            self.playlist_reload_time = max(self.playlist_reload_time / 2, 1)

        if playlist.is_endlist:
            self.playlist_end = last_sequence.num

        if self.playlist_sequence < 0:
            if self.playlist_end is None and not self.hls_live_restart:
                # live stream: start at the configured live edge
                edge_index = -(min(len(sequences), max(int(self.live_edge), 1)))
                edge_sequence = sequences[edge_index]
                self.playlist_sequence = edge_sequence.num
            else:
                self.playlist_sequence = first_sequence.num

    def valid_sequence(self, sequence: Sequence) -> bool:
        """Return whether this sequence has not been queued yet."""
        return sequence.num >= self.playlist_sequence

    @staticmethod
    def duration_to_sequence(duration: float, sequences: List[Sequence]) -> int:
        """Map a time offset (negative = from the end) to a sequence number."""
        d = 0.0
        default = -1

        sequences_order = sequences if duration >= 0 else reversed(sequences)

        for sequence in sequences_order:
            if d >= abs(duration):
                return sequence.num
            d += sequence.segment.duration
            default = sequence.num

        # could not skip far enough, so return the default
        return default

    def iter_segments(self):
        """Generator driving the writer: yields sequences and reloads the playlist in between."""
        try:
            self.reload_playlist()
        except StreamError as err:
            log.error(f"{err}")
            self.reader.close()
            return

        if self.playlist_end is None:
            if self.duration_offset_start > 0:
                log.debug(f"Time offsets negative for live streams, skipping back {self.duration_offset_start} seconds")
            # live playlist, force offset durations back to None
            self.duration_offset_start = -self.duration_offset_start

        if self.duration_offset_start != 0:
            self.playlist_sequence = self.duration_to_sequence(self.duration_offset_start, self.playlist_sequences)

        if self.playlist_sequences:
            log.debug("; ".join([
                f"First Sequence: {self.playlist_sequences[0].num}",
                f"Last Sequence: {self.playlist_sequences[-1].num}",
            ]))
            log.debug("; ".join([
                f"Start offset: {self.duration_offset_start}",
                f"Duration: {self.duration_limit}",
                f"Start Sequence: {self.playlist_sequence}",
                f"End Sequence: {self.playlist_end}",
            ]))

        total_duration = 0
        while not self.closed:
            for sequence in filter(self.valid_sequence, self.playlist_sequences):
                log.debug(f"Adding segment {sequence.num} to queue")
                yield sequence
                total_duration += sequence.segment.duration
                if self.duration_limit and total_duration >= self.duration_limit:
                    log.info(f"Stopping stream early after {self.duration_limit}")
                    return

                # End of stream
                stream_end = self.playlist_end and sequence.num >= self.playlist_end
                if self.closed or stream_end:
                    return

                self.playlist_sequence = sequence.num + 1

            if self.wait(self.playlist_reload_time):
                try:
                    self.reload_playlist()
                except StreamError as err:
                    log.warning(f"Failed to reload playlist: {err}")
class HLSStreamReader(FilteredStream, SegmentedStreamReader):
    """Reader which pairs the HLS worker and writer and supports segment filtering."""

    __worker__ = HLSStreamWorker
    __writer__ = HLSStreamWriter

    worker: "HLSStreamWorker"
    writer: "HLSStreamWriter"
    stream: "HLSStream"
    buffer: RingBuffer

    def __init__(self, stream: "HLSStream"):
        """Copy the stream's request arguments, stripping internally-managed keys."""
        self.request_params = dict(stream.args)
        # These params are reserved for internal use and must not be forwarded
        # to the playlist/segment requests
        for reserved in ("exception", "stream", "timeout", "url"):
            self.request_params.pop(reserved, None)

        super().__init__(stream)
class MuxedHLSStream(MuxedStream):
    """
    Muxes multiple HLS video and audio streams into one output stream.
    """

    __shortname__ = "hls-multi"

    def __init__(
        self,
        session,
        video: str,
        audio: Union[str, List[str]],
        url_master: Optional[str] = None,
        multivariant: Optional[M3U8] = None,
        force_restart: bool = False,
        ffmpeg_options: Optional[Dict[str, Any]] = None,
        **args,
    ):
        """
        :param streamlink.Streamlink session: Streamlink session instance
        :param video: Video stream URL
        :param audio: Audio stream URL or list of URLs
        :param url_master: The URL of the HLS playlist's multivariant playlist (deprecated)
        :param multivariant: The parsed multivariant playlist
        :param force_restart: Start from the beginning after reaching the playlist's end
        :param ffmpeg_options: Additional keyword arguments passed to :class:`ffmpegmux.FFMPEGMuxer`
        :param args: Additional keyword arguments passed to :class:`HLSStream`
        """
        tracks = [video]
        if audio:
            tracks += audio if isinstance(audio, list) else [audio]

        # map the first input's video and audio (both optional),
        # then one audio map for each additional audio track
        maps = ["0:v?", "0:a?"] + [f"{idx}:a" for idx in range(1, len(tracks))]

        substreams = []
        for track_url in tracks:
            substreams.append(HLSStream(session, track_url, force_restart=force_restart, **args))

        super().__init__(session, *substreams, format="mpegts", maps=maps, **(ffmpeg_options or {}))
        self._url_master = url_master
        self.multivariant = multivariant if multivariant and multivariant.is_master else None

    @property
    def url_master(self):
        """Deprecated"""
        if self.multivariant and self.multivariant.uri:
            return self.multivariant.uri
        return self._url_master

    def to_manifest_url(self):
        """Return the multivariant playlist URL, falling back to the parent implementation."""
        if self.multivariant and self.multivariant.uri:
            url = self.multivariant.uri
        else:
            url = self.url_master

        return super().to_manifest_url() if url is None else url
class HLSStream(HTTPStream):
    """
    Implementation of the Apple HTTP Live Streaming protocol.
    """

    __shortname__ = "hls"
    __reader__ = HLSStreamReader

    def __init__(
        self,
        session_,
        url: str,
        url_master: Optional[str] = None,
        multivariant: Optional[M3U8] = None,
        force_restart: bool = False,
        start_offset: float = 0,
        duration: Optional[float] = None,
        **args,
    ):
        """
        :param streamlink.Streamlink session_: Streamlink session instance
        :param url: The URL of the HLS playlist
        :param url_master: The URL of the HLS playlist's multivariant playlist (deprecated)
        :param multivariant: The parsed multivariant playlist
        :param force_restart: Start from the beginning after reaching the playlist's end
        :param start_offset: Number of seconds to be skipped from the beginning
        :param duration: Number of seconds until ending the stream
        :param args: Additional keyword arguments passed to :meth:`requests.Session.request`
        """
        super().__init__(session_, url, **args)
        self._url_master = url_master
        # only keep the multivariant playlist when it actually is one
        self.multivariant = multivariant if multivariant and multivariant.is_master else None
        self.force_restart = force_restart
        self.start_offset = start_offset
        self.duration = duration

    def __json__(self):
        """Serialize the stream, adding the multivariant URL and dropping HTTP-only keys."""
        json = super().__json__()

        try:
            json["master"] = self.to_manifest_url()
        except TypeError:
            pass

        del json["method"]
        del json["body"]

        return json

    @property
    def url_master(self):
        """Deprecated"""
        return self.multivariant.uri if self.multivariant and self.multivariant.uri else self._url_master

    def to_manifest_url(self):
        """Return the prepared multivariant playlist URL, falling back to the parent implementation."""
        url = self.multivariant.uri if self.multivariant and self.multivariant.uri else self.url_master

        if url is None:
            return super().to_manifest_url()

        args = self.args.copy()
        args.update(url=url)

        return self.session.http.prepare_new_request(**args).url

    def open(self):
        """Open the stream and return the started :class:`HLSStreamReader`."""
        reader = self.__reader__(self)
        reader.open()

        return reader

    @classmethod
    def _fetch_variant_playlist(cls, session, url: str, **request_params) -> Response:
        """Download the variant playlist, raising :exc:`OSError` on HTTP errors."""
        res = session.http.get(url, exception=OSError, **request_params)
        res.encoding = "utf-8"

        return res

    # TODO: rename to _parse_variant_playlist
    @classmethod
    def _get_variant_playlist(cls, *args, **kwargs):
        return load_hls_playlist(*args, **kwargs)

    @classmethod
    def parse_variant_playlist(
        cls,
        session_,
        url: str,
        name_key: str = "name",
        name_prefix: str = "",
        check_streams: bool = False,
        force_restart: bool = False,
        name_fmt: Optional[str] = None,
        start_offset: float = 0,
        duration: Optional[float] = None,
        **request_params,
    ) -> Dict[str, Union["HLSStream", "MuxedHLSStream"]]:
        """
        Parse a variant playlist and return its streams.

        :param streamlink.Streamlink session_: Streamlink session instance
        :param url: The URL of the variant playlist
        :param name_key: Prefer to use this key as stream name, valid keys are: name, pixels, bitrate
        :param name_prefix: Add this prefix to the stream names
        :param check_streams: Only allow streams that are accessible
        :param force_restart: Start at the first segment even for a live stream
        :param name_fmt: A format string for the name, allowed format keys are: name, pixels, bitrate
        :param start_offset: Number of seconds to be skipped from the beginning
        :param duration: Number of second until ending the stream
        :param request_params: Additional keyword arguments passed to :class:`HLSStream`, :class:`MuxedHLSStream`,
                               or :py:meth:`requests.Session.request`
        """
        locale = session_.localization
        audio_select = session_.options.get("hls-audio-select")

        res = cls._fetch_variant_playlist(session_, url, **request_params)
        try:
            multivariant = cls._get_variant_playlist(res)
        except ValueError as err:
            raise OSError(f"Failed to parse playlist: {err}") from err

        stream_name: Optional[str]
        stream: Union["HLSStream", "MuxedHLSStream"]
        streams: Dict[str, Union["HLSStream", "MuxedHLSStream"]] = {}

        # I-frame-only playlists are not playable and get skipped
        for playlist in filter(lambda p: not p.is_iframe, multivariant.playlists):
            names: Dict[str, Optional[str]] = dict(name=None, pixels=None, bitrate=None)
            audio_streams = []
            fallback_audio: List[Media] = []
            default_audio: List[Media] = []
            preferred_audio: List[Media] = []

            # collect the playlist's video name and its audio renditions
            for media in playlist.media:
                if media.type == "VIDEO" and media.name:
                    names["name"] = media.name
                elif media.type == "AUDIO":
                    audio_streams.append(media)

            for media in audio_streams:
                # Media without a URI is not relevant as external audio
                if not media.uri:
                    continue

                if not fallback_audio and media.default:
                    fallback_audio = [media]

                # if the media is "autoselect" and it better matches the users preferences, use that
                # instead of default
                if not default_audio and (media.autoselect and locale.equivalent(language=media.language)):
                    default_audio = [media]

                # select the first audio stream that matches the user's explict language selection
                if (
                    (
                        "*" in audio_select
                        or media.language in audio_select
                        or media.name in audio_select
                    )
                    or (
                        (not preferred_audio or media.default)
                        and locale.explicit
                        and locale.equivalent(language=media.language)
                    )
                ):
                    preferred_audio.append(media)

            # final fallback on the first audio stream listed
            if not fallback_audio and len(audio_streams) and audio_streams[0].uri:
                fallback_audio = [audio_streams[0]]

            if playlist.stream_info.resolution and playlist.stream_info.resolution.height:
                names["pixels"] = f"{playlist.stream_info.resolution.height}p"

            if playlist.stream_info.bandwidth:
                bw = playlist.stream_info.bandwidth

                if bw >= 1000:
                    names["bitrate"] = f"{int(bw / 1000.0)}k"
                else:
                    names["bitrate"] = f"{bw / 1000.0}k"

            if name_fmt:
                stream_name = name_fmt.format(**names)
            else:
                # fall through the name candidates in order of preference
                stream_name = (
                    names.get(name_key)
                    or names.get("name")
                    or names.get("pixels")
                    or names.get("bitrate")
                )

            if not stream_name:
                continue
            if name_prefix:
                stream_name = f"{name_prefix}{stream_name}"

            if stream_name in streams:  # rename duplicate streams
                stream_name = f"{stream_name}_alt"
                num_alts = len([k for k in streams.keys() if k.startswith(stream_name)])

                # We shouldn't need more than 2 alt streams
                if num_alts >= 2:
                    continue
                elif num_alts > 0:
                    stream_name = f"{stream_name}{num_alts + 1}"

            if check_streams:
                # noinspection PyBroadException
                try:
                    session_.http.get(playlist.uri, **request_params)
                except KeyboardInterrupt:
                    raise
                except Exception:
                    continue

            external_audio = preferred_audio or default_audio or fallback_audio

            if external_audio and FFMPEGMuxer.is_usable(session_):
                external_audio_msg = ", ".join([
                    f"(language={x.language}, name={x.name or 'N/A'})"
                    for x in external_audio
                ])
                log.debug(f"Using external audio tracks for stream {stream_name} {external_audio_msg}")

                stream = MuxedHLSStream(
                    session_,
                    video=playlist.uri,
                    audio=[x.uri for x in external_audio if x.uri],
                    multivariant=multivariant,
                    force_restart=force_restart,
                    start_offset=start_offset,
                    duration=duration,
                    **request_params,
                )
            else:
                stream = cls(
                    session_,
                    playlist.uri,
                    multivariant=multivariant,
                    force_restart=force_restart,
                    start_offset=start_offset,
                    duration=duration,
                    **request_params,
                )

            streams[stream_name] = stream

        return streams