mirror of https://github.com/streamlink/streamlink
193 lines · 7.8 KiB · Python
import ssl
|
|
import time
|
|
from typing import Any, Dict, Pattern, Tuple
|
|
|
|
import requests.adapters
|
|
import urllib3
|
|
from requests import PreparedRequest, Request, Session
|
|
|
|
from streamlink.exceptions import PluginError
|
|
from streamlink.packages.requests_file import FileAdapter
|
|
from streamlink.plugin.api import useragents
|
|
from streamlink.utils.parse import parse_json, parse_xml
|
|
|
|
|
|
class _HTTPResponse(urllib3.response.HTTPResponse):
    def __init__(self, *args, **kwargs):
        """
        A urllib3 HTTPResponse subclass which unconditionally validates the "Content-Length" header.

        Works around a requests bug where no error gets raised for HTTP responses whose body is
        shorter than the announced "Content-Length":
        urllib3.response.HTTPResponse.stream() delegates to urllib3.response.HTTPResponse.read(),
        a wrapper around http.client.HTTPResponse.read(amt=...), which - for "backwards
        compatibility" with an old bug - swallows http.client.IncompleteRead when a specific
        amount is requested from a truncated response.

        urllib3's read() can detect this case via its enforce_content_length parameter, but that
        check is off by default and requests never enables it. Since requests offers no way to set
        the parameter, override the constructor and always force it on here.

        https://github.com/psf/requests/issues/4956#issuecomment-573325001
        """
        kwargs["enforce_content_length"] = True
        super().__init__(*args, **kwargs)
|
|
|
|
|
|
# Patch the two places where requests.adapters.HTTPAdapter.send ends up instantiating a response:
# override all urllib3.response.HTTPResponse references in requests.adapters.HTTPAdapter.send
urllib3.connectionpool.HTTPConnectionPool.ResponseCls = _HTTPResponse  # type: ignore[attr-defined]
requests.adapters.HTTPResponse = _HTTPResponse  # type: ignore[misc]
|
|
|
|
|
|
# Never convert percent-encoded characters to uppercase in urllib3>=1.25.8.
|
|
# This is required for sites which compare request URLs byte by byte and return different responses depending on that.
|
|
# Older versions of urllib3 are not compatible with this override and will always convert to uppercase characters.
|
|
#
|
|
# https://datatracker.ietf.org/doc/html/rfc3986#section-2.1
|
|
# > The uppercase hexadecimal digits 'A' through 'F' are equivalent to
|
|
# > the lowercase digits 'a' through 'f', respectively. If two URIs
|
|
# > differ only in the case of hexadecimal digits used in percent-encoded
|
|
# > octets, they are equivalent. For consistency, URI producers and
|
|
# > normalizers should use uppercase hexadecimal digits for all percent-
|
|
# > encodings.
|
|
class Urllib3UtilUrlPercentReOverride:
    """Drop-in replacement for urllib3's PERCENT_RE which never rewrites percent-encodings."""

    # keep a reference to the original compiled pattern, so occurrences can still be counted
    _re_percent_encoding: Pattern = urllib3.util.url.PERCENT_RE  # type: ignore[attr-defined]

    # urllib3>=1.25.8 calls PERCENT_RE.subn() when normalizing URLs:
    # https://github.com/urllib3/urllib3/blame/1.25.8/src/urllib3/util/url.py#L219-L227
    # Report the number of percent-encoded sequences found, but return the input unmodified,
    # so the case of hex digits is preserved.
    @classmethod
    def subn(cls, repl: Any, string: str, count: Any = None) -> Tuple[str, int]:
        num_matches = sum(1 for _ in cls._re_percent_encoding.finditer(string))
        return string, num_matches
|
|
|
|
|
|
# Install the no-op override defined above in place of urllib3's compiled PERCENT_RE
urllib3.util.url.PERCENT_RE = Urllib3UtilUrlPercentReOverride  # type: ignore[attr-defined]
|
|
|
|
|
|
# requests.Request.__init__ keywords, except for "hooks"
# (used by HTTPSession.valid_request_args to filter arbitrary keyword dicts)
_VALID_REQUEST_ARGS = "method", "url", "headers", "files", "data", "params", "auth", "cookies", "json"
|
|
|
|
|
|
class HTTPSession(Session):
    """
    A :class:`requests.Session` subclass used by Streamlink and its plugins.

    Adds a default User-Agent header, a default timeout, support for ``file://`` URLs,
    retries with exponential backoff, acceptable-status/exception customization,
    and helpers for parsing JSON/XML responses.
    """

    # session-wide default query string parameters (merged into every request)
    params: Dict

    def __init__(self):
        super().__init__()

        self.headers["User-Agent"] = useragents.FIREFOX
        # default timeout (seconds) applied to every request unless overridden per call
        self.timeout = 20.0

        # allow plugins to read local files via file:// URLs
        self.mount("file://", FileAdapter())

    @classmethod
    def determine_json_encoding(cls, sample):
        """
        Determine which Unicode encoding the JSON text sample is encoded with

        RFC4627 (http://www.ietf.org/rfc/rfc4627.txt) suggests that the encoding of JSON text can be determined
        by checking the pattern of NULL bytes in first 4 octets of the text.

        :param sample: a sample of at least 4 bytes of the JSON text
        :return: the most likely encoding of the JSON text
        """
        nulls_at = [i for i, j in enumerate(bytearray(sample[:4])) if j == 0]
        if nulls_at == [0, 1, 2]:
            return "UTF-32BE"
        elif nulls_at == [0, 2]:
            return "UTF-16BE"
        elif nulls_at == [1, 2, 3]:
            return "UTF-32LE"
        elif nulls_at == [1, 3]:
            return "UTF-16LE"
        else:
            return "UTF-8"

    @classmethod
    def json(cls, res, *args, **kwargs):
        """Parses JSON from a response."""
        # if an encoding is already set then use the provided encoding
        if res.encoding is None:
            res.encoding = cls.determine_json_encoding(res.content[:4])
        return parse_json(res.text, *args, **kwargs)

    @classmethod
    def xml(cls, res, *args, **kwargs):
        """Parses XML from a response."""
        return parse_xml(res.text, *args, **kwargs)

    def resolve_url(self, url):
        """Resolves any redirects and returns the final URL."""
        # stream=True avoids downloading the response body just to learn the final URL
        return self.get(url, stream=True).url

    @staticmethod
    def valid_request_args(**req_keywords) -> Dict:
        """Filters out any keywords which :class:`requests.Request` does not accept."""
        return {k: v for k, v in req_keywords.items() if k in _VALID_REQUEST_ARGS}

    def prepare_new_request(self, **req_keywords) -> PreparedRequest:
        """Builds and prepares a new request from arbitrary keywords, defaulting to GET."""
        valid_args = self.valid_request_args(**req_keywords)
        valid_args.setdefault("method", "GET")
        request = Request(**valid_args)

        # prepare request with the session context, which might add params, headers, cookies, etc.
        return self.prepare_request(request)

    def request(self, method, url, *args, **kwargs):
        """
        Sends a request, optionally retrying with exponential backoff and validating the response.

        Additional keywords on top of :meth:`requests.Session.request`:

        :param acceptable_status: list of status codes which skip ``raise_for_status``
        :param exception: exception class raised on failure (default: :exc:`PluginError`)
        :param raise_for_status: whether to raise on bad status codes (default: ``True``)
        :param schema: optional validation schema applied to the response text
        :param session: optional session whose headers/params get merged into this request
        :param retries: number of retry attempts on failure (default: ``0``)
        :param retry_backoff: initial backoff delay in seconds (default: ``0.3``)
        :param retry_max_backoff: maximum backoff delay in seconds (default: ``10.0``)
        :raises exception: when the request keeps failing after all retries
        """
        acceptable_status = kwargs.pop("acceptable_status", [])
        exception = kwargs.pop("exception", PluginError)
        headers = kwargs.pop("headers", {})
        params = kwargs.pop("params", {})
        proxies = kwargs.pop("proxies", self.proxies)
        raise_for_status = kwargs.pop("raise_for_status", True)
        schema = kwargs.pop("schema", None)
        session = kwargs.pop("session", None)
        timeout = kwargs.pop("timeout", self.timeout)
        total_retries = kwargs.pop("retries", 0)
        retry_backoff = kwargs.pop("retry_backoff", 0.3)
        retry_max_backoff = kwargs.pop("retry_max_backoff", 10.0)
        retries = 0

        if session:
            headers.update(session.headers)
            params.update(session.params)

        while True:
            try:
                res = super().request(
                    method,
                    url,
                    *args,
                    headers=headers,
                    params=params,
                    timeout=timeout,
                    proxies=proxies,
                    **kwargs,
                )
                if raise_for_status and res.status_code not in acceptable_status:
                    res.raise_for_status()
                break
            except KeyboardInterrupt:
                # never swallow a user interrupt in the retry loop
                raise
            except Exception as rerr:
                if retries >= total_retries:
                    err = exception(f"Unable to open URL: {url} ({rerr})")
                    # keep the original error accessible for callers relying on the attribute
                    err.err = rerr
                    # chain the cause explicitly so the original traceback isn't obscured
                    raise err from rerr
                retries += 1
                # back off retrying, but only to a maximum sleep time
                delay = min(retry_max_backoff,
                            retry_backoff * (2 ** (retries - 1)))
                time.sleep(delay)

        if schema:
            res = schema.validate(res.text, name="response text", exception=PluginError)

        return res
|
|
|
|
|
|
class TLSSecLevel1Adapter(requests.adapters.HTTPAdapter):
    """A transport adapter which lowers OpenSSL's security level to 1,
    allowing connections to hosts with weak ciphers or certificates."""

    def init_poolmanager(self, *args, **kwargs):
        # build a default SSL context, then permit legacy handshakes via SECLEVEL=1
        ssl_context = ssl.create_default_context()
        ssl_context.set_ciphers("DEFAULT:@SECLEVEL=1")
        kwargs["ssl_context"] = ssl_context
        return super().init_poolmanager(*args, **kwargs)
|
|
|
|
|
|
# explicit public API of this module
__all__ = ["HTTPSession", "TLSSecLevel1Adapter"]
|