streamlink/src/streamlink/plugin/api/http_session.py

import time
from typing import Any, Callable, List, Pattern, Tuple

import requests.adapters
import urllib3
from requests import Session

from streamlink.exceptions import PluginError
from streamlink.packages.requests_file import FileAdapter
from streamlink.plugin.api import useragents
from streamlink.utils import parse_json, parse_xml

urllib3_version = tuple(map(int, urllib3.__version__.split(".")[:3]))

try:
    # We tell urllib3 to disable warnings about unverified HTTPS requests,
    # because in some plugins we have to do unverified requests intentionally.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
except AttributeError:
    pass


class _HTTPResponse(urllib3.response.HTTPResponse):
    def __init__(self, *args, **kwargs):
        # Always enforce content length validation!
        # This fixes a bug in requests which doesn't raise errors on HTTP responses where
        # the "Content-Length" header doesn't match the response's body length.
        # https://github.com/psf/requests/issues/4956#issuecomment-573325001
        #
        # Summary:
        # This bug is related to urllib3.response.HTTPResponse.stream(), which calls urllib3.response.HTTPResponse.read()
        # as a wrapper for http.client.HTTPResponse.read(amt=...). There, no http.client.IncompleteRead exception gets
        # raised when a specific amount is read from an incomplete response, due to "backwards compatibility" with an
        # old bug.
        #
        # urllib3.response.HTTPResponse.read() does implement an additional check via its enforce_content_length
        # parameter, but it is disabled by default, and requests never sets the parameter to enable it either.
        #
        # Fix this by overriding urllib3.response.HTTPResponse's constructor and always setting enforce_content_length
        # to True, as there is no way to make requests set this parameter on its own.
        kwargs.update({"enforce_content_length": True})
        super().__init__(*args, **kwargs)


# Override all urllib3.response.HTTPResponse references in requests.adapters.HTTPAdapter.send
urllib3.connectionpool.HTTPConnectionPool.ResponseCls = _HTTPResponse
requests.adapters.HTTPResponse = _HTTPResponse
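
# Illustration (comment only, not part of the module): with enforce_content_length
# set, a response whose body is shorter than its "Content-Length" header raises
# urllib3.exceptions.IncompleteRead while streaming, instead of silently returning
# truncated data. How that error surfaces through requests (e.g. as a
# ChunkedEncodingError) depends on the requests/urllib3 versions. Assuming
# "session" is any requests.Session created after this module is imported:
#
#     res = session.get(url, stream=True)
#     for chunk in res.iter_content(8192):  # raises on a truncated body
#         ...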

# Never convert percent-encoded characters to uppercase in urllib3>=1.25.4.
# This is required for sites which compare request URLs byte for byte and return different responses depending on that.
# Older versions of urllib3 are not compatible with this override and will always convert to uppercase characters.
#
# https://datatracker.ietf.org/doc/html/rfc3986#section-2.1
# > The uppercase hexadecimal digits 'A' through 'F' are equivalent to
# > the lowercase digits 'a' through 'f', respectively. If two URIs
# > differ only in the case of hexadecimal digits used in percent-encoded
# > octets, they are equivalent. For consistency, URI producers and
# > normalizers should use uppercase hexadecimal digits for all percent-
# > encodings.
if urllib3_version >= (1, 25, 4):
    class Urllib3UtilUrlPercentReOverride:
        _re_percent_encoding: Pattern = urllib3.util.url.PERCENT_RE

        @classmethod
        def _num_percent_encodings(cls, string) -> int:
            return len(cls._re_percent_encoding.findall(string))

        # urllib3>=1.25.8
        # https://github.com/urllib3/urllib3/blame/1.25.8/src/urllib3/util/url.py#L219-L227
        @classmethod
        def subn(cls, repl: Callable, string: str) -> Tuple[str, int]:
            return string, cls._num_percent_encodings(string)

        # urllib3>=1.25.4,<1.25.8
        # https://github.com/urllib3/urllib3/blame/1.25.4/src/urllib3/util/url.py#L218-L228
        @classmethod
        def findall(cls, string: str) -> List[Any]:
            class _List(list):
                def __len__(self) -> int:
                    return cls._num_percent_encodings(string)

            return _List()

    urllib3.util.url.PERCENT_RE = Urllib3UtilUrlPercentReOverride
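
# Illustration (comment only): with this override in place, a request for
#     https://example.com/path%2fsegment
# keeps the lowercase "%2f" intact, whereas stock urllib3>=1.25.4 would
# normalize it to "%2F". The subn()/findall() shims only need to report the
# *number* of existing percent-encodings, so urllib3 can still decide whether
# a component is already percent-encoded without re-uppercasing anything.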


def _parse_keyvalue_list(val):
    for keyvalue in val.split(";"):
        try:
            key, value = keyvalue.split("=", 1)
            yield key.strip(), value.strip()
        except ValueError:
            continue


class HTTPSession(Session):
    def __init__(self):
        super().__init__()

        self.headers['User-Agent'] = useragents.FIREFOX
        self.timeout = 20.0

        self.mount('file://', FileAdapter())
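
    # Usage sketch (comment only): plugins normally reach this class through the
    # Streamlink session object, but it can also be driven directly, e.g.
    #     http = HTTPSession()
    #     res = http.get("https://example.com/api", retries=3)
    #     data = http.json(res)
    # ("https://example.com/api" is a placeholder; retries is handled by the
    # request() override below.)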

    @classmethod
    def determine_json_encoding(cls, sample):
        """
        Determine which Unicode encoding the JSON text sample is encoded with.

        RFC 4627 (http://www.ietf.org/rfc/rfc4627.txt) suggests that the encoding of JSON text can be determined
        by checking the pattern of NULL bytes in the first four octets of the text.

        :param sample: a sample of at least 4 bytes of the JSON text
        :return: the most likely encoding of the JSON text
        """
        nulls_at = [i for i, j in enumerate(bytearray(sample[:4])) if j == 0]
        if nulls_at == [0, 1, 2]:
            return "UTF-32BE"
        elif nulls_at == [0, 2]:
            return "UTF-16BE"
        elif nulls_at == [1, 2, 3]:
            return "UTF-32LE"
        elif nulls_at == [1, 3]:
            return "UTF-16LE"
        else:
            return "UTF-8"

    @classmethod
    def json(cls, res, *args, **kwargs):
        """Parses JSON from a response."""
        # if an encoding is already set then use the provided encoding
        if res.encoding is None:
            res.encoding = cls.determine_json_encoding(res.content[:4])

        return parse_json(res.text, *args, **kwargs)

    @classmethod
    def xml(cls, res, *args, **kwargs):
        """Parses XML from a response."""
        return parse_xml(res.text, *args, **kwargs)

    def parse_cookies(self, cookies, **kwargs):
        """Parses a semi-colon delimited list of cookies.

        Example: foo=bar;baz=qux
        """
        for name, value in _parse_keyvalue_list(cookies):
            self.cookies.set(name, value, **kwargs)

    def parse_headers(self, headers):
        """Parses a semi-colon delimited list of headers.

        Example: foo=bar;baz=qux
        """
        for name, value in _parse_keyvalue_list(headers):
            self.headers[name] = value

    def parse_query_params(self, params, **kwargs):
        """Parses a semi-colon delimited list of query parameters.

        Example: foo=bar;baz=qux
        """
        for name, value in _parse_keyvalue_list(params):
            self.params[name] = value

    def resolve_url(self, url):
        """Resolves any redirects and returns the final URL."""
        return self.get(url, stream=True).url

    def request(self, method, url, *args, **kwargs):
        acceptable_status = kwargs.pop("acceptable_status", [])
        exception = kwargs.pop("exception", PluginError)
        headers = kwargs.pop("headers", {})
        params = kwargs.pop("params", {})
        proxies = kwargs.pop("proxies", self.proxies)
        raise_for_status = kwargs.pop("raise_for_status", True)
        schema = kwargs.pop("schema", None)
        session = kwargs.pop("session", None)
        timeout = kwargs.pop("timeout", self.timeout)
        total_retries = kwargs.pop("retries", 0)
        retry_backoff = kwargs.pop("retry_backoff", 0.3)
        retry_max_backoff = kwargs.pop("retry_max_backoff", 10.0)
        retries = 0

        if session:
            headers.update(session.headers)
            params.update(session.params)

        while True:
            try:
                res = super().request(
                    method,
                    url,
                    *args,
                    headers=headers,
                    params=params,
                    timeout=timeout,
                    proxies=proxies,
                    **kwargs
                )
                if raise_for_status and res.status_code not in acceptable_status:
                    res.raise_for_status()
                break
            except KeyboardInterrupt:
                raise
            except Exception as rerr:
                if retries >= total_retries:
                    err = exception(f"Unable to open URL: {url} ({rerr})")
                    err.err = rerr
                    raise err

                retries += 1
                # back off retrying, but only to a maximum sleep time
                delay = min(retry_max_backoff,
                            retry_backoff * (2 ** (retries - 1)))
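                # e.g. with the defaults retry_backoff=0.3 and retry_max_backoff=10.0,
                # successive delays are 0.3, 0.6, 1.2, 2.4, 4.8, 9.6, 10.0, 10.0, ... seconds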
                time.sleep(delay)

        if schema:
            res = schema.validate(res.text, name="response text", exception=PluginError)

        return res


__all__ = ["HTTPSession"]