2017-02-03 18:26:56 +01:00
|
|
|
import time
|
2020-10-26 14:46:17 +01:00
|
|
|
|
2021-06-02 13:47:21 +02:00
|
|
|
import requests.adapters
|
|
|
|
import urllib3
|
2021-01-06 18:09:26 +01:00
|
|
|
from requests import Session
|
2017-02-03 18:26:56 +01:00
|
|
|
|
2020-10-26 14:46:17 +01:00
|
|
|
from streamlink.exceptions import PluginError
|
2016-12-16 06:18:53 +01:00
|
|
|
from streamlink.packages.requests_file import FileAdapter
|
2020-02-08 19:02:16 +01:00
|
|
|
from streamlink.plugin.api import useragents
|
2020-10-26 14:46:17 +01:00
|
|
|
from streamlink.utils import parse_json, parse_xml
|
2014-03-14 00:14:15 +01:00
|
|
|
|
2014-08-30 02:17:00 +02:00
|
|
|
|
2021-06-02 13:47:21 +02:00
|
|
|
try:
    # We tell urllib3 to disable warnings about unverified HTTPS requests,
    # because in some plugins we have to do unverified requests intentionally.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
except AttributeError:
    # Best-effort: older or vendored urllib3 builds may lack disable_warnings
    # or the InsecureRequestWarning attribute — skip silently in that case.
    pass
|
|
|
|
|
2021-06-02 13:47:21 +02:00
|
|
|
|
|
|
|
class _HTTPResponse(urllib3.response.HTTPResponse):
    """urllib3 response class with "Content-Length" validation forced on.

    This fixes a bug in requests which doesn't raise errors on HTTP responses
    where the "Content-Length" header doesn't match the response's body length:
    https://github.com/psf/requests/issues/4956#issuecomment-573325001

    Summary:
    urllib3.response.HTTPResponse.stream() calls its own read() as a wrapper
    for http.client.HTTPResponse.read(amt=...), where no
    http.client.IncompleteRead exception gets raised on an incomplete response
    when a specific amount is read, due to "backwards compatiblity" with an
    old bug. urllib3's read() does implement an additional check via its
    enforce_content_length parameter, but it doesn't check by default and
    requests never enables it either. Since there is no way to make requests
    set this parameter on its own, override the constructor and always turn
    the check on.
    """

    def __init__(self, *args, **kwargs):
        # Always enforce content length validation! (see class docstring)
        kwargs["enforce_content_length"] = True
        super().__init__(*args, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
# override all urllib3.response.HTTPResponse references in requests.adapters.HTTPAdapter.send
# with the length-enforcing subclass defined above: both the ResponseCls
# attribute used by urllib3's connection pool and the HTTPResponse name that
# requests' adapter module references directly.
urllib3.connectionpool.HTTPConnectionPool.ResponseCls = _HTTPResponse
requests.adapters.HTTPResponse = _HTTPResponse
|
2014-03-14 00:14:15 +01:00
|
|
|
|
|
|
|
|
|
|
|
def _parse_keyvalue_list(val):
|
|
|
|
for keyvalue in val.split(";"):
|
|
|
|
try:
|
2014-06-26 02:14:12 +02:00
|
|
|
key, value = keyvalue.split("=", 1)
|
2014-03-14 00:14:15 +01:00
|
|
|
yield key.strip(), value.strip()
|
|
|
|
except ValueError:
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
class HTTPSession(Session):
    """requests.Session subclass preconfigured for Streamlink.

    Sets a browser User-Agent and a default timeout, mounts a "file://"
    adapter, and extends request() with retry/backoff, schema validation
    and status-code handling keywords.
    """

    def __init__(self):
        super().__init__()

        # Spoof a regular browser User-Agent by default
        self.headers['User-Agent'] = useragents.FIREFOX
        # Default timeout (seconds) for every request made through this session
        self.timeout = 20.0

        # Support local files via "file://" URLs
        self.mount('file://', FileAdapter())

    @classmethod
    def determine_json_encoding(cls, sample):
        """
        Determine which Unicode encoding the JSON text sample is encoded with

        RFC4627 (http://www.ietf.org/rfc/rfc4627.txt) suggests that the encoding of JSON text can be determined
        by checking the pattern of NULL bytes in first 4 octets of the text.

        :param sample: a sample of at least 4 bytes of the JSON text
        :return: the most likely encoding of the JSON text
        """
        # Indices of NULL bytes within the first four octets identify the encoding
        nulls_at = [i for i, j in enumerate(bytearray(sample[:4])) if j == 0]
        if nulls_at == [0, 1, 2]:
            return "UTF-32BE"
        elif nulls_at == [0, 2]:
            return "UTF-16BE"
        elif nulls_at == [1, 2, 3]:
            return "UTF-32LE"
        elif nulls_at == [1, 3]:
            return "UTF-16LE"
        else:
            return "UTF-8"

    @classmethod
    def json(cls, res, *args, **kwargs):
        """Parses JSON from a response.

        Extra positional/keyword arguments are forwarded to parse_json().
        """
        # if an encoding is already set then use the provided encoding
        if res.encoding is None:
            res.encoding = cls.determine_json_encoding(res.content[:4])
        return parse_json(res.text, *args, **kwargs)

    @classmethod
    def xml(cls, res, *args, **kwargs):
        """Parses XML from a response.

        Extra positional/keyword arguments are forwarded to parse_xml().
        """
        return parse_xml(res.text, *args, **kwargs)

    def parse_cookies(self, cookies, **kwargs):
        """Parses a semi-colon delimited list of cookies.

        Example: foo=bar;baz=qux

        Keyword arguments are forwarded to self.cookies.set() (e.g. domain, path).
        """
        for name, value in _parse_keyvalue_list(cookies):
            self.cookies.set(name, value, **kwargs)

    def parse_headers(self, headers):
        """Parses a semi-colon delimited list of headers.

        Example: foo=bar;baz=qux
        """
        for name, value in _parse_keyvalue_list(headers):
            self.headers[name] = value

    def parse_query_params(self, cookies, **kwargs):
        """Parses a semi-colon delimited list of query parameters.

        Example: foo=bar;baz=qux
        """
        # NOTE(review): the parameter is (mis)named "cookies" and **kwargs is
        # never used; both are kept as-is for backward compatibility with
        # keyword-argument callers.
        for name, value in _parse_keyvalue_list(cookies):
            self.params[name] = value

    def resolve_url(self, url):
        """Resolves any redirects and returns the final URL."""
        # stream=True defers the body download; only the final URL is needed
        return self.get(url, stream=True).url

    def request(self, method, url, *args, **kwargs):
        """Sends a request, with Streamlink-specific keyword extensions.

        The following keywords are consumed here (popped before delegating
        to Session.request):
          - acceptable_status: status codes exempt from raise_for_status()
          - exception: class raised when all attempts fail (default: PluginError)
          - raise_for_status: raise on unacceptable status codes (default: True)
          - schema: optional schema; when set, the validated response text is
            returned instead of the Response object
          - session: another session whose headers and params are merged in
          - retries: number of retries after the first attempt (default: 0)
          - retry_backoff / retry_max_backoff: exponential backoff base and
            sleep-time cap, in seconds (defaults: 0.3 / 10.0)

        :raises exception: wrapping the original error (available as err.err)
                           once all retries are exhausted
        """
        acceptable_status = kwargs.pop("acceptable_status", [])
        exception = kwargs.pop("exception", PluginError)
        headers = kwargs.pop("headers", {})
        params = kwargs.pop("params", {})
        proxies = kwargs.pop("proxies", self.proxies)
        raise_for_status = kwargs.pop("raise_for_status", True)
        schema = kwargs.pop("schema", None)
        session = kwargs.pop("session", None)
        timeout = kwargs.pop("timeout", self.timeout)
        total_retries = kwargs.pop("retries", 0)
        retry_backoff = kwargs.pop("retry_backoff", 0.3)
        retry_max_backoff = kwargs.pop("retry_max_backoff", 10.0)
        retries = 0

        # Merge headers and params from the optional secondary session
        if session:
            headers.update(session.headers)
            params.update(session.params)

        while True:
            try:
                res = super().request(
                    method,
                    url,
                    headers=headers,
                    params=params,
                    timeout=timeout,
                    proxies=proxies,
                    *args,
                    **kwargs
                )
                if raise_for_status and res.status_code not in acceptable_status:
                    res.raise_for_status()
                break
            except KeyboardInterrupt:
                # never swallow a user interrupt into the retry loop
                raise
            except Exception as rerr:
                # deliberately broad: any request or HTTP-status failure
                # triggers a retry until the retry budget is exhausted
                if retries >= total_retries:
                    err = exception(f"Unable to open URL: {url} ({rerr})")
                    # keep the original error accessible to callers
                    err.err = rerr
                    raise err
                retries += 1
                # back off retrying, but only to a maximum sleep time
                delay = min(retry_max_backoff,
                            retry_backoff * (2 ** (retries - 1)))
                time.sleep(delay)

        if schema:
            res = schema.validate(res.text, name="response text", exception=PluginError)

        return res
|
2021-06-02 13:47:21 +02:00
|
|
|
|
|
|
|
|
|
|
|
# Explicit public API of this module.
__all__ = ["HTTPSession"]