streamlink/src/streamlink/plugins/crunchyroll.py

401 lines
14 KiB
Python

"""
$description Video streaming service focused on anime, manga, and dorama.
$url crunchyroll.com
$type vod
"""
import logging
import re
from uuid import uuid4
from streamlink.plugin import Plugin, PluginError, pluginargument, pluginmatcher
from streamlink.plugin.api import validate
from streamlink.stream.hls import HLSStream
from streamlink.utils.times import parse_datetime
log = logging.getLogger(__name__)
STREAM_WEIGHTS = {
"low": 240,
"mid": 420,
"high": 720,
"ultra": 1080,
}
STREAM_NAMES = {
"120k": "low",
"328k": "mid",
"864k": "high",
}
_api_schema = validate.Schema({
"error": bool,
validate.optional("code"): str,
validate.optional("message"): str,
validate.optional("data"): object,
})
_media_schema = validate.Schema(
{
validate.optional("name"): validate.any(str, None),
validate.optional("series_name"): validate.any(str, None),
validate.optional("media_type"): validate.any(str, None),
"stream_data": validate.any(
None,
{
"streams": validate.all(
[{
"quality": validate.any(str, None),
"url": validate.url(
scheme="http",
path=validate.endswith(".m3u8"),
),
validate.optional("video_encode_id"): str,
}],
),
},
),
},
)
_login_schema = validate.Schema({
"auth": validate.any(str, None),
"expires": validate.all(
str,
validate.transform(parse_datetime),
),
"user": {
"username": validate.any(str, None),
"email": str,
},
})
_session_schema = validate.Schema(
{
"session_id": str,
},
validate.get("session_id"),
)
class CrunchyrollAPIError(Exception):
"""Exception thrown by the Crunchyroll API when an error occurs"""
def __init__(self, msg, code):
Exception.__init__(self, msg)
self.msg = msg
self.code = code
class CrunchyrollAPI:
_api_url = "https://api.crunchyroll.com/{0}.0.json"
_default_locale = "en_US"
_version_name = "1.3.1.0"
_access_token = "LNDJgOit5yaRIWN"
_access_type = "com.crunchyroll.windows.desktop"
def __init__(self, cache, session, session_id=None, locale=_default_locale):
"""Abstract the API to access to Crunchyroll data.
Can take saved credentials to use on its calls to the API.
"""
self.cache = cache
self.session = session
self.session_id = session_id
if self.session_id: # if the session ID is setup don't use the cached auth token
self.auth = None
else:
self.auth = cache.get("auth")
self.device_id = cache.get("device_id") or self.generate_device_id()
self.locale = locale
def _api_call(self, entrypoint, params=None, schema=None):
"""Makes a call against the api.
:param entrypoint: API method to call.
:param params: parameters to include in the request data.
:param schema: schema to use to validate the data
"""
url = self._api_url.format(entrypoint)
# Default params
params = params or {}
if self.session_id:
params.update({
"session_id": self.session_id,
})
else:
params.update({
"device_id": self.device_id,
"device_type": self._access_type,
"access_token": self._access_token,
})
params.update({
"locale": self.locale.replace("_", ""),
"version": self._version_name,
"connectivity_type": "ethernet",
})
if self.session_id:
params["session_id"] = self.session_id
res = self.session.http.post(url, data=params)
json_res = self.session.http.json(res, schema=_api_schema)
if json_res["error"]:
err_msg = json_res.get("message", "Unknown error")
err_code = json_res.get("code", "unknown_error")
raise CrunchyrollAPIError(err_msg, err_code)
data = json_res.get("data")
if schema:
data = schema.validate(data, name="API response")
return data
def generate_device_id(self):
device_id = str(uuid4())
# cache the device id
self.cache.set("device_id", device_id, expires=365 * 24 * 60 * 60)
log.debug("Device ID: {0}".format(device_id))
return device_id
def start_session(self):
"""
Starts a session against Crunchyroll's server.
Is recommended that you call this method before making any other calls
to make sure you have a valid session against the server.
"""
params = {}
if self.auth:
params["auth"] = self.auth
self.session_id = self._api_call("start_session", params, schema=_session_schema)
log.debug("Session created with ID: {0}".format(self.session_id))
return self.session_id
def login(self, username, password):
"""
Authenticates the session to be able to access restricted data from
the server (e.g. premium restricted videos).
"""
params = {
"account": username,
"password": password,
}
login = self._api_call("login", params, schema=_login_schema)
self.auth = login["auth"]
self.cache.set("auth", login["auth"], expires_at=login["expires"])
return login
def authenticate(self):
try:
data = self._api_call("authenticate", {"auth": self.auth}, schema=_login_schema)
except CrunchyrollAPIError:
self.auth = None
self.cache.set("auth", None, expires=0)
log.warning("Saved credentials have expired")
return
log.debug("Credentials expire at: {}".format(data["expires"]))
self.cache.set("auth", self.auth, expires_at=data["expires"])
return data
def get_info(self, media_id, fields=None, schema=None):
"""
Returns the data for a certain media item.
:param media_id: id that identifies the media item to be accessed.
:param fields: list of the media"s field to be returned. By default the
API returns some fields, but others are not returned unless they are
explicity asked for. I have no real documentation on the fields, but
they all seem to start with the "media." prefix (e.g. media.name,
media.stream_data).
:param schema: validation schema to use
"""
params = {"media_id": media_id}
if fields:
params["fields"] = ",".join(fields)
return self._api_call("info", params, schema=schema)
@pluginmatcher(re.compile(r"""
https?://(\w+\.)?crunchyroll\.
(?:
com|de|es|fr|co\.jp
)
(?:
/(en-gb|es|es-es|pt-pt|pt-br|fr|de|ar|it|ru)
)?
(?:
(?:
(?:/[^/&?]+)?
/[^/&?]+-(?P<media_id>\d+)
)
|
/watch/(?P<beta_id>\w+)/[\w-]+
)
""", re.VERBOSE))
@pluginargument(
"username",
requires=["password"],
metavar="USERNAME",
help="A Crunchyroll username to allow access to restricted streams.",
)
@pluginargument(
"password",
sensitive=True,
metavar="PASSWORD",
nargs="?",
const=None,
default=None,
help="""
A Crunchyroll password for use with --crunchyroll-username.
If left blank you will be prompted.
""",
)
@pluginargument(
"purge-credentials",
action="store_true",
help="Purge cached Crunchyroll credentials to initiate a new session and reauthenticate.",
)
@pluginargument(
"session-id",
sensitive=True,
metavar="SESSION_ID",
help="""
Set a specific session ID for crunchyroll, can be used to bypass
region restrictions. If using an authenticated session ID, it is
recommended that the authentication parameters be omitted as the
session ID is account specific.
Note: The session ID will be overwritten if authentication is used
and the session ID does not match the account.
""",
)
class Crunchyroll(Plugin):
@classmethod
def stream_weight(cls, key):
weight = STREAM_WEIGHTS.get(key)
if weight:
return weight, "crunchyroll"
return Plugin.stream_weight(key)
def _get_streams(self):
beta_id = self.match.group("beta_id")
if beta_id:
json = self.session.http.get(self.url, schema=validate.Schema(
validate.parse_html(),
validate.xml_xpath_string(".//script[contains(text(), 'window.__INITIAL_STATE__')]/text()"),
validate.none_or_all(
re.compile(r"window.__INITIAL_STATE__\s*=\s*({.*});"),
validate.none_or_all(
validate.get(1),
validate.parse_json(),
validate.none_or_all(
{"content": {"byId": {str: {"external_id": validate.all(
validate.transform(lambda s: int(s.replace("EPI.", ""))),
)}}}},
validate.get(("content", "byId")),
),
),
),
))
if not json or beta_id not in json:
return
media_id = json[beta_id]["external_id"]
else:
media_id = int(self.match.group("media_id"))
api = self._create_api()
try:
# the media.stream_data field is required, no stream data is returned otherwise
info = api.get_info(media_id, fields=["media.name", "media.series_name",
"media.media_type", "media.stream_data"], schema=_media_schema)
except CrunchyrollAPIError as err:
raise PluginError(f"Media lookup error: {err.msg}") from err
if not info:
return
streams = {}
self.id = media_id
self.title = info.get("name")
self.author = info.get("series_name")
self.category = info.get("media_type")
info = info["stream_data"]
# The adaptive quality stream sometimes a subset of all the other streams listed, ultra is no included
has_adaptive = any(s["quality"] == "adaptive" for s in info["streams"])
if has_adaptive:
log.debug("Loading streams from adaptive playlist")
for stream in filter(lambda x: x["quality"] == "adaptive", info["streams"]):
for q, s in HLSStream.parse_variant_playlist(self.session, stream["url"]).items():
# rename the bitrates to low, mid, or high. ultra doesn't seem to appear in the adaptive streams
name = STREAM_NAMES.get(q, q)
streams[name] = s
# If there is no adaptive quality stream then parse each individual result
for stream in info["streams"]:
if stream["quality"] != "adaptive":
# the video_encode_id indicates that the stream is not a variant playlist
if "video_encode_id" in stream:
streams[stream["quality"]] = HLSStream(self.session, stream["url"])
else:
# otherwise the stream url is actually a list of stream qualities
for q, s in HLSStream.parse_variant_playlist(self.session, stream["url"]).items():
# rename the bitrates to low, mid, or high. ultra doesn't seem to appear in the adaptive streams
name = STREAM_NAMES.get(q, q)
streams[name] = s
return streams
def _create_api(self):
"""Creates a new CrunchyrollAPI object, initiates its session and
tries to authenticate it either by using saved credentials or the
user's username and password.
"""
if self.options.get("purge_credentials"):
self.cache.set("device_id", None, expires=0)
self.cache.set("auth", None, expires=0)
# use the crunchyroll locale as an override, for backwards compatibility
locale = self.get_option("locale") or self.session.localization.language_code
api = CrunchyrollAPI(self.cache,
self.session,
session_id=self.get_option("session_id"),
locale=locale)
if not self.get_option("session_id"):
log.debug(f"Creating session with locale: {locale}")
api.start_session()
if api.auth:
log.debug("Using saved credentials")
login = api.authenticate()
if login:
login_name = login["user"]["username"] or login["user"]["email"]
log.info(f"Successfully logged in as '{login_name}'")
if not api.auth and self.options.get("username"):
try:
log.debug("Attempting to login using username and password")
api.login(self.options.get("username"),
self.options.get("password"))
login = api.authenticate()
login_name = login["user"]["username"] or login["user"]["email"]
log.info(f"Logged in as '{login_name}'")
except CrunchyrollAPIError as err:
raise PluginError(f"Authentication error: {err.msg}") from err
if not api.auth:
log.warning("No authentication provided, you won't be able to access premium restricted content")
return api
__plugin__ = Crunchyroll