plugins.huomao: fix/rewrite (#3126)

- Add support for VODs
- Add support for title, author and category information
- Update URLs in tests

closes https://github.com/streamlink/streamlink/issues/2341
This commit is contained in:
Ian Cameron 2020-08-29 17:47:41 +01:00 committed by GitHub
parent 38197fa7fd
commit 2718146203
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 234 additions and 153 deletions

View File

@ -98,7 +98,8 @@ gulli replay.gulli.fr Yes Yes Streams may be geo-rest
hitbox - hitbox.tv Yes Yes
- smashcast.tv
huajiao huajiao.com Yes No
huomao huomao.com Yes No
huomao - huomao.com Yes Yes
- huomao.tv
huya huya.com Yes No Temporarily only HLS streams available.
idf1 idf1.fr Yes Yes
ine ine.com --- Yes

View File

@ -1,101 +1,198 @@
"""
NOTE: Since a documented API is nowhere to be found for Huomao, this plugin
simply extracts the video's stream_id, stream_url and stream_quality by
scraping the HTML and JS of one of Huomao's mobile webpages.
When viewing a stream on huomao.com, the base URL references a room_id. This
room_id is mapped one-to-one to a stream_id which references the actual .m3u8
file. The stream_id, stream_url and stream_quality can all be found in the
HTML and JS source of the mobile_page. Since one stream can occur in many
different qualities, we scrape all stream_url and stream_quality occurrences
and return each option to the user.
"""
import hashlib
import logging
import re
import time
from streamlink.compat import bytes
from streamlink.exceptions import PluginError
from streamlink.plugin import Plugin
from streamlink.plugin.api import validate
from streamlink.stream import HLSStream
from streamlink.utils import parse_json
# URL pattern for recognizing an inputted Huomao.tv / Huomao.com URL.
url_re = re.compile(r"""
(http(s)?://)?
(www\.)?
huomao
(\.tv|\.com)
/(?P<room_id>\d+)
""", re.VERBOSE)
# URL used to retrieve the stream_id, stream_url and stream_quality based on
# a room_id.
mobile_url = "http://www.huomao.com/mobile/mob_live/{0}"
# Pattern for extracting the stream_id from the mobile_url HTML.
#
# Example from HTML:
# <input id="html_stream" value="efmrCH" type="hidden">
stream_id_pattern = re.compile(r'id=\"html_stream\" value=\"(?P<stream_id>\w+)\"')
# Pattern for extracting each stream_url and
# stream_quality_name used for quality naming.
#
# Example from HTML:
# src="http://live-ws-hls.huomaotv.cn/live/<stream_id>_720/playlist.m3u8"
stream_info_pattern = re.compile(r"""
(?P<stream_url>(?:[\w\/\.\-:]+)
\/[^_\"]+(?:_(?P<stream_quality_name>\d+))
?/playlist.m3u8)
""", re.VERBOSE)
log = logging.getLogger(__name__)
class Huomao(Plugin):
magic_val = '6FE26D855E1AEAE090E243EB1AF73685'
mobile_url = 'https://m.huomao.com/mobile/mob_live/{0}'
live_data_url = 'https://m.huomao.com/swf/live_data'
vod_url = 'https://www.huomao.com/video/vreplay/{0}'
author = None
category = None
title = None
url_re = re.compile(r'''
(?:https?://)?(?:www\.)?huomao(?:\.tv|\.com)
(?P<path>/|/video/v/)
(?P<room_id>\d+)
''', re.VERBOSE)
author_re = re.compile(
r'<p class="nickname_live">\s*<span>\s*(.*?)\s*</span>',
re.DOTALL,
)
title_re = re.compile(
r'<p class="title-name">\s*(.*?)\s*</p>',
re.DOTALL,
)
video_id_re = re.compile(r'var stream = "([^"]+)"')
video_res_re = re.compile(r'_([\d]+p?)\.m3u8')
vod_data_re = re.compile(r'var video = ({.*});')
_live_data_schema = validate.Schema({
'roomStatus': validate.transform(lambda x: int(x)),
'streamList': [{'list_hls': [{
'url': validate.url(),
}]}],
})
_vod_data_schema = validate.Schema({
'title': validate.text,
'username': validate.text,
'vaddress': validate.all(
validate.text,
validate.transform(parse_json),
[{
'url': validate.url(),
'vheight': int,
}],
),
})
@classmethod
def can_handle_url(self, url):
return url_re.match(url)
def can_handle_url(cls, url):
return cls.url_re.match(url) is not None
def get_stream_id(self, html):
"""Returns the stream_id contained in the HTML."""
stream_id = stream_id_pattern.search(html)
def _get_live_streams_data(self, video_id):
client_type = 'huomaomobileh5'
time_now = str(int(time.time()))
if not stream_id:
self.logger.error("Failed to extract stream_id.")
token_data = "{0}{1}{2}{3}".format(
video_id,
client_type,
time_now,
self.magic_val,
)
return stream_id.group("stream_id")
token = hashlib.md5(bytes(token_data, 'utf-8')).hexdigest()
log.debug("Token={0}".format(token))
def get_stream_info(self, html):
"""
Returns a nested list of different stream options.
post_data = {
'cdns': 1,
'streamtype': 'live',
'VideoIDS': video_id,
'from': client_type,
'time': time_now,
'token': token,
}
video_data = self.session.http.post(self.live_data_url, data=post_data)
Each entry in the list will contain a stream_url and stream_quality_name
for each stream occurrence that was found in the JS.
"""
stream_info = stream_info_pattern.findall(html)
return self.session.http.json(
video_data,
schema=self._live_data_schema,
)
if not stream_info:
self.logger.error("Failed to extract stream_info.")
def _get_vod_streams(self, vod_id):
res = self.session.http.get(self.vod_url.format(vod_id))
m = self.vod_data_re.search(res.text)
vod_json = m and m.group(1)
# Rename the "" quality to "source" by transforming the tuples to a
# list and reassigning.
stream_info_list = []
for info in stream_info:
if not info[1]:
stream_info_list.append([info[0], "source"])
else:
stream_info_list.append(list(info))
if vod_json is None:
raise PluginError("Failed to get VOD data")
return stream_info_list
vod_data = parse_json(vod_json, schema=self._vod_data_schema)
def _get_streams(self):
room_id = url_re.search(self.url).group("room_id")
html = self.session.http.get(mobile_url.format(room_id))
stream_id = self.get_stream_id(html.text)
stream_info = self.get_stream_info(html.text)
self.author = vod_data['username']
self.category = 'VOD'
self.title = vod_data['title']
vod_data = vod_data['vaddress']
streams = {}
for info in stream_info:
if stream_id in info[0]:
streams[info[1]] = HLSStream(self.session, info[0])
for stream in vod_data:
video_res = stream['vheight']
if 'p' not in str(video_res):
video_res = "{0}p".format(video_res)
if video_res in streams:
video_res = "{0}_alt".format(video_res)
streams[video_res] = HLSStream(self.session, stream['url'])
return streams
def _get_live_streams(self, room_id):
    """Resolve the HLS streams of a live room.

    Fetches the mobile page for ``room_id``, stores the author and title on
    the instance when the page contains them, marks the category as
    ``'Live'``, extracts the video ID and then queries the live data
    endpoint through ``_get_live_streams_data``.

    :param room_id: room ID taken from the plugin URL
    :return: dict mapping a quality name (e.g. ``"720p"``) to an
             ``HLSStream``, or ``None`` when the room is inactive or the
             response contains no stream list entries
    :raises PluginError: if the video ID cannot be found in the page
    """
    res = self.session.http.get(self.mobile_url.format(room_id))

    # Metadata is optional: a missing author or title must not abort the lookup.
    m = self.author_re.search(res.text)
    if m:
        self.author = m.group(1)

    self.category = 'Live'

    m = self.title_re.search(res.text)
    if m:
        self.title = m.group(1)

    m = self.video_id_re.search(res.text)
    video_id = m and m.group(1)
    if video_id is None:
        raise PluginError("Failed to get video ID")
    log.debug("Video ID={0}".format(video_id))

    streams_data = self._get_live_streams_data(video_id)
    if streams_data['roomStatus'] == 0:
        log.info("This room is currently inactive: {0}".format(room_id))
        return

    # Guard against an empty streamList; indexing [0] on it would raise
    # an IndexError instead of simply reporting that no streams exist.
    stream_list = streams_data['streamList']
    if not stream_list:
        log.error("No stream data found for room: {0}".format(room_id))
        return

    streams = {}
    for stream in stream_list[0]['list_hls']:
        m = self.video_res_re.search(stream['url'])
        video_res = m and m.group(1)
        if video_res is None:
            # URL carries no recognisable resolution suffix; skip it.
            continue

        # Normalise quality names so they always end in "p".
        if 'p' not in video_res:
            video_res = "{0}p".format(video_res)

        # Keep a second stream of the same resolution under an "_alt" name.
        if video_res in streams:
            video_res = "{0}_alt".format(video_res)

        streams[video_res] = HLSStream(self.session, stream['url'])

    return streams
def get_author(self):
    """Return the stored author name, or ``None`` when it has not been set."""
    return self.author
def get_category(self):
    """Return the stored category, or ``None`` when it has not been set."""
    return self.category
def get_title(self):
    """Return the stored title, or ``None`` when it has not been set."""
    return self.title
def _get_streams(self):
    """Dispatch to the live or VOD stream lookup based on the URL path.

    The plugin URL is split by ``url_re`` into a path prefix and a numeric
    ID. A bare ``/`` prefix selects the live lookup; any other prefix
    selects the VOD lookup.
    """
    matched = self.url_re.search(self.url)
    path, url_id = matched.groups()
    log.debug("Path={0}".format(path))
    log.debug("URL ID={0}".format(url_id))

    if path == '/':
        return self._get_live_streams(url_id)
    return self._get_vod_streams(url_id)
__plugin__ = Huomao

View File

@ -4,85 +4,68 @@ from streamlink.plugins.huomao import Huomao
class TestPluginHuomao(unittest.TestCase):
def setUp(self):
# Create a mock source HTML with some example data:
# room_id = 123456
# stream_id = 9qsvyF24659
# stream_url = http://live-ws.huomaotv.cn/live/
# stream_quality_name = source, 720 and 480
self.mock_html = """
<input id="html_stream" value="9qsvyF24659" type="hidden">
<source src="http://live-ws-hls.huomaotv.cn/live/9qsvyF24659/playlist.m3u8">
<source src="http://live-ws-hls.huomaotv.cn/live/9qsvyF24659_720/playlist.m3u8">
<source src="http://live-ws-hls.huomaotv.cn/live/9qsvyF24659_480/playlist.m3u8">
"""
# Create a mock Huomao object.
self.mock_huomao = Huomao("http://www.huomao.com/123456/")
def tearDown(self):
self.mock_html = None
self.mock_huomao = None
def test_get_stream_id(self):
# Assert that the stream_id is correctly extracted from the mock HTML.
self.assertEqual(self.mock_huomao.get_stream_id(self.mock_html), "9qsvyF24659")
def test_get_stream_quality(self):
# Assert that the stream_url, stream_quality and stream_quality_name
# are correctly extracted from the mock HTML.
self.assertEqual(self.mock_huomao.get_stream_info(self.mock_html), [
["http://live-ws-hls.huomaotv.cn/live/9qsvyF24659/playlist.m3u8", "source"],
["http://live-ws-hls.huomaotv.cn/live/9qsvyF24659_720/playlist.m3u8", "720"],
["http://live-ws-hls.huomaotv.cn/live/9qsvyF24659_480/playlist.m3u8", "480"]
])
def test_can_handle_url(self):
should_match = [
# Assert that an URL containing the http:// prefix is correctly read.
"http://www.huomao.com/123456",
"http://www.huomao.tv/123456",
"http://huomao.com/123456",
"http://huomao.tv/123456",
"http://www.huomao.com/video/v/123456",
"http://www.huomao.tv/video/v/123456",
"http://huomao.com/video/v/123456",
"http://huomao.tv/video/v/123456",
# Assert that an URL containing the http:// prefix is correctly read.
self.assertTrue(Huomao.can_handle_url("http://www.huomao.com/123456"))
self.assertTrue(Huomao.can_handle_url("http://www.huomao.tv/123456"))
self.assertTrue(Huomao.can_handle_url("http://huomao.com/123456"))
self.assertTrue(Huomao.can_handle_url("http://huomao.tv/123456"))
# Assert that an URL containing the https:// prefix is correctly read.
"https://www.huomao.com/123456",
"https://www.huomao.tv/123456",
"https://huomao.com/123456",
"https://huomao.tv/123456",
"https://www.huomao.com/video/v/123456",
"https://www.huomao.tv/video/v/123456",
"https://huomao.com/video/v/123456",
"https://huomao.tv/video/v/123456",
# Assert that an URL containing the https:// prefix is correctly read.
self.assertTrue(Huomao.can_handle_url("https://www.huomao.com/123456"))
self.assertTrue(Huomao.can_handle_url("https://www.huomao.tv/123456"))
self.assertTrue(Huomao.can_handle_url("https://huomao.com/123456"))
self.assertTrue(Huomao.can_handle_url("https://huomao.tv/123456"))
# Assert that an URL without the http(s):// prefix is correctly read.
"www.huomao.com/123456",
"www.huomao.tv/123456",
"www.huomao.com/video/v/123456",
"www.huomao.tv/video/v/123456",
# Assert that an URL without the http(s):// prefix is correctly read.
self.assertTrue(Huomao.can_handle_url("www.huomao.com/123456"))
self.assertTrue(Huomao.can_handle_url("www.huomao.tv/123456"))
# Assert that an URL without the www prefix is correctly read.
"huomao.com/123456",
"huomao.tv/123456",
"huomao.com/video/v/123456",
"huomao.tv/video/v/123456",
]
for url in should_match:
self.assertTrue(Huomao.can_handle_url(url))
# Assert that an URL without the www prefix is correctly read.
self.assertTrue(Huomao.can_handle_url("huomao.com/123456"))
self.assertTrue(Huomao.can_handle_url("huomao.tv/123456"))
def test_can_handle_url_negative(self):
should_not_match = [
# Assert that an URL without a room_id can't be read.
"http://www.huomao.com/",
"http://www.huomao.tv/",
"http://huomao.com/",
"http://huomao.tv/",
"https://www.huomao.com/",
"https://www.huomao.tv/",
"https://huomao.com/",
"https://huomao.tv/",
"www.huomao.com/",
"www.huomao.tv/",
"huomao.tv/",
"huomao.tv/",
# Assert that an URL without a room_id can't be read.
self.assertFalse(Huomao.can_handle_url("http://www.huomao.com/"))
self.assertFalse(Huomao.can_handle_url("http://www.huomao.tv/"))
self.assertFalse(Huomao.can_handle_url("http://huomao.com/"))
self.assertFalse(Huomao.can_handle_url("http://huomao.tv/"))
self.assertFalse(Huomao.can_handle_url("https://www.huomao.com/"))
self.assertFalse(Huomao.can_handle_url("https://www.huomao.tv/"))
self.assertFalse(Huomao.can_handle_url("https://huomao.com/"))
self.assertFalse(Huomao.can_handle_url("https://huomao.tv/"))
self.assertFalse(Huomao.can_handle_url("www.huomao.com/"))
self.assertFalse(Huomao.can_handle_url("www.huomao.tv/"))
self.assertFalse(Huomao.can_handle_url("huomao.tv/"))
self.assertFalse(Huomao.can_handle_url("huomao.tv/"))
# Assert that an URL without "huomao" can't be read.
self.assertFalse(Huomao.can_handle_url("http://www.youtube.com/123456"))
self.assertFalse(Huomao.can_handle_url("http://www.youtube.tv/123456"))
self.assertFalse(Huomao.can_handle_url("http://youtube.com/123456"))
self.assertFalse(Huomao.can_handle_url("http://youtube.tv/123456"))
self.assertFalse(Huomao.can_handle_url("https://www.youtube.com/123456"))
self.assertFalse(Huomao.can_handle_url("https://www.youtube.tv/123456"))
self.assertFalse(Huomao.can_handle_url("https://youtube.com/123456"))
self.assertFalse(Huomao.can_handle_url("https://youtube.tv/123456"))
# Assert that an URL without "huomao" can't be read.
"http://www.youtube.com/123456",
"http://www.youtube.tv/123456",
"http://youtube.com/123456",
"http://youtube.tv/123456",
"https://www.youtube.com/123456",
"https://www.youtube.tv/123456",
"https://youtube.com/123456",
"https://youtube.tv/123456",
]
for url in should_not_match:
self.assertFalse(Huomao.can_handle_url(url))