plugins.huomao: fix/rewrite (#3126)

- Add support for VODs
- Add support for title, author and category information
- Update URLs in tests

Closes https://github.com/streamlink/streamlink/issues/2341
2020-08-29 17:47:41 +01:00 · 2020-08-29 17:47:41 +01:00 · 2718146203
parent 38197fa7fd
commit 2718146203
3 changed files with 234 additions and 153 deletions
--- a/docs/plugin_matrix.rst
+++ b/docs/plugin_matrix.rst
@ -98,7 +98,8 @@ gulli                   replay.gulli.fr      Yes   Yes   Streams may be geo-rest
 hitbox                  - hitbox.tv          Yes   Yes
                        - smashcast.tv
 huajiao                 huajiao.com          Yes   No
-huomao                  huomao.com           Yes   No
+huomao                  - huomao.com         Yes   Yes
+                        - huomao.tv
 huya                    huya.com             Yes   No    Temporarily only HLS streams available.
 idf1                    idf1.fr              Yes   Yes
 ine                     ine.com              ---   Yes
--- a/src/streamlink/plugins/huomao.py
+++ b/src/streamlink/plugins/huomao.py
@ -1,101 +1,198 @@
-"""
-NOTE: Since a documented API is nowhere to be found for Huomao; this plugin
-simply extracts the videos stream_id, stream_url and stream_quality by
-scraping the HTML and JS of one of Huomaos mobile webpages.
-
-When viewing a stream on huomao.com, the base URL references a room_id. This
-room_id is mapped one-to-one to a stream_id which references the actual .m3u8
-file. Both stream_id, stream_url and stream_quality can be found in the
-HTML and JS source of the mobile_page. Since one stream can occur in many
-different qualities, we scrape all stream_url and stream_quality occurrences
-and return each option to the user.
-"""
-
+import hashlib
+import logging
 import re
+import time

+from streamlink.compat import bytes
+from streamlink.exceptions import PluginError
 from streamlink.plugin import Plugin
+from streamlink.plugin.api import validate
 from streamlink.stream import HLSStream
+from streamlink.utils import parse_json

-# URL pattern for recognizing inputed Huomao.tv / Huomao.com URL.
-url_re = re.compile(r"""
-    (http(s)?://)?
-    (www\.)?
-    huomao
-    (\.tv|\.com)
-    /(?P<room_id>\d+)
-""", re.VERBOSE)
-
-# URL used to retrive the stream_id, stream_url and stream_quality based of
-# a room_id.
-mobile_url = "http://www.huomao.com/mobile/mob_live/{0}"
-
-# Pattern for extracting the stream_id from the mobile_url HTML.
-#
-# Example from HTML:
-#   <input id="html_stream" value="efmrCH" type="hidden">
-stream_id_pattern = re.compile(r'id=\"html_stream\" value=\"(?P<stream_id>\w+)\"')
-
-# Pattern for extracting each stream_url and
-# stream_quality_name used for quality naming.
-#
-# Example from HTML:
-#   src="http://live-ws-hls.huomaotv.cn/live/<stream_id>_720/playlist.m3u8"
-stream_info_pattern = re.compile(r"""
-    (?P<stream_url>(?:[\w\/\.\-:]+)
-    \/[^_\"]+(?:_(?P<stream_quality_name>\d+))
-    ?/playlist.m3u8)
-""", re.VERBOSE)
+log = logging.getLogger(__name__)


 class Huomao(Plugin):
+    """Plugin for huomao.com / huomao.tv live rooms and VODs.
+
+    URLs of the form ``/<id>`` are handled as live rooms, URLs of the form
+    ``/video/v/<id>`` as VODs. Both kinds are resolved to HLS streams.
+    """
+
+    # Static value appended to the token data that is MD5-hashed for the
+    # live data request.
+    magic_val = '6FE26D855E1AEAE090E243EB1AF73685'
+    # Mobile room page, formatted with the room ID; scraped for the author,
+    # title and video ID.
+    mobile_url = 'https://m.huomao.com/mobile/mob_live/{0}'
+    # Endpoint that receives the POST request for the live stream data.
+    live_data_url = 'https://m.huomao.com/swf/live_data'
+    # VOD page, formatted with the VOD ID; scraped for the embedded VOD JSON.
+    vod_url = 'https://www.huomao.com/video/vreplay/{0}'
+
+    # Metadata filled in while resolving streams and returned by
+    # get_author() / get_category() / get_title().
+    author = None
+    category = None
+    title = None
+
+    # "path" is "/" for live rooms and "/video/v/" for VODs; "room_id" is the
+    # numeric ID that follows it.
+    url_re = re.compile(r'''
+        (?:https?://)?(?:www\.)?huomao(?:\.tv|\.com)
+        (?P<path>/|/video/v/)
+        (?P<room_id>\d+)
+    ''', re.VERBOSE)
+
+    # Streamer name inside the "nickname_live" paragraph of the mobile page.
+    author_re = re.compile(
+        r'<p class="nickname_live">\s*<span>\s*(.*?)\s*</span>',
+        re.DOTALL,
+    )
+
+    # Stream title inside the "title-name" paragraph of the mobile page.
+    title_re = re.compile(
+        r'<p class="title-name">\s*(.*?)\s*</p>',
+        re.DOTALL,
+    )
+
+    # Video ID assigned to the "stream" JS variable of the mobile page.
+    video_id_re = re.compile(r'var stream = "([^"]+)"')
+    # Resolution suffix (digits, optionally followed by "p") right before
+    # ".m3u8" in a stream URL.
+    video_res_re = re.compile(r'_([\d]+p?)\.m3u8')
+    # JSON object assigned to the "video" JS variable of the VOD page.
+    vod_data_re = re.compile(r'var video = ({.*});')
+
+    # Live data response: roomStatus is converted to int, each streamList
+    # entry carries a list of HLS entries with a valid URL.
+    _live_data_schema = validate.Schema({
+        'roomStatus': validate.transform(lambda x: int(x)),
+        'streamList': [{'list_hls': [{
+            'url': validate.url(),
+        }]}],
+    })
+
+    # VOD data: "vaddress" is a JSON-encoded string that is parsed into a
+    # list of entries with a stream URL and an integer video height.
+    _vod_data_schema = validate.Schema({
+        'title': validate.text,
+        'username': validate.text,
+        'vaddress': validate.all(
+            validate.text,
+            validate.transform(parse_json),
+            [{
+                'url': validate.url(),
+                'vheight': int,
+            }],
+        ),
+    })
+
    @classmethod
-    def can_handle_url(self, url):
-        return url_re.match(url)
+    def can_handle_url(cls, url):
+        """Return True if url matches url_re from its start, else False."""
+        return cls.url_re.match(url) is not None

-    def get_stream_id(self, html):
-        """Returns the stream_id contained in the HTML."""
-        stream_id = stream_id_pattern.search(html)
+    def _get_live_streams_data(self, video_id):
+        """Request the live stream data for video_id.
+
+        Posts a form carrying an MD5 token to live_data_url and returns the
+        JSON response validated against _live_data_schema.
+
+        :param video_id: video ID scraped from the mobile room page
+        :return: validated response data with "roomStatus" and "streamList"
+        """
+        client_type = 'huomaomobileh5'
+        # Current Unix time in whole seconds, sent as a string.
+        time_now = str(int(time.time()))

-        if not stream_id:
-            self.logger.error("Failed to extract stream_id.")
+        # The token is the MD5 hex digest of video ID, client type, timestamp
+        # and magic_val concatenated in exactly this order.
+        token_data = "{0}{1}{2}{3}".format(
+            video_id,
+            client_type,
+            time_now,
+            self.magic_val,
+        )

-        return stream_id.group("stream_id")
+        token = hashlib.md5(bytes(token_data, 'utf-8')).hexdigest()
+        log.debug("Token={0}".format(token))

-    def get_stream_info(self, html):
-        """
-        Returns a nested list of different stream options.
+        # "from" and "time" repeat the values that went into the token.
+        post_data = {
+            'cdns': 1,
+            'streamtype': 'live',
+            'VideoIDS': video_id,
+            'from': client_type,
+            'time': time_now,
+            'token': token,
+        }
+        video_data = self.session.http.post(self.live_data_url, data=post_data)

-        Each entry in the list will contain a stream_url and stream_quality_name
-        for each stream occurrence that was found in the JS.
-        """
-        stream_info = stream_info_pattern.findall(html)
+        return self.session.http.json(
+            video_data,
+            schema=self._live_data_schema,
+        )

-        if not stream_info:
-            self.logger.error("Failed to extract stream_info.")
+    def _get_vod_streams(self, vod_id):
+        """Resolve the HLS streams of a VOD.
+
+        Fetches the VOD page, extracts the JSON assigned to the "video" JS
+        variable, stores author/title/category metadata on the plugin and
+        builds one HLS stream per "vaddress" entry.
+
+        :param vod_id: numeric VOD ID taken from the plugin URL
+        :return: dict mapping quality names to HLSStream objects
+        :raises PluginError: if the VOD data can't be found in the page
+        """
+        res = self.session.http.get(self.vod_url.format(vod_id))
+        m = self.vod_data_re.search(res.text)
+        vod_json = m and m.group(1)

-        # Rename the "" quality to "source" by transforming the tuples to a
-        # list and reassigning.
-        stream_info_list = []
-        for info in stream_info:
-            if not info[1]:
-                stream_info_list.append([info[0], "source"])
-            else:
-                stream_info_list.append(list(info))
+        if vod_json is None:
+            raise PluginError("Failed to get VOD data")

-        return stream_info_list
+        vod_data = parse_json(vod_json, schema=self._vod_data_schema)

-    def _get_streams(self):
-        room_id = url_re.search(self.url).group("room_id")
-        html = self.session.http.get(mobile_url.format(room_id))
-        stream_id = self.get_stream_id(html.text)
-        stream_info = self.get_stream_info(html.text)
+        self.author = vod_data['username']
+        self.category = 'VOD'
+        self.title = vod_data['title']
+
+        vod_data = vod_data['vaddress']

        streams = {}
-        for info in stream_info:
-            if stream_id in info[0]:
-                streams[info[1]] = HLSStream(self.session, info[0])
+        for stream in vod_data:
+            video_res = stream['vheight']
+
+            if 'p' not in str(video_res):
+                video_res = "{0}p".format(video_res)
+
+            # Keep appending "_alt" until the name is unused, so that a third
+            # stream of the same height doesn't overwrite the first "_alt".
+            while video_res in streams:
+                video_res = "{0}_alt".format(video_res)
+
+            streams[video_res] = HLSStream(self.session, stream['url'])

        return streams

+    def _get_live_streams(self, room_id):
+        """Resolve the HLS streams of a live room.
+
+        Fetches the mobile page of the room, stores author/title/category
+        metadata on the plugin, extracts the video ID and requests the live
+        stream data for it.
+
+        :param room_id: numeric room ID taken from the plugin URL
+        :return: dict mapping quality names to HLSStream objects, or None if
+                 the room is inactive or the response has no stream list
+        :raises PluginError: if the video ID can't be found in the page
+        """
+        res = self.session.http.get(self.mobile_url.format(room_id))
+
+        m = self.author_re.search(res.text)
+        if m:
+            self.author = m.group(1)
+
+        self.category = 'Live'
+
+        m = self.title_re.search(res.text)
+        if m:
+            self.title = m.group(1)
+
+        m = self.video_id_re.search(res.text)
+        video_id = m and m.group(1)
+
+        if video_id is None:
+            raise PluginError("Failed to get video ID")
+
+        log.debug("Video ID={0}".format(video_id))
+
+        streams_data = self._get_live_streams_data(video_id)
+
+        if streams_data['roomStatus'] == 0:
+            log.info("This room is currently inactive: {0}".format(room_id))
+            return
+
+        # An empty streamList would make the [0] lookup below raise
+        # IndexError; report it as "no streams" instead.
+        if not streams_data['streamList']:
+            log.info("No streams found for room: {0}".format(room_id))
+            return
+
+        streams_data = streams_data['streamList'][0]['list_hls']
+
+        streams = {}
+        for stream in streams_data:
+            # URLs without a resolution suffix before ".m3u8" are skipped.
+            m = self.video_res_re.search(stream['url'])
+            video_res = m and m.group(1)
+            if video_res is None:
+                continue
+
+            if 'p' not in video_res:
+                video_res = "{0}p".format(video_res)
+
+            # Keep appending "_alt" until the name is unused, so that a third
+            # stream of the same resolution doesn't overwrite the first "_alt".
+            while video_res in streams:
+                video_res = "{0}_alt".format(video_res)
+
+            streams[video_res] = HLSStream(self.session, stream['url'])
+
+        return streams
+
+    def get_author(self):
+        """Return the author stored on the plugin, or None if it is unset."""
+        if self.author is not None:
+            return self.author
+
+    def get_category(self):
+        """Return the category stored on the plugin, or None if it is unset."""
+        if self.category is not None:
+            return self.category
+
+    def get_title(self):
+        """Return the title stored on the plugin, or None if it is unset."""
+        if self.title is not None:
+            return self.title
+
+    def _get_streams(self):
+        """Dispatch to the VOD or live lookup based on the URL path."""
+        # url_re has exactly two groups: the path and the numeric ID.
+        path, url_id = self.url_re.search(self.url).groups()
+        log.debug("Path={0}".format(path))
+        log.debug("URL ID={0}".format(url_id))
+
+        # The only non-"/" path url_re can match is "/video/v/", i.e. a VOD.
+        if path != '/':
+            return self._get_vod_streams(url_id)
+        else:
+            return self._get_live_streams(url_id)
+

 __plugin__ = Huomao
--- a/tests/plugins/test_huomao.py
+++ b/tests/plugins/test_huomao.py
@ -4,85 +4,68 @@ from streamlink.plugins.huomao import Huomao


 class TestPluginHuomao(unittest.TestCase):
-
-    def setUp(self):
-
-        # Create a mock source HTML with some example data:
-        #   room_id             = 123456
-        #   stream_id           = 9qsvyF24659
-        #   stream_url          = http://live-ws.huomaotv.cn/live/
-        #   stream_quality_name = source, 720 and 480
-        self.mock_html = """
-            <input id="html_stream" value="9qsvyF24659" type="hidden">
-            <source  src="http://live-ws-hls.huomaotv.cn/live/9qsvyF24659/playlist.m3u8">
-            <source  src="http://live-ws-hls.huomaotv.cn/live/9qsvyF24659_720/playlist.m3u8">
-            <source  src="http://live-ws-hls.huomaotv.cn/live/9qsvyF24659_480/playlist.m3u8">
-        """
-
-        # Create a mock Huomao object.
-        self.mock_huomao = Huomao("http://www.huomao.com/123456/")
-
-    def tearDown(self):
-        self.mock_html = None
-        self.mock_huomao = None
-
-    def test_get_stream_id(self):
-
-        # Assert that the stream_id from is correctly extracted from the mock HTML.
-        self.assertEqual(self.mock_huomao.get_stream_id(self.mock_html), "9qsvyF24659")
-
-    def test_get_stream_quality(self):
-
-        # Assert that the stream_url, stream_quality and stream_quality_name
-        # is correctly extracted from the mock HTML.
-        self.assertEqual(self.mock_huomao.get_stream_info(self.mock_html), [
-            ["http://live-ws-hls.huomaotv.cn/live/9qsvyF24659/playlist.m3u8", "source"],
-            ["http://live-ws-hls.huomaotv.cn/live/9qsvyF24659_720/playlist.m3u8", "720"],
-            ["http://live-ws-hls.huomaotv.cn/live/9qsvyF24659_480/playlist.m3u8", "480"]
-        ])
-
    def test_can_handle_url(self):
+        """Live-room and VOD URLs on both domains must be accepted."""
+        should_match = [
+            # Assert that a URL containing the http:// prefix is correctly read.
+            "http://www.huomao.com/123456",
+            "http://www.huomao.tv/123456",
+            "http://huomao.com/123456",
+            "http://huomao.tv/123456",
+            "http://www.huomao.com/video/v/123456",
+            "http://www.huomao.tv/video/v/123456",
+            "http://huomao.com/video/v/123456",
+            "http://huomao.tv/video/v/123456",

-        # Assert that an URL containing the http:// prefix is correctly read.
-        self.assertTrue(Huomao.can_handle_url("http://www.huomao.com/123456"))
-        self.assertTrue(Huomao.can_handle_url("http://www.huomao.tv/123456"))
-        self.assertTrue(Huomao.can_handle_url("http://huomao.com/123456"))
-        self.assertTrue(Huomao.can_handle_url("http://huomao.tv/123456"))
+            # Assert that a URL containing the https:// prefix is correctly read.
+            "https://www.huomao.com/123456",
+            "https://www.huomao.tv/123456",
+            "https://huomao.com/123456",
+            "https://huomao.tv/123456",
+            "https://www.huomao.com/video/v/123456",
+            "https://www.huomao.tv/video/v/123456",
+            "https://huomao.com/video/v/123456",
+            "https://huomao.tv/video/v/123456",

-        # Assert that an URL containing the https:// prefix is correctly read.
-        self.assertTrue(Huomao.can_handle_url("https://www.huomao.com/123456"))
-        self.assertTrue(Huomao.can_handle_url("https://www.huomao.tv/123456"))
-        self.assertTrue(Huomao.can_handle_url("https://huomao.com/123456"))
-        self.assertTrue(Huomao.can_handle_url("https://huomao.tv/123456"))
+            # Assert that a URL without the http(s):// prefix is correctly read.
+            "www.huomao.com/123456",
+            "www.huomao.tv/123456",
+            "www.huomao.com/video/v/123456",
+            "www.huomao.tv/video/v/123456",

-        # Assert that an URL without the http(s):// prefix is correctly read.
-        self.assertTrue(Huomao.can_handle_url("www.huomao.com/123456"))
-        self.assertTrue(Huomao.can_handle_url("www.huomao.tv/123456"))
+            # Assert that a URL without the www prefix is correctly read.
+            "huomao.com/123456",
+            "huomao.tv/123456",
+            "huomao.com/video/v/123456",
+            "huomao.tv/video/v/123456",
+        ]
+        for url in should_match:
+            self.assertTrue(Huomao.can_handle_url(url))

-        # Assert that an URL without the www prefix is correctly read.
-        self.assertTrue(Huomao.can_handle_url("huomao.com/123456"))
-        self.assertTrue(Huomao.can_handle_url("huomao.tv/123456"))
+    def test_can_handle_url_negative(self):
+        """URLs without an ID and URLs of other sites must be rejected."""
+        should_not_match = [
+            # Assert that a URL without a room_id can't be read.
+            "http://www.huomao.com/",
+            "http://www.huomao.tv/",
+            "http://huomao.com/",
+            "http://huomao.tv/",
+            "https://www.huomao.com/",
+            "https://www.huomao.tv/",
+            "https://huomao.com/",
+            "https://huomao.tv/",
+            "www.huomao.com/",
+            "www.huomao.tv/",
+            "huomao.com/",
+            "huomao.tv/",

-        # Assert that an URL without a room_id can't be read.
-        self.assertFalse(Huomao.can_handle_url("http://www.huomao.com/"))
-        self.assertFalse(Huomao.can_handle_url("http://www.huomao.tv/"))
-        self.assertFalse(Huomao.can_handle_url("http://huomao.com/"))
-        self.assertFalse(Huomao.can_handle_url("http://huomao.tv/"))
-        self.assertFalse(Huomao.can_handle_url("https://www.huomao.com/"))
-        self.assertFalse(Huomao.can_handle_url("https://www.huomao.tv/"))
-        self.assertFalse(Huomao.can_handle_url("https://huomao.com/"))
-        self.assertFalse(Huomao.can_handle_url("https://huomao.tv/"))
-        self.assertFalse(Huomao.can_handle_url("www.huomao.com/"))
-        self.assertFalse(Huomao.can_handle_url("www.huomao.tv/"))
-        self.assertFalse(Huomao.can_handle_url("huomao.tv/"))
-        self.assertFalse(Huomao.can_handle_url("huomao.tv/"))
-
-        # Assert that an URL without "huomao" can't be read.
-        self.assertFalse(Huomao.can_handle_url("http://www.youtube.com/123456"))
-        self.assertFalse(Huomao.can_handle_url("http://www.youtube.tv/123456"))
-        self.assertFalse(Huomao.can_handle_url("http://youtube.com/123456"))
-        self.assertFalse(Huomao.can_handle_url("http://youtube.tv/123456"))
-        self.assertFalse(Huomao.can_handle_url("https://www.youtube.com/123456"))
-        self.assertFalse(Huomao.can_handle_url("https://www.youtube.tv/123456"))
-        self.assertFalse(Huomao.can_handle_url("https://youtube.com/123456"))
-        self.assertFalse(Huomao.can_handle_url("https://youtube.tv/123456"))
+            # Assert that a URL without "huomao" can't be read.
+            "http://www.youtube.com/123456",
+            "http://www.youtube.tv/123456",
+            "http://youtube.com/123456",
+            "http://youtube.tv/123456",
+            "https://www.youtube.com/123456",
+            "https://www.youtube.tv/123456",
+            "https://youtube.com/123456",
+            "https://youtube.tv/123456",
+        ]
+        for url in should_not_match:
+            self.assertFalse(Huomao.can_handle_url(url))