From 2291dbce2a1aa9c5e9e060eb7ac306f037234eb1 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 5 Apr 2021 13:11:21 +0530 Subject: [PATCH] [niconico] Fix HLS formats Closes #171 * The structure of the API JSON was changed * Smile Video seems to be no longer available. So remove the warning * Move ping to downloader * Change heartbeat interval to 40sec * Remove unnecessary API headers Authored-by: CXwudi, tsukumijima, nao20010128nao, pukkandan Tested by: tsukumijima --- yt_dlp/downloader/niconico.py | 11 ++-- yt_dlp/extractor/niconico.py | 97 +++++++++++++++++------------------ 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py index dc49dff585..c5a3587a4b 100644 --- a/yt_dlp/downloader/niconico.py +++ b/yt_dlp/downloader/niconico.py @@ -24,16 +24,14 @@ class NiconicoDmcFD(FileDownloader): success = download_complete = False timer = [None] - heartbeat_lock = threading.Lock() heartbeat_url = heartbeat_info_dict['url'] - heartbeat_data = heartbeat_info_dict['data'] + heartbeat_data = heartbeat_info_dict['data'].encode() heartbeat_interval = heartbeat_info_dict.get('interval', 30) - self.to_screen('[%s] Heartbeat with %s second interval ...' % (self.FD_NAME, heartbeat_interval)) def heartbeat(): try: - compat_urllib_request.urlopen(url=heartbeat_url, data=heartbeat_data.encode()) + compat_urllib_request.urlopen(url=heartbeat_url, data=heartbeat_data) except Exception: self.to_screen('[%s] Heartbeat failed' % self.FD_NAME) @@ -42,13 +40,16 @@ class NiconicoDmcFD(FileDownloader): timer[0] = threading.Timer(heartbeat_interval, heartbeat) timer[0].start() + heartbeat_info_dict['ping']() + self.to_screen('[%s] Heartbeat with %d second interval ...' 
% (self.FD_NAME, heartbeat_interval)) try: heartbeat() + if type(fd).__name__ == 'HlsFD': + info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0]) success = fd.real_download(filename, info_dict) finally: if heartbeat_lock: with heartbeat_lock: timer[0].cancel() download_complete = True - return success diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 91924b318e..126aa4530c 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -164,6 +164,11 @@ class NiconicoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)' _NETRC_MACHINE = 'niconico' + _API_HEADERS = { + 'X-Frontend-ID': '6', + 'X-Frontend-Version': '0' + } + def _real_initialize(self): self._login() @@ -197,46 +202,48 @@ class NiconicoIE(InfoExtractor): video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/') - # Get video webpage for API data. - webpage, handle = self._download_webpage_handle( - 'http://www.nicovideo.jp/watch/' + video_id, video_id) - - api_data = self._parse_json(self._html_search_regex( - 'data-api-data="([^"]+)"', webpage, - 'API data', default='{}'), video_id) + api_data = ( + info_dict.get('_api_data') + or self._parse_json( + self._html_search_regex( + 'data-api-data="([^"]+)"', + self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id), + 'API data', default='{}'), + video_id)) session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session']) session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0]) - # ping - self._download_json( - 'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id, - query={'t': try_get(api_data, lambda x: x['video']['dmcInfo']['tracking_id'])}, - headers={ - 'Origin': 'https://www.nicovideo.jp', - 'Referer': 'https://www.nicovideo.jp/watch/' + video_id, - 'X-Frontend-Id': '6', - 'X-Frontend-Version': '0' - }) + def ping(): + status = 
try_get( + self._download_json( + 'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id, + query={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])}, + note='Acquiring permission for downloading video', + headers=self._API_HEADERS), + lambda x: x['meta']['status']) + if status != 200: + self.report_warning('Failed to acquire permission for playing video. The video may not download.') yesno = lambda x: 'yes' if x else 'no' # m3u8 (encryption) - if 'encryption' in (try_get(api_data, lambda x: x['media']['delivery']['movie']) or {}): + if try_get(api_data, lambda x: x['media']['delivery']['encryption']) is not None: protocol = 'm3u8' + encryption = self._parse_json(session_api_data['token'], video_id)['hls_encryption'] session_api_http_parameters = { 'parameters': { 'hls_parameters': { 'encryption': { - 'hls_encryption_v1': { - 'encrypted_key': try_get(api_data, lambda x: x['video']['dmcInfo']['encryption']['hls_encryption_v1']['encrypted_key']), - 'key_uri': try_get(api_data, lambda x: x['video']['dmcInfo']['encryption']['hls_encryption_v1']['key_uri']) + encryption: { + 'encrypted_key': try_get(api_data, lambda x: x['media']['delivery']['encryption']['encryptedKey']), + 'key_uri': try_get(api_data, lambda x: x['media']['delivery']['encryption']['keyUri']) } }, 'transfer_preset': '', - 'use_ssl': yesno(session_api_endpoint['is_ssl']), - 'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']), - 'segment_duration': 6000 + 'use_ssl': yesno(session_api_endpoint['isSsl']), + 'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']), + 'segment_duration': 6000, } } } @@ -310,7 +317,8 @@ class NiconicoIE(InfoExtractor): 'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT', 'data': json.dumps(session_response['data']), # interval, convert milliseconds to seconds, then halve to make a buffer. 
- 'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=2000), + 'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000), + 'ping': ping } return info_dict, heartbeat_info_dict @@ -400,7 +408,7 @@ class NiconicoIE(InfoExtractor): # Get HTML5 videos info quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie']) if not quality_info: - raise ExtractorError('The video can\'t downloaded.', expected=True) + raise ExtractorError('The video can\'t be downloaded', expected=True) for audio_quality in quality_info.get('audios') or {}: for video_quality in quality_info.get('videos') or {}: @@ -412,9 +420,7 @@ class NiconicoIE(InfoExtractor): # Get flv/swf info timestamp = None video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url']) - if not video_real_url: - self.report_warning('Unable to obtain smile video information') - else: + if video_real_url: is_economy = video_real_url.endswith('low') if is_economy: @@ -486,9 +492,6 @@ class NiconicoIE(InfoExtractor): 'filesize': filesize }) - if len(formats) == 0: - raise ExtractorError('Unable to find video info.') - self._sort_formats(formats) # Start extracting information @@ -585,6 +588,7 @@ class NiconicoIE(InfoExtractor): return { 'id': video_id, + '_api_data': api_data, 'title': title, 'formats': formats, 'thumbnail': thumbnail, @@ -619,24 +623,19 @@ class NiconicoPlaylistIE(InfoExtractor): 'only_matching': True, }] + _API_HEADERS = { + 'X-Frontend-ID': '6', + 'X-Frontend-Version': '0' + } + def _real_extract(self, url): list_id = self._match_id(url) - webpage = self._download_webpage(url, list_id) - - header = self._parse_json(self._html_search_regex( - r'data-common-header="([^"]+)"', webpage, - 'webpage header'), list_id) - frontendId = header.get('initConfig').get('frontendId') - frontendVersion = header.get('initConfig').get('frontendVersion') def get_page_data(pagenum, pagesize): return self._download_json( 
'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id, query={'page': 1 + pagenum, 'pageSize': pagesize}, - headers={ - 'X-Frontend-Id': frontendId, - 'X-Frontend-Version': frontendVersion, - }).get('data').get('mylist') + headers=self._API_HEADERS).get('data').get('mylist') data = get_page_data(0, 1) title = data.get('name') @@ -672,20 +671,20 @@ class NiconicoUserIE(InfoExtractor): 'playlist_mincount': 101, } _API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s" - _api_headers = { - 'X-Frontend-ID': '6', - 'X-Frontend-Version': '0', - 'X-Niconico-Language': 'en-us' - } _PAGE_SIZE = 100 + _API_HEADERS = { + 'X-Frontend-ID': '6', + 'X-Frontend-Version': '0' + } + def _entries(self, list_id, ): total_count = 1 count = page_num = 0 while count < total_count: json_parsed = self._download_json( self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id, - headers=self._api_headers, + headers=self._API_HEADERS, note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else '')) if not page_num: total_count = int_or_none(json_parsed['data'].get('totalCount'))