mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-15 12:47:29 +01:00
149 lines
5.3 KiB
Python
149 lines
5.3 KiB
Python
# coding: utf-8
|
|
from __future__ import unicode_literals
|
|
|
|
from .common import InfoExtractor
|
|
from ..compat import compat_str
|
|
from ..utils import (
|
|
ExtractorError,
|
|
smuggle_url,
|
|
str_or_none,
|
|
traverse_obj,
|
|
unified_strdate,
|
|
unsmuggle_url,
|
|
)
|
|
|
|
import itertools
|
|
|
|
|
|
class VoicyBaseIE(InfoExtractor):
|
|
def _extract_from_playlist_data(self, value):
|
|
voice_id = compat_str(value.get('PlaylistId'))
|
|
upload_date = unified_strdate(value.get('Published'), False)
|
|
items = [self._extract_single_article(voice_data) for voice_data in value['VoiceData']]
|
|
return {
|
|
'_type': 'multi_video',
|
|
'entries': items,
|
|
'id': voice_id,
|
|
'title': compat_str(value.get('PlaylistName')),
|
|
'uploader': value.get('SpeakerName'),
|
|
'uploader_id': str_or_none(value.get('SpeakerId')),
|
|
'channel': value.get('ChannelName'),
|
|
'channel_id': str_or_none(value.get('ChannelId')),
|
|
'upload_date': upload_date,
|
|
}
|
|
|
|
def _extract_single_article(self, entry):
|
|
formats = [{
|
|
'url': entry['VoiceHlsFile'],
|
|
'format_id': 'hls',
|
|
'ext': 'm4a',
|
|
'acodec': 'aac',
|
|
'vcodec': 'none',
|
|
'protocol': 'm3u8_native',
|
|
}, {
|
|
'url': entry['VoiceFile'],
|
|
'format_id': 'mp3',
|
|
'ext': 'mp3',
|
|
'acodec': 'mp3',
|
|
'vcodec': 'none',
|
|
}]
|
|
self._sort_formats(formats)
|
|
return {
|
|
'id': compat_str(entry.get('ArticleId')),
|
|
'title': entry.get('ArticleTitle'),
|
|
'description': entry.get('MediaName'),
|
|
'formats': formats,
|
|
}
|
|
|
|
def _call_api(self, url, video_id, **kwargs):
|
|
response = self._download_json(url, video_id, **kwargs)
|
|
if response.get('Status') != 0:
|
|
message = traverse_obj(response, ('Value', 'Error', 'Message'), expected_type=compat_str)
|
|
if not message:
|
|
message = 'There was a error in the response: %d' % response.get('Status')
|
|
raise ExtractorError(message, expected=False)
|
|
return response.get('Value')
|
|
|
|
|
|
class VoicyIE(VoicyBaseIE):
|
|
IE_NAME = 'voicy'
|
|
_VALID_URL = r'https?://voicy\.jp/channel/(?P<channel_id>\d+)/(?P<id>\d+)'
|
|
ARTICLE_LIST_API_URL = 'https://vmw.api.voicy.jp/articles_list?channel_id=%s&pid=%s'
|
|
_TESTS = [{
|
|
'url': 'https://voicy.jp/channel/1253/122754',
|
|
'info_dict': {
|
|
'id': '122754',
|
|
'title': '1/21(木)声日記:ついに原稿終わった!!',
|
|
'uploader': 'ちょまど@ ITエンジニアなオタク',
|
|
'uploader_id': '7339',
|
|
},
|
|
'playlist_mincount': 9,
|
|
}]
|
|
|
|
def _real_extract(self, url):
|
|
mobj = self._match_valid_url(url)
|
|
assert mobj
|
|
voice_id = mobj.group('id')
|
|
channel_id = mobj.group('channel_id')
|
|
url, article_list = unsmuggle_url(url)
|
|
if not article_list:
|
|
article_list = self._call_api(self.ARTICLE_LIST_API_URL % (channel_id, voice_id), voice_id)
|
|
return self._extract_from_playlist_data(article_list)
|
|
|
|
|
|
class VoicyChannelIE(VoicyBaseIE):
|
|
IE_NAME = 'voicy:channel'
|
|
_VALID_URL = r'https?://voicy\.jp/channel/(?P<id>\d+)'
|
|
PROGRAM_LIST_API_URL = 'https://vmw.api.voicy.jp/program_list/all?channel_id=%s&limit=20&public_type=3%s'
|
|
_TESTS = [{
|
|
'url': 'https://voicy.jp/channel/1253/',
|
|
'info_dict': {
|
|
'id': '7339',
|
|
'title': 'ゆるふわ日常ラジオ #ちょまラジ',
|
|
'uploader': 'ちょまど@ ITエンジニアなオタク',
|
|
'uploader_id': '7339',
|
|
},
|
|
'playlist_mincount': 54,
|
|
}]
|
|
|
|
@classmethod
|
|
def suitable(cls, url):
|
|
return not VoicyIE.suitable(url) and super(VoicyChannelIE, cls).suitable(url)
|
|
|
|
def _entries(self, channel_id):
|
|
pager = ''
|
|
for count in itertools.count(1):
|
|
article_list = self._call_api(self.PROGRAM_LIST_API_URL % (channel_id, pager), channel_id, note='Paging #%d' % count)
|
|
playlist_data = article_list.get('PlaylistData')
|
|
if not playlist_data:
|
|
break
|
|
yield from playlist_data
|
|
last = playlist_data[-1]
|
|
pager = '&pid=%d&p_date=%s&play_count=%s' % (last['PlaylistId'], last['Published'], last['PlayCount'])
|
|
|
|
def _real_extract(self, url):
|
|
channel_id = self._match_id(url)
|
|
articles = self._entries(channel_id)
|
|
|
|
first_article = next(articles, None)
|
|
title = traverse_obj(first_article, ('ChannelName', ), expected_type=compat_str)
|
|
speaker_name = traverse_obj(first_article, ('SpeakerName', ), expected_type=compat_str)
|
|
if not title and speaker_name:
|
|
title = 'Uploads from %s' % speaker_name
|
|
if not title:
|
|
title = 'Uploads from channel ID %s' % channel_id
|
|
|
|
articles = itertools.chain([first_article], articles) if first_article else articles
|
|
|
|
playlist = (
|
|
self.url_result(smuggle_url('https://voicy.jp/channel/%s/%d' % (channel_id, value['PlaylistId']), value), VoicyIE.ie_key())
|
|
for value in articles)
|
|
return {
|
|
'_type': 'playlist',
|
|
'entries': playlist,
|
|
'id': channel_id,
|
|
'title': title,
|
|
'channel': speaker_name,
|
|
'channel_id': channel_id,
|
|
}
|