1
mirror of https://github.com/yt-dlp/yt-dlp synced 2025-01-15 12:47:29 +01:00
yt-dlp/youtube_dl/extractor/pladform.py

100 lines
3.3 KiB
Python
Raw Normal View History

2015-03-08 13:03:12 +01:00
# coding: utf-8
from __future__ import unicode_literals
2015-12-07 17:02:45 +01:00
import re
2015-03-08 13:03:12 +01:00
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
xpath_text,
2015-03-08 13:09:47 +01:00
qualities,
2015-03-08 13:03:12 +01:00
)
class PladformIE(InfoExtractor):
    """Information extractor for videos served by pladform.ru.

    Matches player URLs on out.pladform.ru and static.pladform.ru (which
    carry the video id in the ``videoid`` query parameter) as well as
    catalog pages on video.pladform.ru.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:
                                out\.pladform\.ru/player|
                                static\.pladform\.ru/player\.swf
                            )
                            \?.*\bvideoid=|
                            video\.pladform\.ru/catalog/video/videoid/
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        # http://muz-tv.ru/kinozal/view/7400/
        'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
        'md5': '61f37b575dd27f1bb2e1854777fe31f4',
        'info_dict': {
            'id': '100183293',
            'ext': 'mp4',
            'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
            'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 694,
            'age_limit': 0,
        },
    }, {
        'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
        'only_matching': True,
    }, {
        'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the URL of the first out.pladform.ru player iframe.

        Searches *webpage* for an ``<iframe>`` whose ``src`` attribute
        points at ``out.pladform.ru/player?...`` (with or without an
        explicit scheme). Returns the matched URL, or ``None`` when the
        page contains no such iframe.
        """
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1',
            webpage)
        if mobj:
            return mobj.group('url')
        return None

    def _real_extract(self, url):
        """Build the info dict for the Pladform video referenced by *url*.

        Downloads the ``getVideo`` XML document for the video id to obtain
        the media sources, duration, cover and age flag, then downloads the
        catalog page to obtain the title, description and thumbnail.

        Raises ExtractorError (expected) when the XML root element is
        ``<error>``.
        """
        video_id = self._match_id(url)

        # NOTE: ``pl`` is hard-coded to 1 here; any ``pl`` value present in
        # the input URL is not forwarded to the API.
        video = self._download_xml(
            'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
            video_id)

        if video.tag == 'error':
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, video.text),
                expected=True)

        # Preference order of the known quality labels, lowest first.
        quality = qualities(('ld', 'sd', 'hd'))

        # Skip <src> elements without text so that no format entry is
        # produced with a missing URL.
        formats = [{
            'url': src.text,
            'format_id': src.get('quality'),
            'quality': quality(src.get('quality')),
        } for src in video.findall('./src') if src.text]

        self._sort_formats(formats)

        webpage = self._download_webpage(
            'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
            video_id)

        # Prefer metadata from the catalog page; fall back to the XML.
        title = self._og_search_title(webpage, fatal=False) or xpath_text(
            video, './/title', 'title', fatal=True)
        description = self._search_regex(
            r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)
        thumbnail = self._og_search_thumbnail(webpage) or xpath_text(
            video, './/cover', 'cover')

        duration = int_or_none(xpath_text(video, './/time', 'duration'))
        age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
            'formats': formats,
        }