1
mirror of https://github.com/yt-dlp/yt-dlp synced 2024-12-27 09:23:46 +01:00

[servingsys] Add support

This also adds support for brightcove advertisements.
Fixes #2181
This commit is contained in:
Philipp Hagemeister 2014-01-21 02:09:49 +01:00
parent 9d4288b2d4
commit 7b0817e8e1
5 changed files with 121 additions and 8 deletions

View File

@ -151,6 +151,7 @@ class YoutubeDL(object):
bidi_workaround: Work around buggy terminals without bidirectional text bidi_workaround: Work around buggy terminals without bidirectional text
support, using fridibi support, using fridibi
debug_printtraffic:Print out sent and received HTTP traffic debug_printtraffic:Print out sent and received HTTP traffic
include_ads: Download ads as well
The following parameters are not used by YoutubeDL itself, they are used by The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader: the FileDownloader:

View File

@ -238,7 +238,10 @@ def parseOpts(overrideArguments=None):
selection.add_option('--download-archive', metavar='FILE', selection.add_option('--download-archive', metavar='FILE',
dest='download_archive', dest='download_archive',
help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.') help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
selection.add_option(
'--include-ads', dest='include_ads',
action='store_true',
help='Download advertisements as well (experimental)')
authentication.add_option('-u', '--username', authentication.add_option('-u', '--username',
dest='username', metavar='USERNAME', help='account username') dest='username', metavar='USERNAME', help='account username')
@ -716,6 +719,7 @@ def _real_main(argv=None):
'bidi_workaround': opts.bidi_workaround, 'bidi_workaround': opts.bidi_workaround,
'debug_printtraffic': opts.debug_printtraffic, 'debug_printtraffic': opts.debug_printtraffic,
'prefer_ffmpeg': opts.prefer_ffmpeg, 'prefer_ffmpeg': opts.prefer_ffmpeg,
'include_ads': opts.include_ads,
} }
with YoutubeDL(ydl_opts) as ydl: with YoutubeDL(ydl_opts) as ydl:

View File

@ -152,6 +152,7 @@ from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE from .rtlnow import RTLnowIE
from .rutube import RutubeIE from .rutube import RutubeIE
from .servingsys import ServingSysIE
from .sina import SinaIE from .sina import SinaIE
from .slashdot import SlashdotIE from .slashdot import SlashdotIE
from .slideshare import SlideshareIE from .slideshare import SlideshareIE

View File

@ -9,9 +9,11 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_parse, compat_urllib_parse,
find_xpath_attr, find_xpath_attr,
fix_xml_ampersands,
compat_urlparse, compat_urlparse,
compat_str, compat_str,
compat_urllib_request, compat_urllib_request,
compat_parse_qs,
ExtractorError, ExtractorError,
unsmuggle_url, unsmuggle_url,
@ -83,17 +85,30 @@ class BrightcoveIE(InfoExtractor):
lambda m: m.group(1) + '/>', object_str) lambda m: m.group(1) + '/>', object_str)
# Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608 # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
object_str = object_str.replace('<--', '<!--') object_str = object_str.replace('<--', '<!--')
object_str = fix_xml_ampersands(object_str)
object_doc = xml.etree.ElementTree.fromstring(object_str) object_doc = xml.etree.ElementTree.fromstring(object_str)
assert 'BrightcoveExperience' in object_doc.attrib['class']
params = { fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'], flashvars = dict(
} (k, v[0])
for k, v in compat_parse_qs(fv_el.attrib['value']).items())
def find_param(name): def find_param(name):
if name in flashvars:
return flashvars[name]
node = find_xpath_attr(object_doc, './param', 'name', name) node = find_xpath_attr(object_doc, './param', 'name', name)
if node is not None: if node is not None:
return node.attrib['value'] return node.attrib['value']
return None return None
params = {}
playerID = find_param('playerID')
if playerID is None:
raise ExtractorError('Cannot find player ID')
params['playerID'] = playerID
playerKey = find_param('playerKey') playerKey = find_param('playerKey')
# Not all pages define this value # Not all pages define this value
if playerKey is not None: if playerKey is not None:
@ -114,8 +129,12 @@ class BrightcoveIE(InfoExtractor):
if it can't be found if it can't be found
""" """
m_brightcove = re.search( m_brightcove = re.search(
r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', r'''(?sx)<object
webpage, re.DOTALL) (?:
:[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
).+?</object>''',
webpage)
if m_brightcove is not None: if m_brightcove is not None:
return cls._build_brighcove_url(m_brightcove.group()) return cls._build_brighcove_url(m_brightcove.group())
else: else:
@ -156,6 +175,7 @@ class BrightcoveIE(InfoExtractor):
info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json') info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
info = json.loads(info)['data'] info = json.loads(info)['data']
video_info = info['programmedContent']['videoPlayer']['mediaDTO'] video_info = info['programmedContent']['videoPlayer']['mediaDTO']
video_info['_youtubedl_adServerURL'] = info.get('adServerURL')
return self._extract_video_info(video_info) return self._extract_video_info(video_info)
@ -193,6 +213,23 @@ class BrightcoveIE(InfoExtractor):
info.update({ info.update({
'url': video_info['FLVFullLengthURL'], 'url': video_info['FLVFullLengthURL'],
}) })
else:
if self._downloader.params.get('include_ads', False):
adServerURL = video_info.get('_youtubedl_adServerURL')
if adServerURL:
ad_info = {
'_type': 'url',
'url': adServerURL,
}
if 'url' in info:
return {
'_type': 'playlist',
'title': info['title'],
'entries': [ad_info, info],
}
else:
return ad_info
if 'url' not in info:
raise ExtractorError('Unable to extract video url for %s' % info['id']) raise ExtractorError('Unable to extract video url for %s' % info['id'])
return info return info

View File

@ -0,0 +1,70 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
)
class ServingSysIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^.]+\.)?serving-sys\.com/BurstingPipe/adServer\.bs\?.*?&pli=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?',
'playlist': [{
'file': '29955898.flv',
'md5': 'baed851342df6846eb8677a60a011a0f',
'info_dict': {
'title': 'AdAPPter_Hyundai_demo (1)',
'duration': 74,
'tbr': 1378,
'width': 640,
'height': 400,
},
}, {
'file': '29907998.flv',
'md5': '979b4da2655c4bc2d81aeb915a8c5014',
'info_dict': {
'title': 'AdAPPter_Hyundai_demo (2)',
'duration': 34,
'width': 854,
'height': 480,
'tbr': 516,
},
}],
'params': {
'playlistend': 2,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
pl_id = mobj.group('id')
vast_doc = self._download_xml(url, pl_id)
title = vast_doc.find('.//AdTitle').text
media = vast_doc.find('.//MediaFile').text
info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL')
doc = self._download_xml(info_url, pl_id, 'Downloading video info')
entries = [{
'_type': 'video',
'id': a.attrib['id'],
'title': '%s (%s)' % (title, a.attrib['assetID']),
'url': a.attrib['URL'],
'duration': int_or_none(a.attrib.get('length')),
'tbr': int_or_none(a.attrib.get('bitrate')),
'height': int_or_none(a.attrib.get('height')),
'width': int_or_none(a.attrib.get('width')),
} for a in doc.findall('.//AdditionalAssets/asset')]
return {
'_type': 'playlist',
'id': pl_id,
'title': title,
'entries': entries,
}