1
mirror of https://github.com/yt-dlp/yt-dlp synced 2024-12-20 00:35:54 +01:00

Add InstagramIE (related #904)

This commit is contained in:
Jaime Marquínez Ferrándiz 2013-07-01 21:08:54 +02:00
parent 5c44c15438
commit 59fc531f78
2 changed files with 43 additions and 0 deletions

View File

@ -25,6 +25,7 @@ from .howcast import HowcastIE
from .hypem import HypemIE
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
from .keek import KeekIE

View File

@ -0,0 +1,42 @@
import re
from .common import InfoExtractor
class InstagramIE(InfoExtractor):
_VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/'
_TEST = {
u'url': u'http://instagram.com/p/aye83DjauH/#',
u'file': u'aye83DjauH.mp4',
u'md5': u'0d2da106a9d2631273e192b372806516',
u'info_dict': {
u"uploader_id": u"naomipq",
u"title": u"Video by naomipq"
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(
r'<meta property="og:video" content="(.+?)"',
webpage, u'video URL')
thumbnail_url = self._html_search_regex(
r'<meta property="og:image" content="(.+?)" />',
webpage, u'thumbnail URL', fatal=False)
html_title = self._html_search_regex(
r'<title>(.+?)</title>',
webpage, u'title', flags=re.DOTALL)
title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip()
uploader_id = self._html_search_regex(r'content="(.*?)\'s video on Instagram',
webpage, u'uploader name', fatal=False)
ext = 'mp4'
return [{
'id': video_id,
'url': video_url,
'ext': ext,
'title': title,
'thumbnail': thumbnail_url,
'uploader_id' : uploader_id
}]