diff --git a/youtube_dl/extractor/exfm.py b/youtube_dl/extractor/exfm.py index fe1582d1a6..3443f19c5f 100644 --- a/youtube_dl/extractor/exfm.py +++ b/youtube_dl/extractor/exfm.py @@ -8,17 +8,30 @@ class ExfmIE(InfoExtractor): IE_NAME = u'exfm' IE_DESC = u'ex.fm' _VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)' - _SOUNDCLOUD_URL_ = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream' - _TEST = { - u'url': u'http://ex.fm/song/1bgtzg', - u'file': u'1bgtzg.mp3', - u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf', - u'info_dict': { - u"title": u"We Can't Stop", - u"uploader": u"Miley Cyrus", - u'thumbnail': u'http://i1.sndcdn.com/artworks-000049666230-w9i7ef-t500x500.jpg?9d68d37' - } - } + _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream' + _TESTS = [ + { + u'url': u'http://ex.fm/song/1bgtzg', + u'file': u'95223130.mp3', + u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf', + u'info_dict': { + u"title": u"We Can't Stop - Miley Cyrus", + u"uploader": u"Miley Cyrus", + u'upload_date': u'20130603', + u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC', + }, + u'note': u'Soundcloud song', + }, + { + u'url': u'http://ex.fm/song/wddt8', + u'file': u'wddt8.mp3', + u'md5': u'966bd70741ac5b8570d8e45bfaed3643', + u'info_dict': { + u'title': u'Safe and Sound', + u'uploader': u'Capital Cities', + }, + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -26,11 +39,10 @@ class ExfmIE(InfoExtractor): info_url = "http://ex.fm/api/v3/song/%s" %(song_id) webpage = self._download_webpage(info_url, song_id) info = json.loads(webpage) - song_url = re.match(self._SOUNDCLOUD_URL_,info['song']['url']) - if song_url is not None: - song_url = song_url.group() + "?client_id=b45b1aa10f1ac2941910a7f0d10f8e28" - else: - song_url = info['song']['url'] + song_url = info['song']['url'] + if re.match(self._SOUNDCLOUD_URL, song_url) is not 
None: + self.to_screen('Soundcloud song detected') + return self.url_result(song_url.replace('/stream',''), 'Soundcloud') return [{ 'id': song_id, 'url': song_url, diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 54ff8db12e..7c9f1c6b65 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -19,7 +19,11 @@ class SoundcloudIE(InfoExtractor): of the stream token and uid """ - _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$' + _VALID_URL = r'''^(?:https?://)? + (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$) + |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)) + ) + ''' IE_NAME = u'soundcloud' _TEST = { u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', @@ -35,6 +39,10 @@ class SoundcloudIE(InfoExtractor): _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28' + @classmethod + def suitable(cls, url): + return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None + def report_resolve(self, video_id): """Report information extraction.""" self.to_screen(u'%s: Resolving id' % video_id) @@ -63,21 +71,26 @@ class SoundcloudIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) + mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) if mobj is None: raise ExtractorError(u'Invalid URL: %s' % url) - # extract uploader (which is in the url) - uploader = mobj.group(1) - # extract simple title (uploader + slug of song title) - slug_title = mobj.group(2) - full_title = '%s/%s' % (uploader, slug_title) - - self.report_resolve(full_title) - - url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title) - resolv_url = self._resolv_url(url) - info_json = self._download_webpage(resolv_url, full_title, u'Downloading info JSON') + track_id = mobj.group('track_id') + if track_id is not None: + info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID + 
full_title = track_id + else: + # extract uploader (which is in the url) + uploader = mobj.group(1) + # extract simple title (uploader + slug of song title) + slug_title = mobj.group(2) + full_title = '%s/%s' % (uploader, slug_title) + + self.report_resolve(full_title) + + url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title) + info_json_url = self._resolv_url(url) + info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON') info = json.loads(info_json) return self._extract_info_dict(info, full_title)