1
mirror of https://github.com/yt-dlp/yt-dlp synced 2024-12-13 09:43:56 +01:00
yt-dlp/youtube_dl/extractor/hearthisat.py
Sergey M? 5c2266df4b Switch codebase to use sanitized_Request instead of
compat_urllib_request.Request

[downloader/dash] Use sanitized_Request

[downloader/http] Use sanitized_Request

[atresplayer] Use sanitized_Request

[bambuser] Use sanitized_Request

[bliptv] Use sanitized_Request

[brightcove] Use sanitized_Request

[cbs] Use sanitized_Request

[ceskatelevize] Use sanitized_Request

[collegerama] Use sanitized_Request

[extractor/common] Use sanitized_Request

[crunchyroll] Use sanitized_Request

[dailymotion] Use sanitized_Request

[dcn] Use sanitized_Request

[dramafever] Use sanitized_Request

[dumpert] Use sanitized_Request

[eitb] Use sanitized_Request

[escapist] Use sanitized_Request

[everyonesmixtape] Use sanitized_Request

[extremetube] Use sanitized_Request

[facebook] Use sanitized_Request

[fc2] Use sanitized_Request

[flickr] Use sanitized_Request

[4tube] Use sanitized_Request

[gdcvault] Use sanitized_Request

[extractor/generic] Use sanitized_Request

[hearthisat] Use sanitized_Request

[hotnewhiphop] Use sanitized_Request

[hypem] Use sanitized_Request

[iprima] Use sanitized_Request

[ivi] Use sanitized_Request

[keezmovies] Use sanitized_Request

[letv] Use sanitized_Request

[lynda] Use sanitized_Request

[metacafe] Use sanitized_Request

[minhateca] Use sanitized_Request

[miomio] Use sanitized_Request

[meovideo] Use sanitized_Request

[mofosex] Use sanitized_Request

[moniker] Use sanitized_Request

[mooshare] Use sanitized_Request

[movieclips] Use sanitized_Request

[mtv] Use sanitized_Request

[myvideo] Use sanitized_Request

[neteasemusic] Use sanitized_Request

[nfb] Use sanitized_Request

[niconico] Use sanitized_Request

[noco] Use sanitized_Request

[nosvideo] Use sanitized_Request

[novamov] Use sanitized_Request

[nowness] Use sanitized_Request

[nuvid] Use sanitized_Request

[played] Use sanitized_Request

[pluralsight] Use sanitized_Request

[pornhub] Use sanitized_Request

[pornotube] Use sanitized_Request

[primesharetv] Use sanitized_Request

[promptfile] Use sanitized_Request

[qqmusic] Use sanitized_Request

[rtve] Use sanitized_Request

[safari] Use sanitized_Request

[sandia] Use sanitized_Request

[shared] Use sanitized_Request

[sharesix] Use sanitized_Request

[sina] Use sanitized_Request

[smotri] Use sanitized_Request

[sohu] Use sanitized_Request

[spankwire] Use sanitized_Request

[sportdeutschland] Use sanitized_Request

[streamcloud] Use sanitized_Request

[streamcz] Use sanitized_Request

[tapely] Use sanitized_Request

[tube8] Use sanitized_Request

[tubitv] Use sanitized_Request

[twitch] Use sanitized_Request

[twitter] Use sanitized_Request

[udemy] Use sanitized_Request

[vbox7] Use sanitized_Request

[veoh] Use sanitized_Request

[vessel] Use sanitized_Request

[vevo] Use sanitized_Request

[viddler] Use sanitized_Request

[videomega] Use sanitized_Request

[viewvster] Use sanitized_Request

[viki] Use sanitized_Request

[vk] Use sanitized_Request

[vodlocker] Use sanitized_Request

[voicerepublic] Use sanitized_Request

[wistia] Use sanitized_Request

[xfileshare] Use sanitized_Request

[xtube] Use sanitized_Request

[xvideos] Use sanitized_Request

[yandexmusic] Use sanitized_Request

[youku] Use sanitized_Request

[youporn] Use sanitized_Request

[youtube] Use sanitized_Request

[patreon] Use sanitized_Request

[extractor/common] Remove unused import

[nfb] PEP 8
2015-11-23 21:56:23 +06:00

116 lines
4.2 KiB
Python

# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
HEADRequest,
sanitized_Request,
str_to_int,
urlencode_postdata,
urlhandle_detect_ext,
)
class HearThisAtIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
_PLAYLIST_URL = 'https://hearthis.at/playlist.php'
_TEST = {
'url': 'https://hearthis.at/moofi/dr-kreep',
'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
'info_dict': {
'id': '150939',
'ext': 'wav',
'title': 'Moofi - Dr. Kreep',
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1421564134,
'description': 'Creepy Patch. Mutable Instruments Braids Vowel + Formant Mode.',
'upload_date': '20150118',
'comment_count': int,
'view_count': int,
'like_count': int,
'duration': 71,
'categories': ['Experimental'],
}
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
display_id = '{artist:s} - {title:s}'.format(**m.groupdict())
webpage = self._download_webpage(url, display_id)
track_id = self._search_regex(
r'intTrackId\s*=\s*(\d+)', webpage, 'track ID')
payload = urlencode_postdata({'tracks[]': track_id})
req = sanitized_Request(self._PLAYLIST_URL, payload)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
track = self._download_json(req, track_id, 'Downloading playlist')[0]
title = '{artist:s} - {title:s}'.format(**track)
categories = None
if track.get('category'):
categories = [track['category']]
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
meta_span = r'<span[^>]+class="%s".*?</i>([^<]+)</span>'
view_count = str_to_int(self._search_regex(
meta_span % 'plays_count', webpage, 'view count', fatal=False))
like_count = str_to_int(self._search_regex(
meta_span % 'likes_count', webpage, 'like count', fatal=False))
comment_count = str_to_int(self._search_regex(
meta_span % 'comment_count', webpage, 'comment count', fatal=False))
duration = str_to_int(self._search_regex(
r'data-length="(\d+)', webpage, 'duration', fatal=False))
timestamp = str_to_int(self._search_regex(
r'<span[^>]+class="calctime"[^>]+data-time="(\d+)', webpage, 'timestamp', fatal=False))
formats = []
mp3_url = self._search_regex(
r'(?s)<a class="player-link"\s+(?:[a-zA-Z0-9_:-]+="[^"]+"\s+)*?data-mp3="([^"]+)"',
webpage, 'mp3 URL', fatal=False)
if mp3_url:
formats.append({
'format_id': 'mp3',
'vcodec': 'none',
'acodec': 'mp3',
'url': mp3_url,
})
download_path = self._search_regex(
r'<a class="[^"]*download_fct[^"]*"\s+href="([^"]+)"',
webpage, 'download URL', default=None)
if download_path:
download_url = compat_urlparse.urljoin(url, download_path)
ext_req = HEADRequest(download_url)
ext_handle = self._request_webpage(
ext_req, display_id, note='Determining extension')
ext = urlhandle_detect_ext(ext_handle)
formats.append({
'format_id': 'download',
'vcodec': 'none',
'ext': ext,
'url': download_url,
'preference': 2, # Usually better quality
})
self._sort_formats(formats)
return {
'id': track_id,
'display_id': display_id,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
'description': description,
'duration': duration,
'timestamp': timestamp,
'view_count': view_count,
'comment_count': comment_count,
'like_count': like_count,
'categories': categories,
}