yt-dlp/yt_dlp/extractor/dropbox.py

import base64
import os.path
import re
import urllib.parse

from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..utils import (
    ExtractorError,
    update_url_query,
    url_basename,
)


class DropboxIE(InfoExtractor):
    """Extractor for files shared through dropbox.com links.

    Matches ``/s/``, ``/sh/``, ``/scl/fi/`` and ``/e/scl/fi/`` share URLs.
    Password-protected links are supported through the ``videopassword``
    param or an existing ``sm_auth`` cookie.
    """

    _VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/fi|sh?)/(?P<id>\w+)'
    _TESTS = [
        {
            'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
            'info_dict': {
                'id': 'nelirfsxnmcfbfh',
                'ext': 'mp4',
                'title': 'youtube-dl test video \'ä"BaW_jenozKc',
            },
        }, {
            'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh',
            'only_matching': True,
        }, {
            'url': 'https://www.dropbox.com/sh/2mgpiuq7kv8nqdf/AABy-fW4dkydT4GmWi2mdOUDa?dl=0&preview=Drone+Shot.mp4',
            'only_matching': True,
        }, {
            'url': 'https://www.dropbox.com/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
            'only_matching': True,
        }, {
            'url': 'https://www.dropbox.com/e/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Extract formats and subtitles for a Dropbox share link.

        The title is the last path component of ``url`` (percent-decoded)
        without its file extension. When the page asks for a password, the
        ``videopassword`` param is POSTed to ``/sm/auth`` and the page is
        downloaded again; if no password was given but an ``sm_auth`` cookie
        exists, the page is simply downloaded again.

        Raises:
            ExtractorError: if the link is password protected and neither a
                password nor an ``sm_auth`` cookie is available, if the ``t``
                cookie needed for the auth request is missing, or if the
                server does not answer with status ``authed``.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        fn = compat_urllib_parse_unquote(url_basename(url))
        title = os.path.splitext(fn)[0]

        password = self.get_param('videopassword')
        if (self._og_search_title(webpage) == 'Dropbox - Password Required'
                or 'Enter the password for this link' in webpage):

            if password:
                content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id')
                token_cookie = self._get_cookies('https://www.dropbox.com').get('t')
                if not token_cookie:
                    # Fail with a readable message instead of an AttributeError on `.value`
                    raise ExtractorError('Unable to find the "t" cookie required for password authentication')

                # Every field must be percent-encoded: the password is arbitrary
                # user input and share URLs routinely carry several query
                # parameters (e.g. "?dl=0&rlkey=..."), so raw interpolation
                # would split them into bogus form fields.
                payload = urllib.parse.urlencode({
                    'is_xhr': 'true',
                    't': token_cookie.value,
                    # NOTE(review): content_id is scraped from a URL embedded in the
                    # page and is presumably already percent-encoded there; it is
                    # decoded first so that it is not encoded twice - confirm
                    'content_id': compat_urllib_parse_unquote(content_id),
                    'password': password,
                    'url': url,
                })
                response = self._download_json(
                    'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode('UTF-8'),
                    headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'})

                if response.get('status') != 'authed':
                    raise ExtractorError('Authentication failed!', expected=True)
                webpage = self._download_webpage(url, video_id)
            elif self._get_cookies('https://dropbox.com').get('sm_auth'):
                webpage = self._download_webpage(url, video_id)
            else:
                raise ExtractorError('Password protected video, use --video-password <password>', expected=True)

        formats, subtitles, has_anonymous_download = [], {}, False
        # The page passes base64 blobs to registerStreamedPrefetch(); walk them from
        # last to first and stop at the first one that contains an m3u8 URL.
        for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
            # The blobs are not pure text, so undecodable bytes are dropped
            decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
            if not has_anonymous_download:
                has_anonymous_download = self._search_regex(
                    r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
            transcode_url = self._search_regex(
                r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
            if not transcode_url:
                continue
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
            break

        # downloads enabled we can get the original file
        if has_anonymous_download:
            formats.append({
                'url': update_url_query(url, {'dl': '1'}),
                'format_id': 'original',
                'format_note': 'Original',
                'quality': 1,
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
        }
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`import base64`
[dropbox] Correct test case (#2171) 2014-01-19 06:16:40 +01:00			`import os.path`
Added dropbox support. issue #2055 2014-01-18 16:15:53 +01:00			`import re`

			`from .common import InfoExtractor`
[util] Move compatibility functions out of util utils is large enough without these compatibility functions. Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py . Everything else (i.e. youtube-dl-specific helpers) goes into utils.py . 2014-11-02 11:23:40 +01:00			`from ..compat import compat_urllib_parse_unquote`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 11:44:10 +01:00			`from ..utils import (`
			`ExtractorError,`
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`update_url_query,`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 11:44:10 +01:00			`url_basename,`
			`)`
Added dropbox support. issue #2055 2014-01-18 16:15:53 +01:00
[dropbox] PEP8 and simplify (#2171) 2014-01-19 06:14:24 +01:00
Added support for Dropbox 2014-01-19 05:50:26 +01:00			`class DropboxIE(InfoExtractor):`
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`_VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/fi\|sh?)/(?P<id>\w+)'`
PEP8: applied even more rules 2014-11-23 21:39:15 +01:00			`_TESTS = [`
			`{`
Completely change project name to yt-dlp (#85) * All modules and binary names are changed * All documentation references changed * yt-dlp no longer loads youtube-dlc config files * All URLs changed to point to organization account Co-authored-by: Pccode66 Co-authored-by: pukkandan 2021-02-24 19:45:56 +01:00			`'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',`
PEP8: applied even more rules 2014-11-23 21:39:15 +01:00			`'info_dict': {`
			`'id': 'nelirfsxnmcfbfh',`
			`'ext': 'mp4',`
Completely change project name to yt-dlp (#85) * All modules and binary names are changed * All documentation references changed * yt-dlp no longer loads youtube-dlc config files * All URLs changed to point to organization account Co-authored-by: Pccode66 Co-authored-by: pukkandan 2021-02-24 19:45:56 +01:00			`'title': 'youtube-dl test video \'ä"BaW_jenozKc'`
PEP8: applied even more rules 2014-11-23 21:39:15 +01:00			`}`
			`}, {`
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh',`
			`'only_matching': True,`
			`}, {`
			`'url': 'https://www.dropbox.com/sh/2mgpiuq7kv8nqdf/AABy-fW4dkydT4GmWi2mdOUDa?dl=0&preview=Drone+Shot.mp4',`
			`'only_matching': True,`
			`}, {`
			`'url': 'https://www.dropbox.com/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',`
			`'only_matching': True,`
			`}, {`
			`'url': 'https://www.dropbox.com/e/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',`
PEP8: applied even more rules 2014-11-23 21:39:15 +01:00			`'only_matching': True,`
			`},`
[dropbox] Recognize 'https://www.dropbox.com/sh/*' urls (fixes #3795) And extract the title from the url last path component. 2014-09-21 13:40:22 +02:00			`]`
[dropbox] PEP8 and simplify (#2171) 2014-01-19 06:14:24 +01:00
			`def _real_extract(self, url):`
[extractor] Common function `_match_valid_url` 2021-08-19 03:41:24 +02:00			`mobj = self._match_valid_url(url)`
[dropbox] PEP8 and simplify (#2171) 2014-01-19 06:14:24 +01:00			`video_id = mobj.group('id')`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 11:44:10 +01:00			`webpage = self._download_webpage(url, video_id)`
[dropbox] Recognize 'https://www.dropbox.com/sh/*' urls (fixes #3795) And extract the title from the url last path component. 2014-09-21 13:40:22 +02:00			`fn = compat_urllib_parse_unquote(url_basename(url))`
[dropbox] Fix test and add support for spaces in filenames 2014-07-21 12:57:40 +02:00			`title = os.path.splitext(fn)[0]`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 11:44:10 +01:00
			`password = self.get_param('videopassword')`
			`if (self._og_search_title(webpage) == 'Dropbox - Password Required'`
			`or 'Enter the password for this link' in webpage):`

			`if password:`
			`content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id')`
			`payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}'`
			`response = self._download_json(`
			`'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode('UTF-8'),`
			`headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'})`

			`if response.get('status') != 'authed':`
			`raise ExtractorError('Authentication failed!', expected=True)`
			`webpage = self._download_webpage(url, video_id)`
			`elif self._get_cookies('https://dropbox.com').get('sm_auth'):`
			`webpage = self._download_webpage(url, video_id)`
			`else:`
			`raise ExtractorError('Password protected video, use --video-password <password>', expected=True)`

[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`formats, subtitles, has_anonymous_download = [], {}, False`
			`for encoded in reversed(re.findall(r'registerStreamedPrefetch\s\(\s"[\w/+=]+"\s,\s"([\w/+=]+)"', webpage)):`
			`decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')`
[ie/dropbox] Fix formats extraction (#9627) Closes #9533 Authored by: bashonly 2024-04-06 19:19:44 +02:00			`if not has_anonymous_download:`
			`has_anonymous_download = self._search_regex(`
			`r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)`
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`transcode_url = self._search_regex(`
Bugfix for b9f2bc2dbed2323734a0d18e65e1e2e23dc833d8 Authored by: bashonly 2023-08-29 15:06:02 +02:00			`r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)`
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`if not transcode_url:`
			`continue`
Bugfix for b9f2bc2dbed2323734a0d18e65e1e2e23dc833d8 Authored by: bashonly 2023-08-29 15:06:02 +02:00			`formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')`
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`break`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 11:44:10 +01:00
			`# downloads enabled we can get the original file`
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`if has_anonymous_download:`
			`formats.append({`
			`'url': update_url_query(url, {'dl': '1'}),`
			`'format_id': 'original',`
			`'format_note': 'Original',`
			`'quality': 1`
			`})`
[dropbox] PEP8 and simplify (#2171) 2014-01-19 06:14:24 +01:00
			`return {`
			`'id': video_id,`
			`'title': title,`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 11:44:10 +01:00			`'formats': formats,`
			`'subtitles': subtitles`
[dropbox] PEP8 and simplify (#2171) 2014-01-19 06:14:24 +01:00			`}`