[cleanup] Misc fixes

Closes #3565, https://github.com/yt-dlp/yt-dlp/issues/3514#issuecomment-1105944364
2025-02-19 23:33:18 +01:00 · 2022-04-29 07:18:36 +05:30 · 2022-04-29 07:18:36 +05:30 · 1d485a1a79
commit 1d485a1a79
parent 0a41f331cc
19 changed files with 75 additions and 42 deletions
--- a/devscripts/lazy_load_template.py
+++ b/devscripts/lazy_load_template.py
@ -7,7 +7,7 @@ class LazyLoadMetaClass(type):
    def __getattr__(cls, name):
        if '_real_class' not in cls.__dict__:
            write_string(
-                f'WARNING: Falling back to normal extractor since lazy extractor '
+                'WARNING: Falling back to normal extractor since lazy extractor '
                f'{cls.__name__} does not have attribute {name}{bug_reports_message()}')
        return getattr(cls._get_real_class(), name)

--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@ -62,6 +62,7 @@ from .utils import (
    DEFAULT_OUTTMPL,
    LINK_TEMPLATES,
    NO_DEFAULT,
+    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
@ -1049,7 +1050,7 @@ class YoutubeDL:
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
-        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
+        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)
@ -1071,18 +1072,18 @@ class YoutubeDL:
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
-        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
+        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
-        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
+        INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
            (?P<negate>-)?
-            (?P<fields>{field})
-            (?P<maths>(?:{math_op}{math_field})*)
+            (?P<fields>{FIELD_RE})
+            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
-            )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
+            )$''')

        def _traverse_infodict(k):
            k = k.split('.')
@ -2336,7 +2337,7 @@ class YoutubeDL:
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            elif not info_dict.get('title'):
                self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
-                info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
+                info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

        if info_dict.get('duration') is not None:
            info_dict['duration_string'] = formatSeconds(info_dict['duration'])
@ -3669,10 +3670,11 @@ class YoutubeDL:
        ) or 'none'
        write_debug('exe versions: %s' % exe_str)

+        from .compat.compat_utils import get_package_info
        from .dependencies import available_dependencies

        write_debug('Optional libraries: %s' % (', '.join(sorted({
-            module.__name__.split('.')[0] for module in available_dependencies.values()
+            join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
        })) or 'none'))

        self._setup_opener()
--- a/yt_dlp/compat/__init__.py
+++ b/yt_dlp/compat/__init__.py
@ -46,10 +46,6 @@ def compat_ord(c):
    return c if isinstance(c, int) else ord(c)


-def compat_setenv(key, value, env=os.environ):
-    env[key] = value
-
-
 if compat_os_name == 'nt' and sys.version_info < (3, 8):
    # os.path.realpath on Windows does not follow symbolic links
    # prior to Python 3.8 (see https://bugs.python.org/issue9949)
--- a/yt_dlp/compat/_deprecated.py
+++ b/yt_dlp/compat/_deprecated.py
@ -44,4 +44,9 @@ compat_urllib_parse_urlparse = urllib.parse.urlparse
 compat_urllib_request = urllib.request
 compat_urlparse = compat_urllib_parse = urllib.parse

+
+def compat_setenv(key, value, env=os.environ):
+    env[key] = value
+
+
 __all__ = [x for x in globals() if x.startswith('compat_')]
--- a/yt_dlp/compat/asyncio.py
+++ b/yt_dlp/compat/asyncio.py
@ -1,5 +1,4 @@
 # flake8: noqa: F405
-
 from asyncio import *  # noqa: F403

 from .compat_utils import passthrough_module
--- a/yt_dlp/compat/compat_utils.py
+++ b/yt_dlp/compat/compat_utils.py
@ -1,9 +1,28 @@
+import collections
 import contextlib
 import importlib
 import sys
 import types


+_NO_ATTRIBUTE = object()
+
+_Package = collections.namedtuple('Package', ('name', 'version'))
+
+
+def get_package_info(module):
+    parent = module.__name__.split('.')[0]
+    parent_module = None
+    with contextlib.suppress(ImportError):
+        parent_module = importlib.import_module(parent)
+
+    for attr in ('__version__', 'version_string', 'version'):
+        version = getattr(parent_module, attr, None)
+        if version is not None:
+            break
+    return _Package(getattr(module, '_yt_dlp__identifier', parent), str(version))
+
+
 def _is_package(module):
    try:
        module.__getattribute__('__path__')
@ -12,9 +31,6 @@ def _is_package(module):
    return True


-_NO_ATTRIBUTE = object()
-
-
 def passthrough_module(parent, child, *, callback=lambda _: None):
    parent_module = importlib.import_module(parent)
    child_module = importlib.import_module(child, parent)
--- a/yt_dlp/compat/re.py
+++ b/yt_dlp/compat/re.py
@ -1,5 +1,4 @@
 # flake8: noqa: F405
-
 from re import *  # F403

 from .compat_utils import passthrough_module
--- a/yt_dlp/dependencies.py
+++ b/yt_dlp/dependencies.py
@ -1,4 +1,6 @@
 # flake8: noqa: F401
+"""Imports all optional dependencies for the project.
+An attribute "_yt_dlp__identifier" may be inserted into the module if it uses an ambiguous namespace"""

 try:
    import brotlicffi as brotli
@ -28,6 +30,15 @@ except ImportError:
        from Crypto.Cipher import AES as Cryptodome_AES
    except ImportError:
        Cryptodome_AES = None
+    else:
+        try:
+            # In pycrypto, mode defaults to ECB. See:
+            # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode
+            Cryptodome_AES.new(b'abcdefghijklmnop')
+        except TypeError:
+            pass
+        else:
+            Cryptodome_AES._yt_dlp__identifier = 'pycrypto'


 try:
--- a/yt_dlp/downloader/common.py
+++ b/yt_dlp/downloader/common.py
@ -12,6 +12,7 @@ from ..minicurses import (
    QuietMultilinePrinter,
 )
 from ..utils import (
+    NUMBER_RE,
    LockingUnsupportedError,
    Namespace,
    decodeArgument,
@ -91,7 +92,8 @@ class FileDownloader:
            'trouble',
            'write_debug',
        ):
-            setattr(self, func, getattr(ydl, func))
+            if not hasattr(self, func):
+                setattr(self, func, getattr(ydl, func))

    def to_screen(self, *args, **kargs):
        self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
@ -170,7 +172,7 @@ class FileDownloader:
    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer."""
-        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
+        matchobj = re.match(rf'(?i)^({NUMBER_RE})([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@ -368,7 +368,7 @@ class FFmpegFD(ExternalFD):

        # These exists only for compatibility. Extractors should use
        # info_dict['downloader_options']['ffmpeg_args'] instead
-        args += info_dict.get('_ffmpeg_args')
+        args += info_dict.get('_ffmpeg_args') or []
        seekable = info_dict.get('_seekable')
        if seekable is not None:
            # setting -seekable prevents ffmpeg from guessing if the server
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@ -1,3 +1,4 @@
+import concurrent.futures
 import contextlib
 import http.client
 import json
@ -5,12 +6,6 @@ import math
 import os
 import time

-try:
-    import concurrent.futures
-    can_threaded_download = True
-except ImportError:
-    can_threaded_download = False
-
 from .common import FileDownloader
 from .http import HttpFD
 from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
@ -28,6 +23,8 @@ class HttpQuietDownloader(HttpFD):
    def to_screen(self, *args, **kargs):
        pass

+    console_title = to_screen
+
    def report_retry(self, err, count, retries):
        super().to_screen(
            f'[download] Got server HTTP error: {err}. Retrying (attempt {count} of {self.format_retries(retries)}) ...')
@ -501,8 +498,7 @@ class FragmentFD(FileDownloader):

        max_workers = math.ceil(
            self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1))
-        if can_threaded_download and max_workers > 1:
-
+        if max_workers > 1:
            def _download_fragment(fragment):
                ctx_copy = ctx.copy()
                download_fragment(fragment, ctx_copy)
--- a/yt_dlp/downloader/mhtml.py
+++ b/yt_dlp/downloader/mhtml.py
@ -173,7 +173,7 @@ body > figure > img {
                mime_type = b'image/png'
            if frag_content.startswith((b'GIF87a', b'GIF89a')):
                mime_type = b'image/gif'
-            if frag_content.startswith(b'RIFF') and frag_content[8:12] == 'WEBP':
+            if frag_content.startswith(b'RIFF') and frag_content[8:12] == b'WEBP':
                mime_type = b'image/webp'

            frag_header = io.BytesIO()
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -1922,8 +1922,7 @@ class InfoExtractor:
    def _sort_formats(self, formats, field_preference=[]):
        if not formats:
            return
-        format_sort = self.FormatSort(self, field_preference)
-        formats.sort(key=lambda f: format_sort.calculate_preference(f))
+        formats.sort(key=self.FormatSort(self, field_preference).calculate_preference)

    def _check_formats(self, formats, video_id):
        if formats:
--- a/yt_dlp/extractor/fujitv.py
+++ b/yt_dlp/extractor/fujitv.py
@ -17,7 +17,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
        'url': 'https://fod.fujitv.co.jp/title/5d40/5d40110076',
        'info_dict': {
            'id': '5d40110076',
-            'ext': 'mp4',
+            'ext': 'ts',
            'title': '#1318 『まる子、まぼろしの洋館を見る』の巻',
            'series': 'ちびまる子ちゃん',
            'series_id': '5d40',
@ -28,7 +28,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
        'url': 'https://fod.fujitv.co.jp/title/5d40/5d40810083',
        'info_dict': {
            'id': '5d40810083',
-            'ext': 'mp4',
+            'ext': 'ts',
            'title': '#1324 『まる子とオニの子』の巻／『結成！2月をムダにしない会』の巻',
            'description': 'md5:3972d900b896adc8ab1849e310507efa',
            'series': 'ちびまる子ちゃん',
@ -51,7 +51,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
        for src in src_json['video_selector']:
            if not src.get('url'):
                continue
-            fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'mp4')
+            fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'ts')
            for f in fmt:
                f.update(dict(zip(('height', 'width'),
                                  self._BITRATE_MAP.get(f.get('tbr'), ()))))
--- a/yt_dlp/extractor/funimation.py
+++ b/yt_dlp/extractor/funimation.py
@ -242,6 +242,9 @@ class FunimationIE(FunimationBaseIE):
                        'language_preference': language_preference(lang.lower()),
                    })
                formats.extend(current_formats)
+        if not formats and (requested_languages or requested_versions):
+            self.raise_no_formats(
+                'There are no video formats matching the requested languages/versions', expected=True, video_id=display_id)
        self._remove_duplicate_formats(formats)
        self._sort_formats(formats, ('lang', 'source'))

--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@ -3107,7 +3107,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                        'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
                except ExtractorError as e:
                    self.report_warning(
-                        f'nsig extraction failed: You may experience throttling for some formats\n'
+                        'nsig extraction failed: You may experience throttling for some formats\n'
                        f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                    throttled = True

--- a/yt_dlp/postprocessor/embedthumbnail.py
+++ b/yt_dlp/postprocessor/embedthumbnail.py
@ -79,9 +79,9 @@ class EmbedThumbnailPP(FFmpegPostProcessor):

        original_thumbnail = thumbnail_filename = info['thumbnails'][idx]['filepath']

-        thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:]
        # Convert unsupported thumbnail formats (see #25687, #25717)
        # PNG is preferred since JPEG is lossy
+        thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:]
        if info['ext'] not in ('mkv', 'mka') and thumbnail_ext not in ('jpg', 'jpeg', 'png'):
            thumbnail_filename = convertor.convert_thumbnail(thumbnail_filename, 'png')
            thumbnail_ext = 'png'
@ -100,7 +100,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
        elif info['ext'] in ['mkv', 'mka']:
            options = list(self.stream_copy_opts())

-            mimetype = 'image/%s' % ('jpeg' if thumbnail_ext in ('jpg', 'jpeg') else thumbnail_ext)
+            mimetype = f'image/{thumbnail_ext.replace("jpg", "jpeg")}'
            old_stream, new_stream = self.get_stream_number(
                filename, ('tags', 'mimetype'), mimetype)
            if old_stream is not None:
--- a/yt_dlp/postprocessor/xattrpp.py
+++ b/yt_dlp/postprocessor/xattrpp.py
@ -1,3 +1,5 @@
+import os
+
 from .common import PostProcessor
 from ..compat import compat_os_name
 from ..utils import (
@ -28,6 +30,7 @@ class XAttrMetadataPP(PostProcessor):
        self.to_screen('Writing metadata to file\'s xattrs')

        filename = info['filepath']
+        mtime = os.stat(filename).st_mtime

        try:
            xattr_mapping = {
@ -53,8 +56,6 @@ class XAttrMetadataPP(PostProcessor):
                    write_xattr(filename, xattrname, byte_value)
                    num_written += 1

-            return [], info
-
        except XAttrUnavailableError as e:
            raise PostProcessingError(str(e))

@ -73,4 +74,6 @@ class XAttrMetadataPP(PostProcessor):
                else:
                    msg += '(You may have to enable them in your /etc/fstab)'
                raise PostProcessingError(str(e))
-            return [], info
+
+        self.try_utime(filename, mtime, mtime)
+        return [], info
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@ -245,6 +245,8 @@ DATE_FORMATS_MONTH_FIRST.extend([
 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
 JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'

+NUMBER_RE = r'\d+(?:\.\d+)?'
+

 def preferredencoding():
    """Get preferred encoding.
@ -3427,7 +3429,7 @@ def parse_dfxp_time_expr(time_expr):
    if not time_expr:
        return

-    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
+    mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))