[devscripts] Script to generate changelog (#6220)

Authored by: Grub4K
2024-12-17 19:35:55 +01:00 · 2023-03-03 22:31:41 +05:30 · 2023-03-03 22:31:41 +05:30 · d400e261cf
commit d400e261cf
parent 9acf1ee25f
4 changed files with 593 additions and 2 deletions
--- a/README.md
+++ b/README.md
@ -311,10 +311,13 @@ If you wish to build it anyway, install Python and py2exe, and then simply run `

 ### Related scripts

-* **`devscripts/update-version.py [revision]`** - Update the version number based on current date
-* **`devscripts/set-variant.py variant [-M update_message]`** - Set the build variant of the executable
+* **`devscripts/update-version.py`** - Update the version number based on current date.
+* **`devscripts/set-variant.py`** - Set the build variant of the executable.
+* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file.
 * **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading.

+Note: See their `--help` for more info.
+
 You can also fork the project on GitHub and run your fork's [build workflow](.github/workflows/build.yml) to automatically build a full release

 # USAGE AND OPTIONS
--- a/devscripts/changelog_override.json
+++ b/devscripts/changelog_override.json
@ -0,0 +1 @@
+{}
--- a/devscripts/changelog_override.schema.json
+++ b/devscripts/changelog_override.schema.json
@ -0,0 +1,96 @@
+{
+    "$schema": "http://json-schema.org/draft/2020-12/schema",
+    "type": "array",
+    "uniqueItems": true,
+    "items": {
+        "type": "object",
+        "oneOf": [
+            {
+                "type": "object",
+                "properties": {
+                    "action": {
+                        "enum": [
+                            "add"
+                        ]
+                    },
+                    "when": {
+                        "type": "string",
+                        "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$"
+                    },
+                    "hash": {
+                        "type": "string",
+                        "pattern": "^[0-9a-f]{40}$"
+                    },
+                    "short": {
+                        "type": "string"
+                    },
+                    "authors": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        }
+                    }
+                },
+                "required": [
+                    "action",
+                    "short"
+                ]
+            },
+            {
+                "type": "object",
+                "properties": {
+                    "action": {
+                        "enum": [
+                            "remove"
+                        ]
+                    },
+                    "when": {
+                        "type": "string",
+                        "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$"
+                    },
+                    "hash": {
+                        "type": "string",
+                        "pattern": "^[0-9a-f]{40}$"
+                    }
+                },
+                "required": [
+                    "action",
+                    "hash"
+                ]
+            },
+            {
+                "type": "object",
+                "properties": {
+                    "action": {
+                        "enum": [
+                            "change"
+                        ]
+                    },
+                    "when": {
+                        "type": "string",
+                        "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$"
+                    },
+                    "hash": {
+                        "type": "string",
+                        "pattern": "^[0-9a-f]{40}$"
+                    },
+                    "short": {
+                        "type": "string"
+                    },
+                    "authors": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        }
+                    }
+                },
+                "required": [
+                    "action",
+                    "hash",
+                    "short",
+                    "authors"
+                ]
+            }
+        ]
+    }
+}
--- a/devscripts/make_changelog.py
+++ b/devscripts/make_changelog.py
@ -0,0 +1,491 @@
+from __future__ import annotations
+
+import enum
+import itertools
+import json
+import logging
+import re
+import subprocess
+import sys
+from collections import defaultdict
+from dataclasses import dataclass
+from functools import lru_cache
+from pathlib import Path
+
+BASE_URL = 'https://github.com'
+LOCATION_PATH = Path(__file__).parent
+
+logger = logging.getLogger(__name__)
+
+
+class CommitGroup(enum.Enum):
+    UPSTREAM = None
+    PRIORITY = 'Important'
+    CORE = 'Core'
+    EXTRACTOR = 'Extractor'
+    DOWNLOADER = 'Downloader'
+    POSTPROCESSOR = 'Postprocessor'
+    MISC = 'Misc.'
+
+    @classmethod
+    @lru_cache
+    def commit_lookup(cls):
+        return {
+            name: group
+            for group, names in {
+                cls.PRIORITY: {''},
+                cls.UPSTREAM: {'upstream'},
+                cls.CORE: {
+                    'aes',
+                    'cache',
+                    'compat_utils',
+                    'compat',
+                    'cookies',
+                    'core',
+                    'dependencies',
+                    'jsinterp',
+                    'outtmpl',
+                    'plugins',
+                    'update',
+                    'utils',
+                },
+                cls.MISC: {
+                    'build',
+                    'cleanup',
+                    'devscripts',
+                    'docs',
+                    'misc',
+                    'test',
+                },
+                cls.EXTRACTOR: {'extractor', 'extractors'},
+                cls.DOWNLOADER: {'downloader'},
+                cls.POSTPROCESSOR: {'postprocessor'},
+            }.items()
+            for name in names
+        }
+
+    @classmethod
+    def get(cls, value):
+        result = cls.commit_lookup().get(value)
+        if result:
+            logger.debug(f'Mapped {value!r} => {result.name}')
+        return result
+
+
+@dataclass
+class Commit:
+    hash: str | None
+    short: str
+    authors: list[str]
+
+    def __str__(self):
+        result = f'{self.short!r}'
+
+        if self.hash:
+            result += f' ({self.hash[:7]})'
+
+        if self.authors:
+            authors = ', '.join(self.authors)
+            result += f' by {authors}'
+
+        return result
+
+
+@dataclass
+class CommitInfo:
+    details: str | None
+    sub_details: tuple[str, ...]
+    message: str
+    issues: list[str]
+    commit: Commit
+    fixes: list[Commit]
+
+    def key(self):
+        return ((self.details or '').lower(), self.sub_details, self.message)
+
+
+class Changelog:
+    MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)
+
+    def __init__(self, groups, repo):
+        self._groups = groups
+        self._repo = repo
+
+    def __str__(self):
+        return '\n'.join(self._format_groups(self._groups)).replace('\t', '    ')
+
+    def _format_groups(self, groups):
+        for item in CommitGroup:
+            group = groups[item]
+            if group:
+                yield self.format_module(item.value, group)
+
+    def format_module(self, name, group):
+        result = f'\n#### {name} changes\n' if name else '\n'
+        return result + '\n'.join(self._format_group(group))
+
+    def _format_group(self, group):
+        sorted_group = sorted(group, key=CommitInfo.key)
+        detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower())
+        for details, items in detail_groups:
+            if not details:
+                indent = ''
+            else:
+                yield f'- {details}'
+                indent = '\t'
+
+            if details == 'cleanup':
+                items, cleanup_misc_items = self._filter_cleanup_misc_items(items)
+
+            sub_detail_groups = itertools.groupby(items, lambda item: item.sub_details)
+            for sub_details, entries in sub_detail_groups:
+                if not sub_details:
+                    for entry in entries:
+                        yield f'{indent}- {self.format_single_change(entry)}'
+                    continue
+
+                prefix = f'{indent}- {", ".join(sub_details)}'
+                entries = list(entries)
+                if len(entries) == 1:
+                    yield f'{prefix}: {self.format_single_change(entries[0])}'
+                    continue
+
+                yield prefix
+                for entry in entries:
+                    yield f'{indent}\t- {self.format_single_change(entry)}'
+
+            if details == 'cleanup' and cleanup_misc_items:
+                yield from self._format_cleanup_misc_sub_group(cleanup_misc_items)
+
+    def _filter_cleanup_misc_items(self, items):
+        cleanup_misc_items = defaultdict(list)
+        non_misc_items = []
+        for item in items:
+            if self.MISC_RE.search(item.message):
+                cleanup_misc_items[tuple(item.commit.authors)].append(item)
+            else:
+                non_misc_items.append(item)
+
+        return non_misc_items, cleanup_misc_items
+
+    def _format_cleanup_misc_sub_group(self, group):
+        prefix = '\t- Miscellaneous'
+        if len(group) == 1:
+            yield f'{prefix}: {next(self._format_cleanup_misc_items(group))}'
+            return
+
+        yield prefix
+        for message in self._format_cleanup_misc_items(group):
+            yield f'\t\t- {message}'
+
+    def _format_cleanup_misc_items(self, group):
+        for authors, infos in group.items():
+            message = ', '.join(
+                self._format_message_link(None, info.commit.hash)
+                for info in sorted(infos, key=lambda item: item.commit.hash or ''))
+            yield f'{message} by {self._format_authors(authors)}'
+
+    def format_single_change(self, info):
+        message = self._format_message_link(info.message, info.commit.hash)
+        if info.issues:
+            message = f'{message} ({self._format_issues(info.issues)})'
+
+        if info.commit.authors:
+            message = f'{message} by {self._format_authors(info.commit.authors)}'
+
+        if info.fixes:
+            fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)
+
+            authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
+            if authors != info.commit.authors:
+                fix_message = f'{fix_message} by {self._format_authors(authors)}'
+
+            message = f'{message} (With fixes in {fix_message})'
+
+        return message
+
+    def _format_message_link(self, message, hash):
+        assert message or hash, 'Improperly defined commit message or override'
+        message = message if message else hash[:7]
+        return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message
+
+    def _format_issues(self, issues):
+        return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)
+
+    @staticmethod
+    def _format_authors(authors):
+        return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)
+
+    @property
+    def repo_url(self):
+        return f'{BASE_URL}/{self._repo}'
+
+
+class CommitRange:
+    COMMAND = 'git'
+    COMMIT_SEPARATOR = '-----'
+
+    AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
+    MESSAGE_RE = re.compile(r'''
+        (?:\[
+            (?P<prefix>[^\]\/:,]+)
+            (?:/(?P<details>[^\]:,]+))?
+            (?:[:,](?P<sub_details>[^\]]+))?
+        \]\ )?
+        (?:`?(?P<sub_details_alt>[^:`]+)`?: )?
+        (?P<message>.+?)
+        (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
+        ''', re.VERBOSE | re.DOTALL)
+    EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
+    FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+for)?|Revert)\s+([\da-f]{40})')
+    UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')
+
+    def __init__(self, start, end, default_author=None) -> None:
+        self._start = start
+        self._end = end
+        self._commits, self._fixes = self._get_commits_and_fixes(default_author)
+        self._commits_added = []
+
+    @classmethod
+    def from_single(cls, commitish='HEAD', default_author=None):
+        start_commitish = cls.get_prev_tag(commitish)
+        end_commitish = cls.get_next_tag(commitish)
+        if start_commitish == end_commitish:
+            start_commitish = cls.get_prev_tag(f'{commitish}~')
+        logger.info(f'Determined range from {commitish!r}: {start_commitish}..{end_commitish}')
+        return cls(start_commitish, end_commitish, default_author)
+
+    @classmethod
+    def get_prev_tag(cls, commitish):
+        command = [cls.COMMAND, 'describe', '--tags', '--abbrev=0', '--exclude=*[^0-9.]*', commitish]
+        return subprocess.check_output(command, text=True).strip()
+
+    @classmethod
+    def get_next_tag(cls, commitish):
+        result = subprocess.run(
+            [cls.COMMAND, 'describe', '--contains', '--abbrev=0', commitish],
+            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)
+        if result.returncode:
+            return 'HEAD'
+
+        return result.stdout.partition('~')[0].strip()
+
+    def __iter__(self):
+        return iter(itertools.chain(self._commits.values(), self._commits_added))
+
+    def __len__(self):
+        return len(self._commits) + len(self._commits_added)
+
+    def __contains__(self, commit):
+        if isinstance(commit, Commit):
+            if not commit.hash:
+                return False
+            commit = commit.hash
+
+        return commit in self._commits
+
+    def _is_ancestor(self, commitish):
+        return bool(subprocess.call(
+            [self.COMMAND, 'merge-base', '--is-ancestor', commitish, self._start]))
+
+    def _get_commits_and_fixes(self, default_author):
+        result = subprocess.check_output([
+            self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
+            f'{self._start}..{self._end}'], text=True)
+
+        commits = {}
+        fixes = defaultdict(list)
+        lines = iter(result.splitlines(False))
+        for line in lines:
+            commit_hash = line
+            short = next(lines)
+            skip = short.startswith('Release ') or short == '[version] update'
+
+            authors = [default_author] if default_author else []
+            for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
+                match = self.AUTHOR_INDICATOR_RE.match(line)
+                if match:
+                    authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)
+
+            commit = Commit(commit_hash, short, authors)
+            if skip:
+                logger.debug(f'Skipped commit: {commit}')
+                continue
+
+            fix_match = self.FIXES_RE.search(commit.short)
+            if fix_match:
+                commitish = fix_match.group(1)
+                fixes[commitish].append(commit)
+
+            commits[commit.hash] = commit
+
+        for commitish, fix_commits in fixes.items():
+            if commitish in commits:
+                hashes = ', '.join(commit.hash[:7] for commit in fix_commits)
+                logger.info(f'Found fix(es) for {commitish[:7]}: {hashes}')
+                for fix_commit in fix_commits:
+                    del commits[fix_commit.hash]
+            else:
+                logger.debug(f'Commit with fixes not in changes: {commitish[:7]}')
+
+        return commits, fixes
+
+    def apply_overrides(self, overrides):
+        for override in overrides:
+            when = override.get('when')
+            if when and when not in self and when != self._start:
+                logger.debug(f'Ignored {when!r}, not in commits {self._start!r}')
+                continue
+
+            override_hash = override.get('hash')
+            if override['action'] == 'add':
+                commit = Commit(override.get('hash'), override['short'], override.get('authors') or [])
+                logger.info(f'ADD    {commit}')
+                self._commits_added.append(commit)
+
+            elif override['action'] == 'remove':
+                if override_hash in self._commits:
+                    logger.info(f'REMOVE {self._commits[override_hash]}')
+                    del self._commits[override_hash]
+
+            elif override['action'] == 'change':
+                if override_hash not in self._commits:
+                    continue
+                commit = Commit(override_hash, override['short'], override['authors'])
+                logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
+                self._commits[commit.hash] = commit
+
+        self._commits = {key: value for key, value in reversed(self._commits.items())}
+
+    def groups(self):
+        groups = defaultdict(list)
+        for commit in self:
+            upstream_re = self.UPSTREAM_MERGE_RE.match(commit.short)
+            if upstream_re:
+                commit.short = f'[upstream] Merge up to youtube-dl {upstream_re.group(1)}'
+
+            match = self.MESSAGE_RE.fullmatch(commit.short)
+            if not match:
+                logger.error(f'Error parsing short commit message: {commit.short!r}')
+                continue
+
+            prefix, details, sub_details, sub_details_alt, message, issues = match.groups()
+            group = None
+            if prefix:
+                if prefix == 'priority':
+                    prefix, _, details = (details or '').partition('/')
+                    logger.debug(f'Priority: {message!r}')
+                    group = CommitGroup.PRIORITY
+
+                if not details and prefix:
+                    if prefix not in ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream'):
+                        logger.debug(f'Replaced details with {prefix!r}')
+                        details = prefix or None
+
+                if details == 'common':
+                    details = None
+
+                if details:
+                    details = details.strip()
+
+            else:
+                group = CommitGroup.CORE
+
+            sub_details = f'{sub_details or ""},{sub_details_alt or ""}'.lower().replace(':', ',')
+            sub_details = tuple(filter(None, map(str.strip, sub_details.split(','))))
+
+            issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []
+
+            if not group:
+                group = CommitGroup.get(prefix.lower())
+                if not group:
+                    if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
+                        group = CommitGroup.EXTRACTOR
+                    else:
+                        group = CommitGroup.POSTPROCESSOR
+                    logger.warning(f'Failed to map {commit.short!r}, selected {group.name}')
+
+            commit_info = CommitInfo(
+                details, sub_details, message.strip(),
+                issues, commit, self._fixes[commit.hash])
+            logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
+            groups[group].append(commit_info)
+
+        return groups
+
+
+def get_new_contributors(contributors_path, commits):
+    contributors = set()
+    if contributors_path.exists():
+        with contributors_path.open() as file:
+            for line in filter(None, map(str.strip, file)):
+                author, _, _ = line.partition(' (')
+                authors = author.split('/')
+                contributors.update(map(str.casefold, authors))
+
+    new_contributors = set()
+    for commit in commits:
+        for author in commit.authors:
+            author_folded = author.casefold()
+            if author_folded not in contributors:
+                contributors.add(author_folded)
+                new_contributors.add(author)
+
+    return sorted(new_contributors, key=str.casefold)
+
+
+if __name__ == '__main__':
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description='Create a changelog markdown from a git commit range')
+    parser.add_argument(
+        'commitish', default='HEAD', nargs='?',
+        help='The commitish to create the range from (default: %(default)s)')
+    parser.add_argument(
+        '-v', '--verbosity', action='count', default=0,
+        help='increase verbosity (can be used twice)')
+    parser.add_argument(
+        '-c', '--contributors', action='store_true',
+        help='update CONTRIBUTORS file (default: %(default)s)')
+    parser.add_argument(
+        '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS',
+        help='path to the CONTRIBUTORS file')
+    parser.add_argument(
+        '--no-override', action='store_true',
+        help='skip override json in commit generation (default: %(default)s)')
+    parser.add_argument(
+        '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json',
+        help='path to the changelog_override.json file')
+    parser.add_argument(
+        '--default-author', default='pukkandan',
+        help='the author to use without a author indicator (default: %(default)s)')
+    parser.add_argument(
+        '--repo', default='yt-dlp/yt-dlp',
+        help='the github repository to use for the operations (default: %(default)s)')
+    args = parser.parse_args()
+
+    logging.basicConfig(
+        datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}',
+        level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr)
+
+    commits = CommitRange.from_single(args.commitish, args.default_author)
+
+    if not args.no_override:
+        if args.override_path.exists():
+            with args.override_path.open() as file:
+                overrides = json.load(file)
+            commits.apply_overrides(overrides)
+        else:
+            logger.warning(f'File {args.override_path.as_posix()} does not exist')
+
+    logger.info(f'Loaded {len(commits)} commits')
+
+    new_contributors = get_new_contributors(args.contributors_path, commits)
+    if new_contributors:
+        if args.contributors:
+            with args.contributors_path.open('a') as file:
+                file.writelines(f'{contributor}\n' for contributor in new_contributors)
+        logger.info(f'New contributors: {", ".join(new_contributors)}')
+
+    print(Changelog(commits.groups(), args.repo))