From ed88c2abc9bcc29e80e6c9cc72967d2479554a6a Mon Sep 17 00:00:00 2001
From: Robert Resch <robert@resch.dev>
Date: Wed, 3 Apr 2024 15:43:12 +0200
Subject: [PATCH] Replace pytest-test-groups by custom tests splitter (#114381)

Co-authored-by: Martin Hjelmare <marhje52@gmail.com>
Co-authored-by: Paulus Schoutsen <balloob@gmail.com>
---
 .github/workflows/ci.yaml | 255 +++++++++++++++++++++++++++++++-------
 .gitignore                |   3 +
 requirements_test.txt     |   1 -
 script/split_tests.py     | 225 +++++++++++++++++++++++++++++++++
 4 files changed, 437 insertions(+), 47 deletions(-)
 create mode 100755 script/split_tests.py

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 9ab7e235a68..7f78b5fcdab 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -670,14 +670,61 @@ jobs:
           python --version
           mypy homeassistant/components/${{ needs.info.outputs.integrations_glob }}
 
-  pytest:
+  prepare-pytest-full:
     runs-on: ubuntu-22.04
     if: |
       (github.event_name != 'push' || github.event.repository.full_name == 'home-assistant/core')
       && github.event.inputs.lint-only != 'true'
       && github.event.inputs.pylint-only != 'true'
       && github.event.inputs.mypy-only != 'true'
-      && (needs.info.outputs.test_full_suite == 'true' || needs.info.outputs.tests_glob)
+      && needs.info.outputs.test_full_suite == 'true'
+    needs:
+      - info
+      - base
+    name: Split tests for full run
+    steps:
+      - name: Install additional OS dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get -y install \
+            bluez \
+            ffmpeg
+      - name: Check out code from GitHub
+        uses: actions/checkout@v4.1.2
+      - name: Set up Python ${{ env.DEFAULT_PYTHON }}
+        id: python
+        uses: actions/setup-python@v5.1.0
+        with:
+          python-version: ${{ env.DEFAULT_PYTHON }}
+          check-latest: true
+      - name: Restore base Python virtual environment
+        id: cache-venv
+        uses: actions/cache/restore@v4.0.2
+        with:
+          path: venv
+          fail-on-cache-miss: true
+          key: >-
+            ${{ runner.os }}-${{ steps.python.outputs.python-version }}-${{
+            needs.info.outputs.python_cache_key }}
+      - name: Run split_tests.py
+        run: |
+          . venv/bin/activate
+          python -m script.split_tests ${{ needs.info.outputs.test_group_count }} tests
+      - name: Upload pytest_buckets
+        uses: actions/upload-artifact@v4.3.1
+        with:
+          name: pytest_buckets
+          path: pytest_buckets.txt
+          overwrite: true
+
+  pytest-full:
+    runs-on: ubuntu-22.04
+    if: |
+      (github.event_name != 'push' || github.event.repository.full_name == 'home-assistant/core')
+      && github.event.inputs.lint-only != 'true'
+      && github.event.inputs.pylint-only != 'true'
+      && github.event.inputs.mypy-only != 'true'
+      && needs.info.outputs.test_full_suite == 'true'
     needs:
       - info
       - base
@@ -686,6 +733,7 @@ jobs:
       - lint-other
       - lint-ruff
       - mypy
+      - prepare-pytest-full
     strategy:
       fail-fast: false
       matrix:
@@ -722,12 +770,15 @@ jobs:
       - name: Register pytest slow test problem matcher
         run: |
           echo "::add-matcher::.github/workflows/matchers/pytest-slow.json"
+      - name: Download pytest_buckets
+        uses: actions/download-artifact@v4.1.4
+        with:
+          name: pytest_buckets
       - name: Compile English translations
         run: |
           . venv/bin/activate
           python3 -m script.translations develop --all
-      - name: Run pytest (fully)
-        if: needs.info.outputs.test_full_suite == 'true'
+      - name: Run pytest
         timeout-minutes: 60
         id: pytest-full
         env:
@@ -748,50 +799,13 @@ jobs:
             --durations=10 \
             -n auto \
             --dist=loadfile \
-            --test-group-count ${{ needs.info.outputs.test_group_count }} \
-            --test-group=${{ matrix.group }} \
             ${cov_params[@]} \
             -o console_output_style=count \
             -p no:sugar \
-            tests \
-              2>&1 | tee pytest-${{ matrix.python-version }}-${{ matrix.group }}.txt
-      - name: Run pytest (partially)
-        if: needs.info.outputs.test_full_suite == 'false'
-        timeout-minutes: 10
-        id: pytest-partial
-        shell: bash
-        env:
-          PYTHONDONTWRITEBYTECODE: 1
-        run: |
-          . venv/bin/activate
-          python --version
-          set -o pipefail
-
-          if [[ ! -f "tests/components/${{ matrix.group }}/__init__.py" ]]; then
-            echo "::error:: missing file tests/components/${{ matrix.group }}/__init__.py"
-            exit 1
-          fi
-
-          cov_params=()
-          if [[ "${{ needs.info.outputs.skip_coverage }}" != "true" ]]; then
-            cov_params+=(--cov="homeassistant.components.${{ matrix.group }}")
-            cov_params+=(--cov-report=xml)
-            cov_params+=(--cov-report=term-missing)
-          fi
-
-          python3 -b -X dev -m pytest \
-            -qq \
-            --timeout=9 \
-            -n auto \
-            ${cov_params[@]} \
-            -o console_output_style=count \
-            --durations=0 \
-            --durations-min=1 \
-            -p no:sugar \
-            tests/components/${{ matrix.group }} \
+            $(sed -n "${{ matrix.group }},1p" pytest_buckets.txt) \
               2>&1 | tee pytest-${{ matrix.python-version }}-${{ matrix.group }}.txt
       - name: Upload pytest output
-        if: success() || failure() && (steps.pytest-full.conclusion == 'failure' || steps.pytest-partial.conclusion == 'failure')
+        if: success() || failure() && steps.pytest-full.conclusion == 'failure'
         uses: actions/upload-artifact@v4.3.1
         with:
           name: pytest-${{ github.run_number }}-${{ matrix.python-version }}-${{ matrix.group }}
@@ -804,6 +818,8 @@ jobs:
           name: coverage-${{ matrix.python-version }}-${{ matrix.group }}
           path: coverage.xml
           overwrite: true
+      - name: Remove pytest_buckets
+        run: rm pytest_buckets.txt
       - name: Check dirty
         run: |
           ./script/check_dirty
@@ -1053,13 +1069,160 @@ jobs:
         run: |
           ./script/check_dirty
 
-  coverage:
-    name: Upload test coverage to Codecov
+  coverage-full:
+    name: Upload test coverage to Codecov (full suite)
     if: needs.info.outputs.skip_coverage != 'true'
     runs-on: ubuntu-22.04
     needs:
       - info
-      - pytest
+      - pytest-full
+      - pytest-postgres
+      - pytest-mariadb
+    timeout-minutes: 10
+    steps:
+      - name: Check out code from GitHub
+        uses: actions/checkout@v4.1.2
+      - name: Download all coverage artifacts
+        uses: actions/download-artifact@v4.1.4
+        with:
+          pattern: coverage-*
+      - name: Upload coverage to Codecov (full coverage)
+        if: needs.info.outputs.test_full_suite == 'true'
+        uses: Wandalen/wretry.action@v2.1.0
+        with:
+          action: codecov/codecov-action@v3.1.3
+          with: |
+            fail_ci_if_error: true
+            flags: full-suite
+            token: ${{ env.CODECOV_TOKEN }}
+          attempt_limit: 5
+          attempt_delay: 30000
+      - name: Upload coverage to Codecov (partial coverage)
+        if: needs.info.outputs.test_full_suite == 'false'
+        uses: Wandalen/wretry.action@v2.1.0
+        with:
+          action: codecov/codecov-action@v3.1.3
+          with: |
+            fail_ci_if_error: true
+            token: ${{ env.CODECOV_TOKEN }}
+          attempt_limit: 5
+          attempt_delay: 30000
+
+  pytest-partial:
+    runs-on: ubuntu-22.04
+    if: |
+      (github.event_name != 'push' || github.event.repository.full_name == 'home-assistant/core')
+      && github.event.inputs.lint-only != 'true'
+      && github.event.inputs.pylint-only != 'true'
+      && github.event.inputs.mypy-only != 'true'
+      && needs.info.outputs.tests_glob
+    needs:
+      - info
+      - base
+      - gen-requirements-all
+      - hassfest
+      - lint-other
+      - lint-ruff
+      - mypy
+    strategy:
+      fail-fast: false
+      matrix:
+        group: ${{ fromJson(needs.info.outputs.test_groups) }}
+        python-version: ${{ fromJson(needs.info.outputs.python_versions) }}
+    name: >-
+      Run tests Python ${{ matrix.python-version }} (${{ matrix.group }})
+    steps:
+      - name: Install additional OS dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get -y install \
+            bluez \
+            ffmpeg
+      - name: Check out code from GitHub
+        uses: actions/checkout@v4.1.2
+      - name: Set up Python ${{ matrix.python-version }}
+        id: python
+        uses: actions/setup-python@v5.1.0
+        with:
+          python-version: ${{ matrix.python-version }}
+          check-latest: true
+      - name: Restore full Python ${{ matrix.python-version }} virtual environment
+        id: cache-venv
+        uses: actions/cache/restore@v4.0.2
+        with:
+          path: venv
+          fail-on-cache-miss: true
+          key: ${{ runner.os }}-${{ steps.python.outputs.python-version }}-${{
+            needs.info.outputs.python_cache_key }}
+      - name: Register Python problem matcher
+        run: |
+          echo "::add-matcher::.github/workflows/matchers/python.json"
+      - name: Register pytest slow test problem matcher
+        run: |
+          echo "::add-matcher::.github/workflows/matchers/pytest-slow.json"
+      - name: Compile English translations
+        run: |
+          . venv/bin/activate
+          python3 -m script.translations develop --all
+      - name: Run pytest
+        timeout-minutes: 10
+        id: pytest-partial
+        shell: bash
+        env:
+          PYTHONDONTWRITEBYTECODE: 1
+        run: |
+          . venv/bin/activate
+          python --version
+          set -o pipefail
+
+          if [[ ! -f "tests/components/${{ matrix.group }}/__init__.py" ]]; then
+            echo "::error:: missing file tests/components/${{ matrix.group }}/__init__.py"
+            exit 1
+          fi
+
+          cov_params=()
+          if [[ "${{ needs.info.outputs.skip_coverage }}" != "true" ]]; then
+            cov_params+=(--cov="homeassistant.components.${{ matrix.group }}")
+            cov_params+=(--cov-report=xml)
+            cov_params+=(--cov-report=term-missing)
+          fi
+
+          python3 -b -X dev -m pytest \
+            -qq \
+            --timeout=9 \
+            -n auto \
+            ${cov_params[@]} \
+            -o console_output_style=count \
+            --durations=0 \
+            --durations-min=1 \
+            -p no:sugar \
+            tests/components/${{ matrix.group }} \
+              2>&1 | tee pytest-${{ matrix.python-version }}-${{ matrix.group }}.txt
+      - name: Upload pytest output
+        if: success() || failure() && steps.pytest-partial.conclusion == 'failure'
+        uses: actions/upload-artifact@v4.3.1
+        with:
+          name: pytest-${{ github.run_number }}-${{ matrix.python-version }}-${{ matrix.group }}
+          path: pytest-*.txt
+          overwrite: true
+      - name: Upload coverage artifact
+        if: needs.info.outputs.skip_coverage != 'true'
+        uses: actions/upload-artifact@v4.3.1
+        with:
+          name: coverage-${{ matrix.python-version }}-${{ matrix.group }}
+          path: coverage.xml
+          overwrite: true
+      - name: Check dirty
+        run: |
+          ./script/check_dirty
+
+  coverage-partial:
+    name: Upload test coverage to Codecov (partial suite)
+    if: needs.info.outputs.skip_coverage != 'true'
+    runs-on: ubuntu-22.04
+    needs:
+      - info
+      - pytest-partial
     timeout-minutes: 10
     steps:
       - name: Check out code from GitHub
diff --git a/.gitignore b/.gitignore
index 8a4154e4769..206595f06c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -132,3 +132,6 @@ tmp_cache
 
 # python-language-server / Rope
 .ropeproject
+
+# Will be created from script/split_tests.py
+pytest_buckets.txt
\ No newline at end of file
diff --git a/requirements_test.txt b/requirements_test.txt
index c2e5774e137..553f44eeb25 100644
--- a/requirements_test.txt
+++ b/requirements_test.txt
@@ -23,7 +23,6 @@ pytest-cov==5.0.0
 pytest-freezer==0.4.8
 pytest-github-actions-annotate-failures==0.2.0
 pytest-socket==0.7.0
-pytest-test-groups==1.0.3
 pytest-sugar==1.0.0
 pytest-timeout==2.3.1
 pytest-unordered==0.6.0
diff --git a/script/split_tests.py b/script/split_tests.py
new file mode 100755
index 00000000000..8da03bd749b
--- /dev/null
+++ b/script/split_tests.py
@@ -0,0 +1,225 @@
+#!/usr/bin/env python3
+"""Helper script to split test into n buckets."""
+
+from __future__ import annotations
+
+import argparse
+from dataclasses import dataclass, field
+from math import ceil
+from pathlib import Path
+import subprocess
+import sys
+from typing import Final
+
+
+class Bucket:
+    """Class to hold bucket."""
+
+    def __init__(
+        self,
+    ):
+        """Initialize bucket."""
+        self.total_tests = 0
+        self._paths: list[str] = []
+
+    def add(self, part: TestFolder | TestFile) -> None:
+        """Add tests to bucket."""
+        part.add_to_bucket()
+        self.total_tests += part.total_tests
+        self._paths.append(str(part.path))
+
+    def get_paths_line(self) -> str:
+        """Return paths."""
+        return " ".join(self._paths) + "\n"
+
+
+class BucketHolder:
+    """Class to hold buckets."""
+
+    def __init__(self, tests_per_bucket: int, bucket_count: int) -> None:
+        """Initialize bucket holder."""
+        self._tests_per_bucket = tests_per_bucket
+        self._bucket_count = bucket_count
+        self._buckets: list[Bucket] = [Bucket() for _ in range(bucket_count)]
+
+    def split_tests(self, test_folder: TestFolder) -> None:
+        """Split tests into buckets."""
+        digits = len(str(test_folder.total_tests))
+        sorted_tests = sorted(
+            test_folder.get_all_flatten(), reverse=True, key=lambda x: x.total_tests
+        )
+        for tests in sorted_tests:
+            print(f"{tests.total_tests:>{digits}} tests in {tests.path}")
+            if tests.added_to_bucket:
+                # Already added to bucket
+                continue
+
+            smallest_bucket = min(self._buckets, key=lambda x: x.total_tests)
+            if (
+                smallest_bucket.total_tests + tests.total_tests < self._tests_per_bucket
+            ) or isinstance(tests, TestFile):
+                smallest_bucket.add(tests)
+
+        # verify that all tests are added to a bucket
+        if not test_folder.added_to_bucket:
+            raise ValueError("Not all tests are added to a bucket")
+
+    def create_ouput_file(self) -> None:
+        """Create output file."""
+        with open("pytest_buckets.txt", "w") as file:
+            for idx, bucket in enumerate(self._buckets):
+                print(f"Bucket {idx+1} has {bucket.total_tests} tests")
+                file.write(bucket.get_paths_line())
+
+
+@dataclass
+class TestFile:
+    """Class represents a single test file and the number of tests it has."""
+
+    total_tests: int
+    path: Path
+    added_to_bucket: bool = field(default=False, init=False)
+
+    def add_to_bucket(self) -> None:
+        """Add test file to bucket."""
+        if self.added_to_bucket:
+            raise ValueError("Already added to bucket")
+        self.added_to_bucket = True
+
+    def __gt__(self, other: TestFile) -> bool:
+        """Return if greater than."""
+        return self.total_tests > other.total_tests
+
+
+class TestFolder:
+    """Class to hold a folder with test files and folders."""
+
+    def __init__(self, path: Path) -> None:
+        """Initialize test folder."""
+        self.path: Final = path
+        self.children: dict[Path, TestFolder | TestFile] = {}
+
+    @property
+    def total_tests(self) -> int:
+        """Return total tests."""
+        return sum([test.total_tests for test in self.children.values()])
+
+    @property
+    def added_to_bucket(self) -> bool:
+        """Return if added to bucket."""
+        return all(test.added_to_bucket for test in self.children.values())
+
+    def add_to_bucket(self) -> None:
+        """Add test file to bucket."""
+        if self.added_to_bucket:
+            raise ValueError("Already added to bucket")
+        for child in self.children.values():
+            child.add_to_bucket()
+
+    def __repr__(self) -> str:
+        """Return representation."""
+        return (
+            f"TestFolder(total_tests={self.total_tests}, children={len(self.children)})"
+        )
+
+    def add_test_file(self, file: TestFile) -> None:
+        """Add test file to folder."""
+        path = file.path
+        relative_path = path.relative_to(self.path)
+        if not relative_path.parts:
+            raise ValueError("Path is not a child of this folder")
+
+        if len(relative_path.parts) == 1:
+            self.children[path] = file
+            return
+
+        child_path = self.path / relative_path.parts[0]
+        if (child := self.children.get(child_path)) is None:
+            self.children[child_path] = child = TestFolder(child_path)
+        elif not isinstance(child, TestFolder):
+            raise ValueError("Child is not a folder")
+        child.add_test_file(file)
+
+    def get_all_flatten(self) -> list[TestFolder | TestFile]:
+        """Return self and all children as flatten list."""
+        result: list[TestFolder | TestFile] = [self]
+        for child in self.children.values():
+            if isinstance(child, TestFolder):
+                result.extend(child.get_all_flatten())
+            else:
+                result.append(child)
+        return result
+
+
+def collect_tests(path: Path) -> TestFolder:
+    """Collect all tests."""
+    result = subprocess.run(
+        ["pytest", "--collect-only", "-qq", "-p", "no:warnings", path],
+        check=False,
+        capture_output=True,
+        text=True,
+    )
+
+    if result.returncode != 0:
+        print("Failed to collect tests:")
+        print(result.stderr)
+        print(result.stdout)
+        sys.exit(1)
+
+    folder = TestFolder(path)
+
+    for line in result.stdout.splitlines():
+        if not line.strip():
+            continue
+        file_path, _, total_tests = line.partition(": ")
+        if not path or not total_tests:
+            print(f"Unexpected line: {line}")
+            sys.exit(1)
+
+        file = TestFile(int(total_tests), Path(file_path))
+        folder.add_test_file(file)
+
+    return folder
+
+
+def main() -> None:
+    """Execute script."""
+    parser = argparse.ArgumentParser(description="Split tests into n buckets.")
+
+    def check_greater_0(value: str) -> int:
+        ivalue = int(value)
+        if ivalue <= 0:
+            raise argparse.ArgumentTypeError(
+                f"{value} is an invalid. Must be greater than 0"
+            )
+        return ivalue
+
+    parser.add_argument(
+        "bucket_count",
+        help="Number of buckets to split tests into",
+        type=check_greater_0,
+    )
+    parser.add_argument(
+        "path",
+        help="Path to the test files to split into buckets",
+        type=Path,
+    )
+
+    arguments = parser.parse_args()
+
+    print("Collecting tests...")
+    tests = collect_tests(arguments.path)
+    tests_per_bucket = ceil(tests.total_tests / arguments.bucket_count)
+
+    bucket_holder = BucketHolder(tests_per_bucket, arguments.bucket_count)
+    print("Splitting tests...")
+    bucket_holder.split_tests(tests)
+
+    print(f"Total tests: {tests.total_tests}")
+    print(f"Estimated tests per bucket: {tests_per_bucket}")
+
+    bucket_holder.create_ouput_file()
+
+
+if __name__ == "__main__":
+    main()