Add support for daily and monthly statistics (#57576)

* Add support for daily and monthly statistics

* Remove debug code

* Format code

* Don't use dateutil package

* Remove 2 TODOs

* Remove TODO

* Add comments
Erik Montnemery 2021-10-19 08:29:23 +02:00 committed by GitHub
parent 6576225c48
commit 9a26a8cfd8
2 changed files with 243 additions and 17 deletions
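
In short, `statistics_during_period` now accepts `period="day"` and `period="month"` in addition to the existing `"hour"` and `"5minute"` resolutions; the daily and monthly rows are computed by reducing the stored hourly statistics. A minimal usage sketch, assuming a running `hass` instance and an invented entity id — only the new `period` values come from this commit:

    from datetime import timedelta

    import homeassistant.util.dt as dt_util
    from homeassistant.components.recorder.statistics import statistics_during_period

    start_time = dt_util.utcnow() - timedelta(days=60)

    # Pre-existing behaviour: one row per hour, straight from the statistics table.
    hourly = statistics_during_period(hass, start_time, period="hour")

    # New in this commit: hourly rows reduced to one row per local day or calendar month.
    daily = statistics_during_period(
        hass, start_time, statistic_ids=["sensor.energy"], period="day"
    )
    monthly = statistics_during_period(
        hass, start_time, statistic_ids=["sensor.energy"], period="month"
    )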


@@ -7,6 +7,7 @@ import dataclasses
from datetime import datetime, timedelta
from itertools import chain, groupby
import logging
from statistics import mean
from typing import TYPE_CHECKING, Any, Literal
from sqlalchemy import bindparam, func
@@ -583,13 +584,107 @@ def _statistics_during_period_query(
return baked_query # type: ignore[no-any-return]
def _reduce_statistics(
stats: dict[str, list[dict[str, Any]]],
same_period: Callable[[datetime, datetime], bool],
period_start_end: Callable[[datetime], tuple[datetime, datetime]],
period: timedelta,
) -> dict[str, list[dict[str, Any]]]:
"""Reduce hourly statistics to daily or monthly statistics."""
result: dict[str, list[dict[str, Any]]] = defaultdict(list)
for statistic_id, stat_list in stats.items():
max_values: list[float] = []
mean_values: list[float] = []
min_values: list[float] = []
prev_stat: dict[str, Any] = stat_list[0]
# Loop over the hourly statistics + a fake entry to end the period
for statistic in chain(
stat_list, ({"start": stat_list[-1]["start"] + period},)
):
if not same_period(prev_stat["start"], statistic["start"]):
start, end = period_start_end(prev_stat["start"])
# The previous statistic was the last entry of the period
result[statistic_id].append(
{
"statistic_id": statistic_id,
"start": start.isoformat(),
"end": end.isoformat(),
"mean": mean(mean_values) if mean_values else None,
"min": min(min_values) if min_values else None,
"max": max(max_values) if max_values else None,
"last_reset": prev_stat["last_reset"],
"state": prev_stat["state"],
"sum": prev_stat["sum"],
}
)
max_values = []
mean_values = []
min_values = []
if statistic.get("max") is not None:
max_values.append(statistic["max"])
if statistic.get("mean") is not None:
mean_values.append(statistic["mean"])
if statistic.get("min") is not None:
min_values.append(statistic["min"])
prev_stat = statistic
return result
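
`_reduce_statistics` walks the hourly rows once and flushes one aggregate row each time it crosses a period boundary; the fake trailing entry, one `period` past the last real row, guarantees the final open period is flushed as well. A stripped-down, self-contained sketch of the same flush-with-sentinel pattern (integer "hours" and invented keys, not the actual recorder data model):

    from itertools import chain

    rows = [{"start": 1, "mean": 2.0}, {"start": 2, "mean": 4.0}, {"start": 25, "mean": 6.0}]

    def same_period(a, b):
        # Pretend a period is a block of 24 integer hours.
        return a // 24 == b // 24

    result = []
    mean_values = []
    prev = rows[0]
    # The extra fake row forces the last open period to be emitted.
    for row in chain(rows, ({"start": rows[-1]["start"] + 24},)):
        if not same_period(prev["start"], row["start"]):
            result.append(
                {"period": prev["start"] // 24, "mean": sum(mean_values) / len(mean_values)}
            )
            mean_values = []
        if row.get("mean") is not None:
            mean_values.append(row["mean"])
        prev = row

    print(result)  # [{'period': 0, 'mean': 3.0}, {'period': 1, 'mean': 6.0}]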
def _reduce_statistics_per_day(
stats: dict[str, list[dict[str, Any]]]
) -> dict[str, list[dict[str, Any]]]:
"""Reduce hourly statistics to daily statistics."""
def same_period(time1: datetime, time2: datetime) -> bool:
"""Return True if time1 and time2 are in the same date."""
date1 = dt_util.as_local(time1).date()
date2 = dt_util.as_local(time2).date()
return date1 == date2
def period_start_end(time: datetime) -> tuple[datetime, datetime]:
"""Return the start and end of the period (day) time is within."""
start = dt_util.as_utc(
dt_util.as_local(time).replace(hour=0, minute=0, second=0, microsecond=0)
)
end = start + timedelta(days=1)
return (start, end)
return _reduce_statistics(stats, same_period, period_start_end, timedelta(days=1))
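
Day boundaries are taken in the configured local time zone and converted back to UTC, so a row recorded late in the UTC evening can be bucketed with the next local day. A quick check with a hard-coded UTC+2 offset standing in for the configured time zone (the real code goes through `dt_util`):

    from datetime import datetime, timedelta, timezone

    local_tz = timezone(timedelta(hours=2))  # stand-in for the configured time zone
    stat_start = datetime(2021, 8, 31, 23, 0, tzinfo=timezone.utc)  # 23:00 UTC, Aug 31
    print(stat_start.astimezone(local_tz).date())  # 2021-09-01 -> counted towards Sep 1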
def _reduce_statistics_per_month(
stats: dict[str, list[dict[str, Any]]]
) -> dict[str, list[dict[str, Any]]]:
"""Reduce hourly statistics to monthly statistics."""
def same_period(time1: datetime, time2: datetime) -> bool:
"""Return True if time1 and time2 are in the same year and month."""
date1 = dt_util.as_local(time1).date()
date2 = dt_util.as_local(time2).date()
return (date1.year, date1.month) == (date2.year, date2.month)
def period_start_end(time: datetime) -> tuple[datetime, datetime]:
"""Return the start and end of the period (month) time is within."""
start = dt_util.as_utc(
dt_util.as_local(time).replace(
day=1, hour=0, minute=0, second=0, microsecond=0
)
)
end = (start + timedelta(days=31)).replace(day=1)
return (start, end)
return _reduce_statistics(stats, same_period, period_start_end, timedelta(days=31))
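
The month arithmetic deliberately avoids `dateutil` (see the commit message): starting from the first of a month, adding 31 days always overshoots into the following month, because no month is longer than 31 days, and `replace(day=1)` then snaps back to its first day. The same `timedelta(days=31)` is used as the sentinel step, which is always enough to push the fake trailing entry into a later month. A couple of spot checks:

    from datetime import datetime, timedelta

    for first in (datetime(2021, 1, 1), datetime(2021, 2, 1), datetime(2021, 12, 1)):
        print((first + timedelta(days=31)).replace(day=1))
    # 2021-02-01 00:00:00
    # 2021-03-01 00:00:00
    # 2022-01-01 00:00:00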
def statistics_during_period(
hass: HomeAssistant,
start_time: datetime,
end_time: datetime | None = None,
statistic_ids: list[str] | None = None,
-    period: Literal["hour"] | Literal["5minute"] = "hour",
-) -> dict[str, list[dict[str, str]]]:
+    period: Literal["5minute", "day", "hour", "month"] = "hour",
+) -> dict[str, list[dict[str, Any]]]:
"""Return statistics during UTC period start_time - end_time for the statistic_ids.
If end_time is omitted, returns statistics newer than or equal to start_time.
@@ -606,14 +701,14 @@ def statistics_during_period(
if statistic_ids is not None:
metadata_ids = [metadata_id for metadata_id, _ in metadata.values()]
-    if period == "hour":
-        bakery = STATISTICS_BAKERY
-        base_query = QUERY_STATISTICS
-        table = Statistics
-    else:
+    if period == "5minute":
        bakery = STATISTICS_SHORT_TERM_BAKERY
        base_query = QUERY_STATISTICS_SHORT_TERM
        table = StatisticsShortTerm
+    else:
+        bakery = STATISTICS_BAKERY
+        base_query = QUERY_STATISTICS
+        table = Statistics
baked_query = _statistics_during_period_query(
hass, end_time, statistic_ids, bakery, base_query, table
@@ -627,10 +722,20 @@ def statistics_during_period(
if not stats:
return {}
# Return statistics combined with metadata
-    return _sorted_statistics_to_dict(
-        hass, session, stats, statistic_ids, metadata, True, table, start_time
-    )
+    if period not in ("day", "month"):
+        return _sorted_statistics_to_dict(
+            hass, session, stats, statistic_ids, metadata, True, table, start_time
+        )
+    result = _sorted_statistics_to_dict(
+        hass, session, stats, statistic_ids, metadata, True, table, start_time, True
+    )
+    if period == "day":
+        return _reduce_statistics_per_day(result)
+    return _reduce_statistics_per_month(result)
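
For the new periods the query still runs against the hourly `Statistics` table; the rows are then reduced in memory by the helpers above. Each reduced row keeps the same keys as an hourly row, roughly like this (values invented for illustration):

    {
        "statistic_id": "sensor.energy",
        "start": "2021-09-01T06:00:00+00:00",  # start of the local day, expressed in UTC
        "end": "2021-09-02T06:00:00+00:00",
        "mean": 21.5,
        "min": 18.0,
        "max": 25.0,
        "last_reset": None,
        "state": 10.0,
        "sum": 30.0,
    }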
def get_last_statistics(
hass: HomeAssistant, number_of_stats: int, statistic_id: str, convert_units: bool
@@ -718,6 +823,7 @@ def _sorted_statistics_to_dict(
convert_units: bool,
table: type[Statistics | StatisticsShortTerm],
start_time: datetime | None,
start_time_as_datetime: bool = False,
) -> dict[str, list[dict]]:
"""Convert SQL results into JSON friendly data structure."""
result: dict = defaultdict(list)
@@ -765,7 +871,7 @@ def _sorted_statistics_to_dict(
ent_results.append(
{
"statistic_id": statistic_id,
"start": start.isoformat(),
"start": start if start_time_as_datetime else start.isoformat(),
"end": end.isoformat(),
"mean": convert(db_state.mean, units),
"min": convert(db_state.min, units),


@@ -2017,16 +2017,19 @@ def test_compile_hourly_statistics_changing_statistics(
"db_supports_row_number,in_log,not_in_log",
[(True, "row_number", None), (False, None, "row_number")],
)
-def test_compile_statistics_hourly_summary(
+def test_compile_statistics_hourly_daily_monthly_summary(
hass_recorder, caplog, db_supports_row_number, in_log, not_in_log
):
"""Test compiling hourly statistics."""
"""Test compiling hourly statistics + monthly and daily summary."""
zero = dt_util.utcnow()
-    zero = zero.replace(minute=0, second=0, microsecond=0)
-    # Travel to the future, recorder gets confused otherwise because states are added
-    # before the start of the recorder_run
-    zero += timedelta(hours=1)
-    hass = hass_recorder()
+    # August 31st, 23:00 local time
+    zero = zero.replace(
+        year=2021, month=9, day=1, hour=5, minute=0, second=0, microsecond=0
+    )
+    with patch(
+        "homeassistant.components.recorder.models.dt_util.utcnow", return_value=zero
+    ):
+        hass = hass_recorder()
recorder = hass.data[DATA_INSTANCE]
recorder._db_supports_row_number = db_supports_row_number
setup_component(hass, "sensor", {})
@@ -2265,6 +2268,123 @@ def test_compile_statistics_hourly_summary(
start += timedelta(hours=1)
end += timedelta(hours=1)
assert stats == expected_stats
stats = statistics_during_period(hass, zero, period="day")
expected_stats = {
"sensor.test1": [],
"sensor.test2": [],
"sensor.test3": [],
"sensor.test4": [],
}
start = dt_util.parse_datetime("2021-08-31T06:00:00+00:00")
end = start + timedelta(days=1)
for i in range(2):
for entity_id in [
"sensor.test1",
"sensor.test2",
"sensor.test3",
"sensor.test4",
]:
expected_average = (
mean(expected_averages[entity_id][i * 12 : (i + 1) * 12])
if entity_id in expected_averages
else None
)
expected_minimum = (
min(expected_minima[entity_id][i * 12 : (i + 1) * 12])
if entity_id in expected_minima
else None
)
expected_maximum = (
max(expected_maxima[entity_id][i * 12 : (i + 1) * 12])
if entity_id in expected_maxima
else None
)
expected_state = (
expected_states[entity_id][(i + 1) * 12 - 1]
if entity_id in expected_states
else None
)
expected_sum = (
expected_sums[entity_id][(i + 1) * 12 - 1]
if entity_id in expected_sums
else None
)
expected_stats[entity_id].append(
{
"statistic_id": entity_id,
"start": process_timestamp_to_utc_isoformat(start),
"end": process_timestamp_to_utc_isoformat(end),
"mean": approx(expected_average),
"min": approx(expected_minimum),
"max": approx(expected_maximum),
"last_reset": None,
"state": expected_state,
"sum": expected_sum,
}
)
start += timedelta(days=1)
end += timedelta(days=1)
assert stats == expected_stats
stats = statistics_during_period(hass, zero, period="month")
expected_stats = {
"sensor.test1": [],
"sensor.test2": [],
"sensor.test3": [],
"sensor.test4": [],
}
start = dt_util.parse_datetime("2021-08-01T06:00:00+00:00")
end = dt_util.parse_datetime("2021-09-01T06:00:00+00:00")
for i in range(2):
for entity_id in [
"sensor.test1",
"sensor.test2",
"sensor.test3",
"sensor.test4",
]:
expected_average = (
mean(expected_averages[entity_id][i * 12 : (i + 1) * 12])
if entity_id in expected_averages
else None
)
expected_minimum = (
min(expected_minima[entity_id][i * 12 : (i + 1) * 12])
if entity_id in expected_minima
else None
)
expected_maximum = (
max(expected_maxima[entity_id][i * 12 : (i + 1) * 12])
if entity_id in expected_maxima
else None
)
expected_state = (
expected_states[entity_id][(i + 1) * 12 - 1]
if entity_id in expected_states
else None
)
expected_sum = (
expected_sums[entity_id][(i + 1) * 12 - 1]
if entity_id in expected_sums
else None
)
expected_stats[entity_id].append(
{
"statistic_id": entity_id,
"start": process_timestamp_to_utc_isoformat(start),
"end": process_timestamp_to_utc_isoformat(end),
"mean": approx(expected_average),
"min": approx(expected_minimum),
"max": approx(expected_maximum),
"last_reset": None,
"state": expected_state,
"sum": expected_sum,
}
)
start = (start + timedelta(days=31)).replace(day=1)
end = (end + timedelta(days=31)).replace(day=1)
assert stats == expected_stats
assert "Error while processing event StatisticsTask" not in caplog.text
if in_log:
assert in_log in caplog.text