Add support for daily and monthly statistics (#57576)

* Add support for daily and monthly statistics

* Remove debug code

* Format code

* Don't use dateutil package

* Remove 2 TODOs

* Remove TODO

* Add comments
Erik Montnemery 2021-10-19 08:29:23 +02:00 committed by GitHub
parent 6576225c48
commit 9a26a8cfd8
2 changed files with 243 additions and 17 deletions
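
In short, `statistics_during_period` now accepts `period="day"` and `period="month"` in addition to the existing `"hour"` and `"5minute"` resolutions; the daily and monthly rows are computed by reducing the stored hourly statistics. A minimal usage sketch, assuming a running `hass` instance and an invented entity id — only the new `period` values come from this commit:

    from datetime import timedelta

    import homeassistant.util.dt as dt_util
    from homeassistant.components.recorder.statistics import statistics_during_period

    start_time = dt_util.utcnow() - timedelta(days=60)

    # Pre-existing behaviour: one row per hour, straight from the statistics table.
    hourly = statistics_during_period(hass, start_time, period="hour")

    # New in this commit: hourly rows reduced to one row per local day or calendar month.
    daily = statistics_during_period(
        hass, start_time, statistic_ids=["sensor.energy"], period="day"
    )
    monthly = statistics_during_period(
        hass, start_time, statistic_ids=["sensor.energy"], period="month"
    )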


@@ -7,6 +7,7 @@ import dataclasses
from datetime import datetime, timedelta
from itertools import chain, groupby
import logging
from statistics import mean
from typing import TYPE_CHECKING, Any, Literal
from sqlalchemy import bindparam, func
@@ -583,13 +584,107 @@ def _statistics_during_period_query(
return baked_query # type: ignore[no-any-return]
def _reduce_statistics(
stats: dict[str, list[dict[str, Any]]],
same_period: Callable[[datetime, datetime], bool],
period_start_end: Callable[[datetime], tuple[datetime, datetime]],
period: timedelta,
) -> dict[str, list[dict[str, Any]]]:
"""Reduce hourly statistics to daily or monthly statistics."""
result: dict[str, list[dict[str, Any]]] = defaultdict(list)
for statistic_id, stat_list in stats.items():
max_values: list[float] = []
mean_values: list[float] = []
min_values: list[float] = []
prev_stat: dict[str, Any] = stat_list[0]
# Loop over the hourly statistics + a fake entry to end the period
for statistic in chain(
stat_list, ({"start": stat_list[-1]["start"] + period},)
):
if not same_period(prev_stat["start"], statistic["start"]):
start, end = period_start_end(prev_stat["start"])
# The previous statistic was the last entry of the period
result[statistic_id].append(
{
"statistic_id": statistic_id,
"start": start.isoformat(),
"end": end.isoformat(),
"mean": mean(mean_values) if mean_values else None,
"min": min(min_values) if min_values else None,
"max": max(max_values) if max_values else None,
"last_reset": prev_stat["last_reset"],
"state": prev_stat["state"],
"sum": prev_stat["sum"],
}
)
max_values = []
mean_values = []
min_values = []
if statistic.get("max") is not None:
max_values.append(statistic["max"])
if statistic.get("mean") is not None:
mean_values.append(statistic["mean"])
if statistic.get("min") is not None:
min_values.append(statistic["min"])
prev_stat = statistic
return result
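
`_reduce_statistics` walks the hourly rows once and flushes one aggregate row each time it crosses a period boundary; the fake trailing entry, one `period` past the last real row, guarantees the final open period is flushed as well. A stripped-down, self-contained sketch of the same flush-with-sentinel pattern (integer "hours" and invented keys, not the actual recorder data model):

    from itertools import chain

    rows = [{"start": 1, "mean": 2.0}, {"start": 2, "mean": 4.0}, {"start": 25, "mean": 6.0}]

    def same_period(a, b):
        # Pretend a period is a block of 24 integer hours.
        return a // 24 == b // 24

    result = []
    mean_values = []
    prev = rows[0]
    # The extra fake row forces the last open period to be emitted.
    for row in chain(rows, ({"start": rows[-1]["start"] + 24},)):
        if not same_period(prev["start"], row["start"]):
            result.append(
                {"period": prev["start"] // 24, "mean": sum(mean_values) / len(mean_values)}
            )
            mean_values = []
        if row.get("mean") is not None:
            mean_values.append(row["mean"])
        prev = row

    print(result)  # [{'period': 0, 'mean': 3.0}, {'period': 1, 'mean': 6.0}]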
def _reduce_statistics_per_day(
stats: dict[str, list[dict[str, Any]]]
) -> dict[str, list[dict[str, Any]]]:
"""Reduce hourly statistics to daily statistics."""
def same_period(time1: datetime, time2: datetime) -> bool:
"""Return True if time1 and time2 are in the same date."""
date1 = dt_util.as_local(time1).date()
date2 = dt_util.as_local(time2).date()
return date1 == date2
def period_start_end(time: datetime) -> tuple[datetime, datetime]:
"""Return the start and end of the period (day) time is within."""
start = dt_util.as_utc(
dt_util.as_local(time).replace(hour=0, minute=0, second=0, microsecond=0)
)
end = start + timedelta(days=1)
return (start, end)
return _reduce_statistics(stats, same_period, period_start_end, timedelta(days=1))
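
Day boundaries are taken in the configured local time zone and converted back to UTC, so a row recorded late in the UTC evening can be bucketed with the next local day. A quick check with a hard-coded UTC+2 offset standing in for the configured time zone (the real code goes through `dt_util`):

    from datetime import datetime, timedelta, timezone

    local_tz = timezone(timedelta(hours=2))  # stand-in for the configured time zone
    stat_start = datetime(2021, 8, 31, 23, 0, tzinfo=timezone.utc)  # 23:00 UTC, Aug 31
    print(stat_start.astimezone(local_tz).date())  # 2021-09-01 -> counted towards Sep 1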
def _reduce_statistics_per_month(
stats: dict[str, list[dict[str, Any]]]
) -> dict[str, list[dict[str, Any]]]:
"""Reduce hourly statistics to monthly statistics."""
def same_period(time1: datetime, time2: datetime) -> bool:
"""Return True if time1 and time2 are in the same year and month."""
date1 = dt_util.as_local(time1).date()
date2 = dt_util.as_local(time2).date()
return (date1.year, date1.month) == (date2.year, date2.month)
def period_start_end(time: datetime) -> tuple[datetime, datetime]:
"""Return the start and end of the period (month) time is within."""
start = dt_util.as_utc(
dt_util.as_local(time).replace(
day=1, hour=0, minute=0, second=0, microsecond=0
)
)
end = (start + timedelta(days=31)).replace(day=1)
return (start, end)
return _reduce_statistics(stats, same_period, period_start_end, timedelta(days=31))
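
The month arithmetic deliberately avoids `dateutil` (see the commit message): starting from the first of a month, adding 31 days always overshoots into the following month, because no month is longer than 31 days, and `replace(day=1)` then snaps back to its first day. The same `timedelta(days=31)` is used as the sentinel step, which is always enough to push the fake trailing entry into a later month. A couple of spot checks:

    from datetime import datetime, timedelta

    for first in (datetime(2021, 1, 1), datetime(2021, 2, 1), datetime(2021, 12, 1)):
        print((first + timedelta(days=31)).replace(day=1))
    # 2021-02-01 00:00:00
    # 2021-03-01 00:00:00
    # 2022-01-01 00:00:00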
def statistics_during_period(
hass: HomeAssistant,
start_time: datetime,
end_time: datetime | None = None,
statistic_ids: list[str] | None = None,
-    period: Literal["hour"] | Literal["5minute"] = "hour",
-) -> dict[str, list[dict[str, str]]]:
+    period: Literal["5minute", "day", "hour", "month"] = "hour",
+) -> dict[str, list[dict[str, Any]]]:
"""Return statistics during UTC period start_time - end_time for the statistic_ids.
If end_time is omitted, returns statistics newer than or equal to start_time.
@@ -606,14 +701,14 @@ def statistics_during_period(
if statistic_ids is not None:
metadata_ids = [metadata_id for metadata_id, _ in metadata.values()]
-    if period == "hour":
-        bakery = STATISTICS_BAKERY
-        base_query = QUERY_STATISTICS
-        table = Statistics
-    else:
+    if period == "5minute":
        bakery = STATISTICS_SHORT_TERM_BAKERY
        base_query = QUERY_STATISTICS_SHORT_TERM
        table = StatisticsShortTerm
+    else:
+        bakery = STATISTICS_BAKERY
+        base_query = QUERY_STATISTICS
+        table = Statistics
baked_query = _statistics_during_period_query(
hass, end_time, statistic_ids, bakery, base_query, table
@@ -627,10 +722,20 @@ def statistics_during_period(
if not stats:
return {}
# Return statistics combined with metadata
-    return _sorted_statistics_to_dict(
-        hass, session, stats, statistic_ids, metadata, True, table, start_time
-    )
+    if period not in ("day", "month"):
+        return _sorted_statistics_to_dict(
+            hass, session, stats, statistic_ids, metadata, True, table, start_time
+        )
+    result = _sorted_statistics_to_dict(
+        hass, session, stats, statistic_ids, metadata, True, table, start_time, True
+    )
+    if period == "day":
+        return _reduce_statistics_per_day(result)
+    return _reduce_statistics_per_month(result)
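
For the new periods the query still runs against the hourly `Statistics` table; the rows are then reduced in memory by the helpers above. Each reduced row keeps the same keys as an hourly row, roughly like this (values invented for illustration):

    {
        "statistic_id": "sensor.energy",
        "start": "2021-09-01T06:00:00+00:00",  # start of the local day, expressed in UTC
        "end": "2021-09-02T06:00:00+00:00",
        "mean": 21.5,
        "min": 18.0,
        "max": 25.0,
        "last_reset": None,
        "state": 10.0,
        "sum": 30.0,
    }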
def get_last_statistics(
hass: HomeAssistant, number_of_stats: int, statistic_id: str, convert_units: bool
@@ -718,6 +823,7 @@ def _sorted_statistics_to_dict(
convert_units: bool,
table: type[Statistics | StatisticsShortTerm],
start_time: datetime | None,
start_time_as_datetime: bool = False,
) -> dict[str, list[dict]]:
"""Convert SQL results into JSON friendly data structure."""
result: dict = defaultdict(list)
@@ -765,7 +871,7 @@ def _sorted_statistics_to_dict(
ent_results.append(
{
"statistic_id": statistic_id,
"start": start.isoformat(),
"start": start if start_time_as_datetime else start.isoformat(),
"end": end.isoformat(),
"mean": convert(db_state.mean, units),
"min": convert(db_state.min, units),


@@ -2017,16 +2017,19 @@ def test_compile_hourly_statistics_changing_statistics(
"db_supports_row_number,in_log,not_in_log",
[(True, "row_number", None), (False, None, "row_number")],
)
-def test_compile_statistics_hourly_summary(
+def test_compile_statistics_hourly_daily_monthly_summary(
hass_recorder, caplog, db_supports_row_number, in_log, not_in_log
):
"""Test compiling hourly statistics."""
"""Test compiling hourly statistics + monthly and daily summary."""
zero = dt_util.utcnow()
-    zero = zero.replace(minute=0, second=0, microsecond=0)
-    # Travel to the future, recorder gets confused otherwise because states are added
-    # before the start of the recorder_run
-    zero += timedelta(hours=1)
-    hass = hass_recorder()
+    # August 31st, 23:00 local time
+    zero = zero.replace(
+        year=2021, month=9, day=1, hour=5, minute=0, second=0, microsecond=0
+    )
+    with patch(
+        "homeassistant.components.recorder.models.dt_util.utcnow", return_value=zero
+    ):
+        hass = hass_recorder()
recorder = hass.data[DATA_INSTANCE]
recorder._db_supports_row_number = db_supports_row_number
setup_component(hass, "sensor", {})
@@ -2265,6 +2268,123 @@ def test_compile_statistics_hourly_summary(
start += timedelta(hours=1)
end += timedelta(hours=1)
assert stats == expected_stats
stats = statistics_during_period(hass, zero, period="day")
expected_stats = {
"sensor.test1": [],
"sensor.test2": [],
"sensor.test3": [],
"sensor.test4": [],
}
start = dt_util.parse_datetime("2021-08-31T06:00:00+00:00")
end = start + timedelta(days=1)
for i in range(2):
for entity_id in [
"sensor.test1",
"sensor.test2",
"sensor.test3",
"sensor.test4",
]:
expected_average = (
mean(expected_averages[entity_id][i * 12 : (i + 1) * 12])
if entity_id in expected_averages
else None
)
expected_minimum = (
min(expected_minima[entity_id][i * 12 : (i + 1) * 12])
if entity_id in expected_minima
else None
)
expected_maximum = (
max(expected_maxima[entity_id][i * 12 : (i + 1) * 12])
if entity_id in expected_maxima
else None
)
expected_state = (
expected_states[entity_id][(i + 1) * 12 - 1]
if entity_id in expected_states
else None
)
expected_sum = (
expected_sums[entity_id][(i + 1) * 12 - 1]
if entity_id in expected_sums
else None
)
expected_stats[entity_id].append(
{
"statistic_id": entity_id,
"start": process_timestamp_to_utc_isoformat(start),
"end": process_timestamp_to_utc_isoformat(end),
"mean": approx(expected_average),
"min": approx(expected_minimum),
"max": approx(expected_maximum),
"last_reset": None,
"state": expected_state,
"sum": expected_sum,
}
)
start += timedelta(days=1)
end += timedelta(days=1)
assert stats == expected_stats
stats = statistics_during_period(hass, zero, period="month")
expected_stats = {
"sensor.test1": [],
"sensor.test2": [],
"sensor.test3": [],
"sensor.test4": [],
}
start = dt_util.parse_datetime("2021-08-01T06:00:00+00:00")
end = dt_util.parse_datetime("2021-09-01T06:00:00+00:00")
for i in range(2):
for entity_id in [
"sensor.test1",
"sensor.test2",
"sensor.test3",
"sensor.test4",
]:
expected_average = (
mean(expected_averages[entity_id][i * 12 : (i + 1) * 12])
if entity_id in expected_averages
else None
)
expected_minimum = (
min(expected_minima[entity_id][i * 12 : (i + 1) * 12])
if entity_id in expected_minima
else None
)
expected_maximum = (
max(expected_maxima[entity_id][i * 12 : (i + 1) * 12])
if entity_id in expected_maxima
else None
)
expected_state = (
expected_states[entity_id][(i + 1) * 12 - 1]
if entity_id in expected_states
else None
)
expected_sum = (
expected_sums[entity_id][(i + 1) * 12 - 1]
if entity_id in expected_sums
else None
)
expected_stats[entity_id].append(
{
"statistic_id": entity_id,
"start": process_timestamp_to_utc_isoformat(start),
"end": process_timestamp_to_utc_isoformat(end),
"mean": approx(expected_average),
"min": approx(expected_minimum),
"max": approx(expected_maximum),
"last_reset": None,
"state": expected_state,
"sum": expected_sum,
}
)
start = (start + timedelta(days=31)).replace(day=1)
end = (end + timedelta(days=31)).replace(day=1)
assert stats == expected_stats
assert "Error while processing event StatisticsTask" not in caplog.text
if in_log:
assert in_log in caplog.text