"""Statistics helper."""
from __future__ import annotations
from collections import defaultdict
from collections.abc import Callable, Iterable
import dataclasses
from datetime import datetime, timedelta
from itertools import chain, groupby
import logging
from typing import TYPE_CHECKING, Any, Literal
from sqlalchemy import bindparam, func
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext import baked
from sqlalchemy.orm.scoping import scoped_session
from sqlalchemy.sql.expression import true
from homeassistant.const import (
PRESSURE_PA,
TEMP_CELSIUS,
VOLUME_CUBIC_FEET,
VOLUME_CUBIC_METERS,
)
from homeassistant.core import Event, HomeAssistant, callback
from homeassistant.helpers import entity_registry
import homeassistant.util.dt as dt_util
import homeassistant.util.pressure as pressure_util
import homeassistant.util.temperature as temperature_util
from homeassistant.util.unit_system import UnitSystem
import homeassistant.util.volume as volume_util
from .const import DOMAIN
from .models import (
StatisticData,
StatisticMetaData,
StatisticResult,
Statistics,
StatisticsMeta,
StatisticsRuns,
StatisticsShortTerm,
process_timestamp,
process_timestamp_to_utc_isoformat,
)
from .util import execute, retryable_database_job, session_scope
if TYPE_CHECKING:
from . import Recorder
QUERY_STATISTICS = [
Statistics.metadata_id,
Statistics.start,
Statistics.mean,
Statistics.min,
Statistics.max,
Statistics.last_reset,
Statistics.state,
Statistics.sum,
]
QUERY_STATISTICS_SHORT_TERM = [
StatisticsShortTerm.metadata_id,
StatisticsShortTerm.start,
StatisticsShortTerm.mean,
StatisticsShortTerm.min,
StatisticsShortTerm.max,
StatisticsShortTerm.last_reset,
StatisticsShortTerm.state,
StatisticsShortTerm.sum,
]
QUERY_STATISTICS_SUMMARY_MEAN = [
StatisticsShortTerm.metadata_id,
func.avg(StatisticsShortTerm.mean),
func.min(StatisticsShortTerm.min),
func.max(StatisticsShortTerm.max),
]
QUERY_STATISTICS_SUMMARY_SUM = [
StatisticsShortTerm.metadata_id,
StatisticsShortTerm.start,
StatisticsShortTerm.last_reset,
StatisticsShortTerm.state,
StatisticsShortTerm.sum,
func.row_number()
.over(
partition_by=StatisticsShortTerm.metadata_id,
order_by=StatisticsShortTerm.start.desc(),
)
.label("rownum"),
]
QUERY_STATISTICS_SUMMARY_SUM_LEGACY = [
StatisticsShortTerm.metadata_id,
StatisticsShortTerm.last_reset,
StatisticsShortTerm.state,
StatisticsShortTerm.sum,
]
QUERY_STATISTIC_META = [
StatisticsMeta.id,
StatisticsMeta.statistic_id,
StatisticsMeta.unit_of_measurement,
StatisticsMeta.has_mean,
StatisticsMeta.has_sum,
]
QUERY_STATISTIC_META_ID = [
StatisticsMeta.id,
StatisticsMeta.statistic_id,
]
STATISTICS_BAKERY = "recorder_statistics_bakery"
STATISTICS_META_BAKERY = "recorder_statistics_meta_bakery"
STATISTICS_SHORT_TERM_BAKERY = "recorder_statistics_short_term_bakery"
# Convert pressure and temperature statistics from the native unit used for statistics
# to the units configured by the user
UNIT_CONVERSIONS = {
PRESSURE_PA: lambda x, units: pressure_util.convert(
x, PRESSURE_PA, units.pressure_unit
)
if x is not None
else None,
TEMP_CELSIUS: lambda x, units: temperature_util.convert(
x, TEMP_CELSIUS, units.temperature_unit
)
if x is not None
else None,
VOLUME_CUBIC_METERS: lambda x, units: volume_util.convert(
x, VOLUME_CUBIC_METERS, _configured_unit(VOLUME_CUBIC_METERS, units)
)
if x is not None
else None,
}
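# Illustrative lookup (values made up): converting a stored temperature for a
# user with the imperial unit system:
#   convert = UNIT_CONVERSIONS[TEMP_CELSIUS]
#   convert(21.5, hass.config.units)  # -> 70.7 (°F)
# Statistics stored in any other unit are passed through unconverted.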
_LOGGER = logging.getLogger(__name__)
@dataclasses.dataclass
class ValidationIssue:
"""Error or warning message."""
type: str
data: dict[str, str | None] | None = None
def as_dict(self) -> dict:
"""Return dictionary version."""
return dataclasses.asdict(self)
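# Illustrative round-trip (the issue type string is made up):
#   ValidationIssue("units_changed", {"statistic_id": "sensor.energy"}).as_dict()
#   -> {"type": "units_changed", "data": {"statistic_id": "sensor.energy"}}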
def async_setup(hass: HomeAssistant) -> None:
"""Set up the history hooks."""
hass.data[STATISTICS_BAKERY] = baked.bakery()
hass.data[STATISTICS_META_BAKERY] = baked.bakery()
hass.data[STATISTICS_SHORT_TERM_BAKERY] = baked.bakery()
def entity_id_changed(event: Event) -> None:
"""Handle entity_id changed."""
old_entity_id = event.data["old_entity_id"]
entity_id = event.data["entity_id"]
with session_scope(hass=hass) as session:
session.query(StatisticsMeta).filter(
StatisticsMeta.statistic_id == old_entity_id,
StatisticsMeta.source == DOMAIN,
).update({StatisticsMeta.statistic_id: entity_id})
@callback
def entity_registry_changed_filter(event: Event) -> bool:
"""Handle entity_id changed filter."""
if event.data["action"] != "update" or "old_entity_id" not in event.data:
return False
return True
if hass.is_running:
hass.bus.async_listen(
entity_registry.EVENT_ENTITY_REGISTRY_UPDATED,
entity_id_changed,
event_filter=entity_registry_changed_filter,
)
def get_start_time() -> datetime:
"""Return start time."""
now = dt_util.utcnow()
current_period_minutes = now.minute - now.minute % 5
current_period = now.replace(minute=current_period_minutes, second=0, microsecond=0)
last_period = current_period - timedelta(minutes=5)
return last_period
def _update_or_add_metadata(
hass: HomeAssistant,
session: scoped_session,
new_metadata: StatisticMetaData,
) -> int:
"""Get metadata_id for a statistic_id.
If the statistic_id is previously unknown, add it. If it's already known, update
metadata if needed.
Updating metadata source is not possible.
"""
statistic_id = new_metadata["statistic_id"]
old_metadata_dict = get_metadata_with_session(hass, session, [statistic_id], None)
if not old_metadata_dict:
unit = new_metadata["unit_of_measurement"]
has_mean = new_metadata["has_mean"]
has_sum = new_metadata["has_sum"]
meta = StatisticsMeta.from_meta(DOMAIN, statistic_id, unit, has_mean, has_sum)
session.add(meta)
session.flush() # Flush to get the metadata id assigned
_LOGGER.debug(
"Added new statistics metadata for %s, new_metadata: %s",
statistic_id,
new_metadata,
)
return meta.id # type: ignore[no-any-return]
metadata_id, old_metadata = old_metadata_dict[statistic_id]
if (
old_metadata["has_mean"] != new_metadata["has_mean"]
or old_metadata["has_sum"] != new_metadata["has_sum"]
or old_metadata["unit_of_measurement"] != new_metadata["unit_of_measurement"]
):
session.query(StatisticsMeta).filter_by(statistic_id=statistic_id).update(
{
StatisticsMeta.has_mean: new_metadata["has_mean"],
StatisticsMeta.has_sum: new_metadata["has_sum"],
StatisticsMeta.unit_of_measurement: new_metadata["unit_of_measurement"],
},
synchronize_session=False,
)
_LOGGER.debug(
"Updated statistics metadata for %s, old_metadata: %s, new_metadata: %s",
statistic_id,
old_metadata,
new_metadata,
)
return metadata_id
def compile_hourly_statistics(
instance: Recorder, session: scoped_session, start: datetime
) -> None:
"""Compile hourly statistics.
This will summarize 5-minute statistics for one hour:
- average, min max is computed by a database query
- sum is taken from the last 5-minute entry during the hour
"""
start_time = start.replace(minute=0)
end_time = start_time + timedelta(hours=1)
# Compute last hour's average, min, max
summary: dict[str, StatisticData] = {}
baked_query = instance.hass.data[STATISTICS_SHORT_TERM_BAKERY](
lambda session: session.query(*QUERY_STATISTICS_SUMMARY_MEAN)
)
baked_query += lambda q: q.filter(
StatisticsShortTerm.start >= bindparam("start_time")
)
baked_query += lambda q: q.filter(StatisticsShortTerm.start < bindparam("end_time"))
baked_query += lambda q: q.group_by(StatisticsShortTerm.metadata_id)
baked_query += lambda q: q.order_by(StatisticsShortTerm.metadata_id)
stats = execute(
baked_query(session).params(start_time=start_time, end_time=end_time)
)
if stats:
for stat in stats:
metadata_id, _mean, _min, _max = stat
summary[metadata_id] = {
"start": start_time,
"mean": _mean,
"min": _min,
"max": _max,
}
# Get last hour's last sum
if instance._db_supports_row_number: # pylint: disable=[protected-access]
subquery = (
session.query(*QUERY_STATISTICS_SUMMARY_SUM)
.filter(StatisticsShortTerm.start >= bindparam("start_time"))
.filter(StatisticsShortTerm.start < bindparam("end_time"))
.subquery()
)
query = (
session.query(subquery)
.filter(subquery.c.rownum == 1)
.order_by(subquery.c.metadata_id)
)
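# rownum == 1 keeps only the newest 5-minute row per metadata_id, i.e. the
# last state/sum recorded during the hour (greatest-n-per-group via ROW_NUMBER).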
stats = execute(query.params(start_time=start_time, end_time=end_time))
if stats:
for stat in stats:
metadata_id, start, last_reset, state, _sum, _ = stat
if metadata_id in summary:
summary[metadata_id].update(
{
"last_reset": process_timestamp(last_reset),
"state": state,
"sum": _sum,
}
)
else:
summary[metadata_id] = {
"start": start_time,
"last_reset": process_timestamp(last_reset),
"state": state,
"sum": _sum,
}
else:
baked_query = instance.hass.data[STATISTICS_SHORT_TERM_BAKERY](
lambda session: session.query(*QUERY_STATISTICS_SUMMARY_SUM_LEGACY)
)
baked_query += lambda q: q.filter(
StatisticsShortTerm.start >= bindparam("start_time")
)
baked_query += lambda q: q.filter(
StatisticsShortTerm.start < bindparam("end_time")
)
baked_query += lambda q: q.order_by(
StatisticsShortTerm.metadata_id, StatisticsShortTerm.start.desc()
)
stats = execute(
baked_query(session).params(start_time=start_time, end_time=end_time)
)
if stats:
for metadata_id, group in groupby(stats, lambda stat: stat["metadata_id"]): # type: ignore
(
metadata_id,
last_reset,
state,
_sum,
) = next(group)
if metadata_id in summary:
summary[metadata_id].update(
{
"start": start_time,
"last_reset": process_timestamp(last_reset),
"state": state,
"sum": _sum,
}
)
else:
summary[metadata_id] = {
"start": start_time,
"last_reset": process_timestamp(last_reset),
"state": state,
"sum": _sum,
}
# Insert compiled hourly statistics in the database
for metadata_id, stat in summary.items():
session.add(Statistics.from_stats(metadata_id, stat))
@retryable_database_job("statistics")
def compile_statistics(instance: Recorder, start: datetime) -> bool:
"""Compile 5-minute statistics for all integrations with a recorder platform.
The actual calculation is delegated to the platforms.
"""
start = dt_util.as_utc(start)
end = start + timedelta(minutes=5)
# Return if we already have 5-minute statistics for the requested period
with session_scope(session=instance.get_session()) as session: # type: ignore
if session.query(StatisticsRuns).filter_by(start=start).first():
_LOGGER.debug("Statistics already compiled for %s-%s", start, end)
return True
_LOGGER.debug("Compiling statistics for %s-%s", start, end)
platform_stats: list[StatisticResult] = []
# Collect statistics from all platforms implementing support
for domain, platform in instance.hass.data[DOMAIN].items():
if not hasattr(platform, "compile_statistics"):
continue
platform_stat = platform.compile_statistics(instance.hass, start, end)
_LOGGER.debug(
"Statistics for %s during %s-%s: %s", domain, start, end, platform_stat
)
platform_stats.extend(platform_stat)
# Insert collected statistics in the database
with session_scope(session=instance.get_session()) as session: # type: ignore
for stats in platform_stats:
metadata_id = _update_or_add_metadata(instance.hass, session, stats["meta"])
for stat in stats["stat"]:
try:
session.add(StatisticsShortTerm.from_stats(metadata_id, stat))
except SQLAlchemyError:
_LOGGER.exception(
"Unexpected exception when inserting statistics %s:%s ",
metadata_id,
stats,
)
if start.minute == 55:
# A full hour is ready, summarize it
compile_hourly_statistics(instance, session, start)
session.add(StatisticsRuns(start=start))
return True
def get_metadata_with_session(
hass: HomeAssistant,
session: scoped_session,
statistic_ids: Iterable[str] | None,
statistic_type: Literal["mean"] | Literal["sum"] | None,
) -> dict[str, tuple[int, StatisticMetaData]]:
"""Fetch meta data.
Returns a dict of (metadata_id, StatisticMetaData) indexed by statistic_id.
If statistic_ids is given, fetch metadata only for the listed statistics_ids.
If statistic_type is given, fetch metadata only for statistic_ids supporting it.
"""
def _meta(metas: list, wanted_metadata_id: int) -> StatisticMetaData | None:
meta: StatisticMetaData | None = None
for metadata_id, statistic_id, unit, has_mean, has_sum in metas:
if metadata_id == wanted_metadata_id:
meta = {
"statistic_id": statistic_id,
"unit_of_measurement": unit,
"has_mean": has_mean,
"has_sum": has_sum,
}
return meta
# Fetch metadata from the database
baked_query = hass.data[STATISTICS_META_BAKERY](
lambda session: session.query(*QUERY_STATISTIC_META)
)
if statistic_ids is not None:
baked_query += lambda q: q.filter(
StatisticsMeta.statistic_id.in_(bindparam("statistic_ids"))
)
if statistic_type == "mean":
baked_query += lambda q: q.filter(StatisticsMeta.has_mean == true())
elif statistic_type == "sum":
baked_query += lambda q: q.filter(StatisticsMeta.has_sum == true())
result = execute(baked_query(session).params(statistic_ids=statistic_ids))
if not result:
return {}
metadata_ids = [metadata[0] for metadata in result]
# Prepare the result dict
metadata: dict[str, tuple[int, StatisticMetaData]] = {}
for _id in metadata_ids:
meta = _meta(result, _id)
if meta:
metadata[meta["statistic_id"]] = (_id, meta)
return metadata
def get_metadata(
hass: HomeAssistant,
statistic_ids: Iterable[str],
) -> dict[str, tuple[int, StatisticMetaData]]:
"""Return metadata for statistic_ids."""
with session_scope(hass=hass) as session:
return get_metadata_with_session(hass, session, statistic_ids, None)
def _configured_unit(unit: str, units: UnitSystem) -> str:
"""Return the pressure and temperature units configured by the user."""
if unit == PRESSURE_PA:
return units.pressure_unit
if unit == TEMP_CELSIUS:
return units.temperature_unit
if unit == VOLUME_CUBIC_METERS:
if units.is_metric:
return VOLUME_CUBIC_METERS
return VOLUME_CUBIC_FEET
return unit
def clear_statistics(instance: Recorder, statistic_ids: list[str]) -> None:
"""Clear statistics for a list of statistic_ids."""
with session_scope(session=instance.get_session()) as session: # type: ignore
session.query(StatisticsMeta).filter(
StatisticsMeta.statistic_id.in_(statistic_ids)
).delete(synchronize_session=False)
def update_statistics_metadata(
instance: Recorder, statistic_id: str, unit_of_measurement: str | None
) -> None:
"""Update statistics metadata for a statistic_id."""
with session_scope(session=instance.get_session()) as session: # type: ignore
session.query(StatisticsMeta).filter(
StatisticsMeta.statistic_id == statistic_id
).update({StatisticsMeta.unit_of_measurement: unit_of_measurement})
def list_statistic_ids(
hass: HomeAssistant,
statistic_type: Literal["mean"] | Literal["sum"] | None = None,
) -> list[dict]:
"""Return all statistic_ids and their unit of measurement.

Queries the database for existing statistic_ids, as well as integrations with
a recorder platform for statistic_ids which will be added in the next statistics
period.
"""
units = hass.config.units
statistic_ids = {}
# Query the database
with session_scope(hass=hass) as session:
metadata = get_metadata_with_session(hass, session, None, statistic_type)
for _, meta in metadata.values():
unit = meta["unit_of_measurement"]
if unit is not None:
# Display unit according to user settings
unit = _configured_unit(unit, units)
meta["unit_of_measurement"] = unit
statistic_ids = {
meta["statistic_id"]: meta["unit_of_measurement"]
for _, meta in metadata.values()
}
# Query all integrations with a registered recorder platform
for platform in hass.data[DOMAIN].values():
if not hasattr(platform, "list_statistic_ids"):
continue
platform_statistic_ids = platform.list_statistic_ids(hass, statistic_type)
for statistic_id, unit in platform_statistic_ids.items():
if unit is not None:
# Display unit according to user settings
unit = _configured_unit(unit, units)
platform_statistic_ids[statistic_id] = unit
for key, value in platform_statistic_ids.items():
statistic_ids.setdefault(key, value)
# Return a list of dicts with statistic_id and unit_of_measurement
return [
{"statistic_id": _id, "unit_of_measurement": unit}
for _id, unit in statistic_ids.items()
]
def _statistics_during_period_query(
hass: HomeAssistant,
end_time: datetime | None,
statistic_ids: list[str] | None,
bakery: Any,
base_query: Iterable,
table: type[Statistics | StatisticsShortTerm],
) -> Callable:
"""Prepare a database query for statistics during a given period.
This prepares a baked query, so we don't insert the parameters yet.
"""
baked_query = hass.data[bakery](lambda session: session.query(*base_query))
baked_query += lambda q: q.filter(table.start >= bindparam("start_time"))
if end_time is not None:
baked_query += lambda q: q.filter(table.start < bindparam("end_time"))
if statistic_ids is not None:
baked_query += lambda q: q.filter(
table.metadata_id.in_(bindparam("metadata_ids"))
)
baked_query += lambda q: q.order_by(table.metadata_id, table.start)
return baked_query # type: ignore[no-any-return]
def statistics_during_period(
hass: HomeAssistant,
start_time: datetime,
end_time: datetime | None = None,
statistic_ids: list[str] | None = None,
period: Literal["hour"] | Literal["5minute"] = "hour",
) -> dict[str, list[dict[str, str]]]:
"""Return statistics during UTC period start_time - end_time for the statistic_ids.
If end_time is omitted, returns statistics newer than or equal to start_time.
If statistic_ids is omitted, returns statistics for all statistics ids.
"""
metadata = None
with session_scope(hass=hass) as session:
# Fetch metadata for the given (or all) statistic_ids
metadata = get_metadata_with_session(hass, session, statistic_ids, None)
if not metadata:
return {}
metadata_ids = None
if statistic_ids is not None:
metadata_ids = [metadata_id for metadata_id, _ in metadata.values()]
if period == "hour":
bakery = STATISTICS_BAKERY
base_query = QUERY_STATISTICS
table = Statistics
else:
bakery = STATISTICS_SHORT_TERM_BAKERY
base_query = QUERY_STATISTICS_SHORT_TERM
table = StatisticsShortTerm
baked_query = _statistics_during_period_query(
hass, end_time, statistic_ids, bakery, base_query, table
)
stats = execute(
baked_query(session).params(
start_time=start_time, end_time=end_time, metadata_ids=metadata_ids
)
)
if not stats:
return {}
# Return statistics combined with metadata
return _sorted_statistics_to_dict(
hass, session, stats, statistic_ids, metadata, True, table, start_time
)
def get_last_statistics(
hass: HomeAssistant, number_of_stats: int, statistic_id: str, convert_units: bool
) -> dict[str, list[dict]]:
"""Return the last number_of_stats statistics for a given statistic_id."""
statistic_ids = [statistic_id]
with session_scope(hass=hass) as session:
# Fetch metadata for the given statistic_id
metadata = get_metadata_with_session(hass, session, statistic_ids, None)
if not metadata:
return {}
baked_query = hass.data[STATISTICS_SHORT_TERM_BAKERY](
lambda session: session.query(*QUERY_STATISTICS_SHORT_TERM)
)
baked_query += lambda q: q.filter_by(metadata_id=bindparam("metadata_id"))
metadata_id = metadata[statistic_id][0]
baked_query += lambda q: q.order_by(
StatisticsShortTerm.metadata_id, StatisticsShortTerm.start.desc()
)
baked_query += lambda q: q.limit(bindparam("number_of_stats"))
stats = execute(
baked_query(session).params(
number_of_stats=number_of_stats, metadata_id=metadata_id
)
)
if not stats:
return {}
# Return statistics combined with metadata
return _sorted_statistics_to_dict(
hass,
session,
stats,
statistic_ids,
metadata,
convert_units,
StatisticsShortTerm,
None,
)
def _statistics_at_time(
session: scoped_session,
metadata_ids: set[int],
table: type[Statistics | StatisticsShortTerm],
start_time: datetime,
) -> list | None:
"""Return last known statics, earlier than start_time, for the metadata_ids."""
# Fetch metadata for the given (or all) statistic_ids
if table == StatisticsShortTerm:
base_query = QUERY_STATISTICS_SHORT_TERM
else:
base_query = QUERY_STATISTICS
query = session.query(*base_query)
most_recent_statistic_ids = (
session.query(
func.max(table.id).label("max_id"),
)
.filter(table.start < start_time)
.filter(table.metadata_id.in_(metadata_ids))
)
most_recent_statistic_ids = most_recent_statistic_ids.group_by(table.metadata_id)
most_recent_statistic_ids = most_recent_statistic_ids.subquery()
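# Joining on max(id) per metadata_id returns exactly one newest row per
# statistic, assuming ids increase with insertion order.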
query = query.join(
most_recent_statistic_ids,
table.id == most_recent_statistic_ids.c.max_id,
)
return execute(query)
def _sorted_statistics_to_dict(
hass: HomeAssistant,
session: scoped_session,
stats: list,
statistic_ids: list[str] | None,
_metadata: dict[str, tuple[int, StatisticMetaData]],
convert_units: bool,
table: type[Statistics | StatisticsShortTerm],
start_time: datetime | None,
) -> dict[str, list[dict]]:
"""Convert SQL results into JSON friendly data structure."""
result: dict = defaultdict(list)
units = hass.config.units
metadata = dict(_metadata.values())
need_stat_at_start_time = set()
stats_at_start_time = {}
def no_conversion(val: Any, _: Any) -> float | None:
"""Return x."""
return val # type: ignore
# Set all statistic IDs to empty lists in result set to maintain the order
if statistic_ids is not None:
for stat_id in statistic_ids:
result[stat_id] = []
# Identify metadata IDs for which no data was available at the requested start time
for meta_id, group in groupby(stats, lambda stat: stat.metadata_id): # type: ignore
first_start_time = process_timestamp(next(group).start)
if start_time and first_start_time > start_time:
need_stat_at_start_time.add(meta_id)
# Fetch last known statistics for the needed metadata IDs
if need_stat_at_start_time:
assert start_time  # Cannot be None if need_stat_at_start_time is not empty
tmp = _statistics_at_time(session, need_stat_at_start_time, table, start_time)
if tmp:
for stat in tmp:
stats_at_start_time[stat.metadata_id] = (stat,)
# Append all statistic entries, and optionally do unit conversion
for meta_id, group in groupby(stats, lambda stat: stat.metadata_id): # type: ignore
unit = metadata[meta_id]["unit_of_measurement"]
statistic_id = metadata[meta_id]["statistic_id"]
convert: Callable[[Any, Any], float | None]
if convert_units:
convert = UNIT_CONVERSIONS.get(unit, lambda x, units: x) # type: ignore
else:
convert = no_conversion
ent_results = result[meta_id]
for db_state in chain(stats_at_start_time.get(meta_id, ()), group):
start = process_timestamp(db_state.start)
end = start + table.duration
ent_results.append(
{
"statistic_id": statistic_id,
"start": start.isoformat(),
"end": end.isoformat(),
"mean": convert(db_state.mean, units),
"min": convert(db_state.min, units),
"max": convert(db_state.max, units),
"last_reset": process_timestamp_to_utc_isoformat(
db_state.last_reset
),
"state": convert(db_state.state, units),
"sum": convert(db_state.sum, units),
}
)
# Filter out the empty lists if some states had 0 results.
return {metadata[key]["statistic_id"]: val for key, val in result.items() if val}
def validate_statistics(hass: HomeAssistant) -> dict[str, list[ValidationIssue]]:
"""Validate statistics."""
platform_validation: dict[str, list[ValidationIssue]] = {}
for platform in hass.data[DOMAIN].values():
if not hasattr(platform, "validate_statistics"):
continue
platform_validation.update(platform.validate_statistics(hass))
return platform_validation