Mirror of https://github.com/home-assistant/core

Resume entity id post migration after a restart (#90973)

* Resume entity id post migration after a restart

If the entity id migration finished and Home Assistant was
restarted during the post migration, the post migration would never
be resumed, which means the old index and its disk space would never be recovered

* add migration resume test
J. Nick Koston 2023-04-06 15:16:45 -10:00 committed by GitHub
parent d4c10f0a98
commit 398762fdd5
5 changed files with 911 additions and 2 deletions


@@ -58,6 +58,7 @@ from .const import (
SupportedDialect,
)
from .db_schema import (
LEGACY_STATES_ENTITY_ID_LAST_UPDATED_INDEX,
LEGACY_STATES_EVENT_ID_INDEX,
SCHEMA_VERSION,
TABLE_STATES,
@@ -96,6 +97,7 @@ from .tasks import (
CompileMissingStatisticsTask,
DatabaseLockTask,
EntityIDMigrationTask,
EntityIDPostMigrationTask,
EventIdMigrationTask,
EventsContextIDMigrationTask,
EventTask,
@@ -757,6 +759,18 @@ class Recorder(threading.Thread):
else:
_LOGGER.debug("Activating states_meta manager as all data is migrated")
self.states_meta_manager.active = True
with contextlib.suppress(SQLAlchemyError):
# If ix_states_entity_id_last_updated_ts still exists
# on the states table it means the entity id migration
# finished but the EntityIDPostMigrationTask did not,
# because Home Assistant was restarted in the middle of it.
# We need to pick back up where we left off.
if get_index_by_name(
session,
TABLE_STATES,
LEGACY_STATES_ENTITY_ID_LAST_UPDATED_INDEX,
):
self.queue_task(EntityIDPostMigrationTask())
if self.schema_version > LEGACY_STATES_EVENT_ID_INDEX_SCHEMA_VERSION:
with contextlib.suppress(SQLAlchemyError):


@@ -118,6 +118,7 @@ METADATA_ID_LAST_UPDATED_INDEX_TS = "ix_states_metadata_id_last_updated_ts"
EVENTS_CONTEXT_ID_BIN_INDEX = "ix_events_context_id_bin"
STATES_CONTEXT_ID_BIN_INDEX = "ix_states_context_id_bin"
LEGACY_STATES_EVENT_ID_INDEX = "ix_states_event_id"
LEGACY_STATES_ENTITY_ID_LAST_UPDATED_INDEX = "ix_states_entity_id_last_updated_ts"
CONTEXT_ID_BIN_MAX_LENGTH = 16
MYSQL_COLLATE = "utf8mb4_unicode_ci"


@@ -48,6 +48,7 @@ from .const import SupportedDialect
from .db_schema import (
CONTEXT_ID_BIN_MAX_LENGTH,
DOUBLE_PRECISION_TYPE_SQL,
LEGACY_STATES_ENTITY_ID_LAST_UPDATED_INDEX,
LEGACY_STATES_EVENT_ID_INDEX,
MYSQL_COLLATE,
MYSQL_DEFAULT_CHARSET,
@@ -1586,7 +1587,7 @@ def post_migrate_entity_ids(instance: Recorder) -> bool:
if is_done:
# Drop the old indexes since they are no longer needed
_drop_index(session_maker, "states", "ix_states_entity_id_last_updated_ts")
_drop_index(session_maker, "states", LEGACY_STATES_ENTITY_ID_LAST_UPDATED_INDEX)
_LOGGER.debug("Cleanup legacy entity_ids done=%s", is_done)
return is_done


@@ -0,0 +1,752 @@
"""Models for SQLAlchemy.
This file contains the model definitions for schema version 32.
It is used to test the schema migration logic.
"""
from __future__ import annotations
from collections.abc import Callable
from datetime import datetime, timedelta
import logging
import time
from typing import Any, TypedDict, cast, overload
import ciso8601
from fnv_hash_fast import fnv1a_32
from sqlalchemy import (
JSON,
BigInteger,
Boolean,
Column,
DateTime,
Float,
ForeignKey,
Identity,
Index,
Integer,
LargeBinary,
SmallInteger,
String,
Text,
distinct,
type_coerce,
)
from sqlalchemy.dialects import mysql, oracle, postgresql, sqlite
from sqlalchemy.orm import aliased, declarative_base, relationship
from sqlalchemy.orm.session import Session
from typing_extensions import Self
from homeassistant.components.recorder.const import SupportedDialect
from homeassistant.const import (
ATTR_ATTRIBUTION,
ATTR_RESTORED,
ATTR_SUPPORTED_FEATURES,
MAX_LENGTH_EVENT_CONTEXT_ID,
MAX_LENGTH_EVENT_EVENT_TYPE,
MAX_LENGTH_EVENT_ORIGIN,
MAX_LENGTH_STATE_ENTITY_ID,
MAX_LENGTH_STATE_STATE,
)
from homeassistant.core import Context, Event, EventOrigin, State, split_entity_id
from homeassistant.helpers import entity_registry as er
from homeassistant.helpers.json import JSON_DUMP, json_bytes
import homeassistant.util.dt as dt_util
from homeassistant.util.json import JSON_DECODE_EXCEPTIONS, json_loads
ALL_DOMAIN_EXCLUDE_ATTRS = {ATTR_ATTRIBUTION, ATTR_RESTORED, ATTR_SUPPORTED_FEATURES}
# SQLAlchemy Schema
# pylint: disable=invalid-name
Base = declarative_base()
SCHEMA_VERSION = 32
_LOGGER = logging.getLogger(__name__)
TABLE_EVENTS = "events"
TABLE_EVENT_DATA = "event_data"
TABLE_EVENT_TYPES = "event_types"
TABLE_STATES = "states"
TABLE_STATE_ATTRIBUTES = "state_attributes"
TABLE_STATES_META = "states_meta"
TABLE_RECORDER_RUNS = "recorder_runs"
TABLE_SCHEMA_CHANGES = "schema_changes"
TABLE_STATISTICS = "statistics"
TABLE_STATISTICS_META = "statistics_meta"
TABLE_STATISTICS_RUNS = "statistics_runs"
TABLE_STATISTICS_SHORT_TERM = "statistics_short_term"
ALL_TABLES = [
TABLE_STATES,
TABLE_STATE_ATTRIBUTES,
TABLE_STATES_META,
TABLE_EVENTS,
TABLE_EVENT_DATA,
TABLE_EVENT_TYPES,
TABLE_RECORDER_RUNS,
TABLE_SCHEMA_CHANGES,
TABLE_STATISTICS,
TABLE_STATISTICS_META,
TABLE_STATISTICS_RUNS,
TABLE_STATISTICS_SHORT_TERM,
]
TABLES_TO_CHECK = [
TABLE_STATES,
TABLE_EVENTS,
TABLE_RECORDER_RUNS,
TABLE_SCHEMA_CHANGES,
]
LAST_UPDATED_INDEX = "ix_states_last_updated"
ENTITY_ID_LAST_UPDATED_TS_INDEX = "ix_states_entity_id_last_updated_ts"
EVENTS_CONTEXT_ID_INDEX = "ix_events_context_id"
STATES_CONTEXT_ID_INDEX = "ix_states_context_id"
CONTEXT_ID_BIN_MAX_LENGTH = 16
EVENTS_CONTEXT_ID_BIN_INDEX = "ix_events_context_id_bin"
STATES_CONTEXT_ID_BIN_INDEX = "ix_states_context_id_bin"
class FAST_PYSQLITE_DATETIME(sqlite.DATETIME): # type: ignore[misc]
"""Use ciso8601 to parse datetimes instead of sqlalchemy built-in regex."""
def result_processor(self, dialect, coltype): # type: ignore[no-untyped-def]
"""Offload the datetime parsing to ciso8601."""
return lambda value: None if value is None else ciso8601.parse_datetime(value)
JSON_VARIANT_CAST = Text().with_variant(
postgresql.JSON(none_as_null=True), "postgresql"
)
JSONB_VARIANT_CAST = Text().with_variant(
postgresql.JSONB(none_as_null=True), "postgresql"
)
DATETIME_TYPE = (
DateTime(timezone=True)
.with_variant(mysql.DATETIME(timezone=True, fsp=6), "mysql")
.with_variant(FAST_PYSQLITE_DATETIME(), "sqlite")
)
DOUBLE_TYPE = (
Float()
.with_variant(mysql.DOUBLE(asdecimal=False), "mysql")
.with_variant(oracle.DOUBLE_PRECISION(), "oracle")
.with_variant(postgresql.DOUBLE_PRECISION(), "postgresql")
)
TIMESTAMP_TYPE = DOUBLE_TYPE
class UnsupportedDialect(Exception):
"""The dialect or its version is not supported."""
class StatisticResult(TypedDict):
"""Statistic result data class.
Allows multiple datapoints for the same statistic_id.
"""
meta: StatisticMetaData
stat: StatisticData
class StatisticDataBase(TypedDict):
"""Mandatory fields for statistic data class."""
start: datetime
class StatisticData(StatisticDataBase, total=False):
"""Statistic data class."""
mean: float
min: float
max: float
last_reset: datetime | None
state: float
sum: float
class StatisticMetaData(TypedDict):
"""Statistic meta data class."""
has_mean: bool
has_sum: bool
name: str | None
source: str
statistic_id: str
unit_of_measurement: str | None
class JSONLiteral(JSON): # type: ignore[misc]
"""Teach SA how to literalize json."""
def literal_processor(self, dialect: str) -> Callable[[Any], str]:
"""Processor to convert a value to JSON."""
def process(value: Any) -> str:
"""Dump json."""
return JSON_DUMP(value)
return process
EVENT_ORIGIN_ORDER = [EventOrigin.local, EventOrigin.remote]
EVENT_ORIGIN_TO_IDX = {origin: idx for idx, origin in enumerate(EVENT_ORIGIN_ORDER)}
class Events(Base): # type: ignore[misc,valid-type]
"""Event history data."""
__table_args__ = (
# Used for fetching events at a specific time
# see logbook
Index("ix_events_event_type_time_fired", "event_type", "time_fired"),
Index(
EVENTS_CONTEXT_ID_BIN_INDEX,
"context_id_bin",
mysql_length=CONTEXT_ID_BIN_MAX_LENGTH,
mariadb_length=CONTEXT_ID_BIN_MAX_LENGTH,
),
{"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"},
)
__tablename__ = TABLE_EVENTS
event_id = Column(Integer, Identity(), primary_key=True)
event_type = Column(String(MAX_LENGTH_EVENT_EVENT_TYPE))
event_data = Column(Text().with_variant(mysql.LONGTEXT, "mysql"))
origin = Column(String(MAX_LENGTH_EVENT_ORIGIN)) # no longer used for new rows
origin_idx = Column(SmallInteger)
time_fired = Column(DATETIME_TYPE, index=True)
time_fired_ts = Column(TIMESTAMP_TYPE, index=True)
context_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID), index=True)
context_user_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID))
context_parent_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID))
data_id = Column(Integer, ForeignKey("event_data.data_id"), index=True)
context_id_bin = Column(
LargeBinary(CONTEXT_ID_BIN_MAX_LENGTH)
) # *** Not originally in v32, only added for recorder to startup ok
context_user_id_bin = Column(
LargeBinary(CONTEXT_ID_BIN_MAX_LENGTH)
) # *** Not originally in v32, only added for recorder to startup ok
context_parent_id_bin = Column(
LargeBinary(CONTEXT_ID_BIN_MAX_LENGTH)
) # *** Not originally in v32, only added for recorder to startup ok
event_type_id = Column(
Integer, ForeignKey("event_types.event_type_id"), index=True
) # *** Not originally in v32, only added for recorder to startup ok
event_data_rel = relationship("EventData")
event_type_rel = relationship("EventTypes")
def __repr__(self) -> str:
"""Return string representation of instance for debugging."""
return (
"<recorder.Events("
f"id={self.event_id}, type='{self.event_type}', "
f"origin_idx='{self.origin_idx}', time_fired='{self.time_fired}'"
f", data_id={self.data_id})>"
)
@staticmethod
def from_event(event: Event) -> Events:
"""Create an event database object from a native event."""
return Events(
event_type=event.event_type,
event_data=None,
origin_idx=EVENT_ORIGIN_TO_IDX.get(event.origin),
time_fired=event.time_fired,
context_id=event.context.id,
context_user_id=event.context.user_id,
context_parent_id=event.context.parent_id,
)
def to_native(self, validate_entity_id: bool = True) -> Event | None:
"""Convert to a native HA Event."""
context = Context(
id=self.context_id,
user_id=self.context_user_id,
parent_id=self.context_parent_id,
)
try:
return Event(
self.event_type,
json_loads(self.event_data) if self.event_data else {},
EventOrigin(self.origin)
if self.origin
else EVENT_ORIGIN_ORDER[self.origin_idx],
process_timestamp(self.time_fired),
context=context,
)
except JSON_DECODE_EXCEPTIONS:
# When json_loads fails
_LOGGER.exception("Error converting to event: %s", self)
return None
class EventData(Base): # type: ignore[misc,valid-type]
"""Event data history."""
__table_args__ = (
{"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"},
)
__tablename__ = TABLE_EVENT_DATA
data_id = Column(Integer, Identity(), primary_key=True)
hash = Column(BigInteger, index=True)
# Note that this is not named attributes to avoid confusion with the states table
shared_data = Column(Text().with_variant(mysql.LONGTEXT, "mysql"))
def __repr__(self) -> str:
"""Return string representation of instance for debugging."""
return (
"<recorder.EventData("
f"id={self.data_id}, hash='{self.hash}', data='{self.shared_data}'"
")>"
)
@staticmethod
def from_event(event: Event) -> EventData:
"""Create object from an event."""
shared_data = json_bytes(event.data)
return EventData(
shared_data=shared_data.decode("utf-8"),
hash=EventData.hash_shared_data_bytes(shared_data),
)
@staticmethod
def shared_data_bytes_from_event(
event: Event, dialect: SupportedDialect | None
) -> bytes:
"""Create shared_data from an event."""
return json_bytes(event.data)
@staticmethod
def hash_shared_data_bytes(shared_data_bytes: bytes) -> int:
"""Return the hash of json encoded shared data."""
return cast(int, fnv1a_32(shared_data_bytes))
def to_native(self) -> dict[str, Any]:
"""Convert to an HA state object."""
try:
return cast(dict[str, Any], json_loads(self.shared_data))
except JSON_DECODE_EXCEPTIONS:
_LOGGER.exception("Error converting row to event data: %s", self)
return {}
# *** Not originally in v32, only added for recorder to startup ok
# This is not being tested by the v32 statistics migration tests
class EventTypes(Base): # type: ignore[misc,valid-type]
"""Event type history."""
__table_args__ = (
{"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"},
)
__tablename__ = TABLE_EVENT_TYPES
event_type_id = Column(Integer, Identity(), primary_key=True)
event_type = Column(String(MAX_LENGTH_EVENT_EVENT_TYPE))
class States(Base): # type: ignore[misc,valid-type]
"""State change history."""
__table_args__ = (
# Used for fetching the state of entities at a specific time
# (get_states in history.py)
Index(ENTITY_ID_LAST_UPDATED_TS_INDEX, "entity_id", "last_updated_ts"),
Index(
STATES_CONTEXT_ID_BIN_INDEX,
"context_id_bin",
mysql_length=CONTEXT_ID_BIN_MAX_LENGTH,
mariadb_length=CONTEXT_ID_BIN_MAX_LENGTH,
),
{"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"},
)
__tablename__ = TABLE_STATES
state_id = Column(Integer, Identity(), primary_key=True)
entity_id = Column(String(MAX_LENGTH_STATE_ENTITY_ID))
state = Column(String(MAX_LENGTH_STATE_STATE))
attributes = Column(
Text().with_variant(mysql.LONGTEXT, "mysql")
) # no longer used for new rows
event_id = Column( # no longer used for new rows
Integer, ForeignKey("events.event_id", ondelete="CASCADE"), index=True
)
last_changed = Column(DATETIME_TYPE)
last_changed_ts = Column(TIMESTAMP_TYPE)
last_updated = Column(DATETIME_TYPE, default=dt_util.utcnow, index=True)
last_updated_ts = Column(TIMESTAMP_TYPE, default=time.time, index=True)
old_state_id = Column(Integer, ForeignKey("states.state_id"), index=True)
attributes_id = Column(
Integer, ForeignKey("state_attributes.attributes_id"), index=True
)
context_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID), index=True)
context_user_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID))
context_parent_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID))
origin_idx = Column(SmallInteger) # 0 is local, 1 is remote
context_id_bin = Column(
LargeBinary(CONTEXT_ID_BIN_MAX_LENGTH)
) # *** Not originally in v32, only added for recorder to startup ok
context_user_id_bin = Column(
LargeBinary(CONTEXT_ID_BIN_MAX_LENGTH)
) # *** Not originally in v32, only added for recorder to startup ok
context_parent_id_bin = Column(
LargeBinary(CONTEXT_ID_BIN_MAX_LENGTH)
) # *** Not originally in v32, only added for recorder to startup ok
metadata_id = Column(
Integer, ForeignKey("states_meta.metadata_id"), index=True
) # *** Not originally in v32, only added for recorder to startup ok
states_meta_rel = relationship("StatesMeta")
old_state = relationship("States", remote_side=[state_id])
state_attributes = relationship("StateAttributes")
def __repr__(self) -> str:
"""Return string representation of instance for debugging."""
return (
f"<recorder.States(id={self.state_id}, entity_id='{self.entity_id}',"
f" state='{self.state}', event_id='{self.event_id}',"
f" last_updated='{self.last_updated.isoformat(sep=' ', timespec='seconds')}',"
f" old_state_id={self.old_state_id}, attributes_id={self.attributes_id})>"
)
@staticmethod
def from_event(event: Event) -> States:
"""Create object from a state_changed event."""
entity_id = event.data["entity_id"]
state: State | None = event.data.get("new_state")
dbstate = States(
entity_id=entity_id,
attributes=None,
context_id=event.context.id,
context_user_id=event.context.user_id,
context_parent_id=event.context.parent_id,
origin_idx=EVENT_ORIGIN_TO_IDX.get(event.origin),
)
# None state means the state was removed from the state machine
if state is None:
dbstate.state = ""
dbstate.last_updated = event.time_fired
dbstate.last_changed = None
return dbstate
dbstate.state = state.state
dbstate.last_updated = state.last_updated
if state.last_updated == state.last_changed:
dbstate.last_changed = None
else:
dbstate.last_changed = state.last_changed
return dbstate
def to_native(self, validate_entity_id: bool = True) -> State | None:
"""Convert to an HA state object."""
context = Context(
id=self.context_id,
user_id=self.context_user_id,
parent_id=self.context_parent_id,
)
try:
attrs = json_loads(self.attributes) if self.attributes else {}
except JSON_DECODE_EXCEPTIONS:
# When json_loads fails
_LOGGER.exception("Error converting row to state: %s", self)
return None
if self.last_changed is None or self.last_changed == self.last_updated:
last_changed = last_updated = process_timestamp(self.last_updated)
else:
last_updated = process_timestamp(self.last_updated)
last_changed = process_timestamp(self.last_changed)
return State(
self.entity_id,
self.state,
# Join the state_attributes table on attributes_id to get the attributes
# for newer states
attrs,
last_changed,
last_updated,
context=context,
validate_entity_id=validate_entity_id,
)
class StateAttributes(Base): # type: ignore[misc,valid-type]
"""State attribute change history."""
__table_args__ = (
{"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"},
)
__tablename__ = TABLE_STATE_ATTRIBUTES
attributes_id = Column(Integer, Identity(), primary_key=True)
hash = Column(BigInteger, index=True)
# Note that this is not named attributes to avoid confusion with the states table
shared_attrs = Column(Text().with_variant(mysql.LONGTEXT, "mysql"))
def __repr__(self) -> str:
"""Return string representation of instance for debugging."""
return (
f"<recorder.StateAttributes(id={self.attributes_id}, hash='{self.hash}',"
f" attributes='{self.shared_attrs}')>"
)
@staticmethod
def from_event(event: Event) -> StateAttributes:
"""Create object from a state_changed event."""
state: State | None = event.data.get("new_state")
# None state means the state was removed from the state machine
attr_bytes = b"{}" if state is None else json_bytes(state.attributes)
dbstate = StateAttributes(shared_attrs=attr_bytes.decode("utf-8"))
dbstate.hash = StateAttributes.hash_shared_attrs_bytes(attr_bytes)
return dbstate
@staticmethod
def shared_attrs_bytes_from_event(
event: Event,
entity_registry: er.EntityRegistry,
exclude_attrs_by_domain: dict[str, set[str]],
dialect: SupportedDialect | None,
) -> bytes:
"""Create shared_attrs from a state_changed event."""
state: State | None = event.data.get("new_state")
# None state means the state was removed from the state machine
if state is None:
return b"{}"
domain = split_entity_id(state.entity_id)[0]
exclude_attrs = (
exclude_attrs_by_domain.get(domain, set()) | ALL_DOMAIN_EXCLUDE_ATTRS
)
return json_bytes(
{k: v for k, v in state.attributes.items() if k not in exclude_attrs}
)
@staticmethod
def hash_shared_attrs_bytes(shared_attrs_bytes: bytes) -> int:
"""Return the hash of json encoded shared attributes."""
return cast(int, fnv1a_32(shared_attrs_bytes))
def to_native(self) -> dict[str, Any]:
"""Convert to an HA state object."""
try:
return cast(dict[str, Any], json_loads(self.shared_attrs))
except JSON_DECODE_EXCEPTIONS:
# When json_loads fails
_LOGGER.exception("Error converting row to state attributes: %s", self)
return {}
# *** Not originally in v32, only added for recorder to startup ok
# This is not being tested by the v32 statistics migration tests
class StatesMeta(Base): # type: ignore[misc,valid-type]
"""Metadata for states."""
__table_args__ = (
{"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"},
)
__tablename__ = TABLE_STATES_META
metadata_id = Column(Integer, Identity(), primary_key=True)
entity_id = Column(String(MAX_LENGTH_STATE_ENTITY_ID))
def __repr__(self) -> str:
"""Return string representation of instance for debugging."""
return (
"<recorder.StatesMeta("
f"id={self.metadata_id}, entity_id='{self.entity_id}'"
")>"
)
class StatisticsBase:
"""Statistics base class."""
id = Column(Integer, Identity(), primary_key=True)
created = Column(DATETIME_TYPE, default=dt_util.utcnow)
metadata_id = Column(
Integer,
ForeignKey(f"{TABLE_STATISTICS_META}.id", ondelete="CASCADE"),
index=True,
)
start = Column(DATETIME_TYPE, index=True)
mean = Column(DOUBLE_TYPE)
min = Column(DOUBLE_TYPE)
max = Column(DOUBLE_TYPE)
last_reset = Column(DATETIME_TYPE)
state = Column(DOUBLE_TYPE)
sum = Column(DOUBLE_TYPE)
@classmethod
def from_stats(cls, metadata_id: int, stats: StatisticData) -> Self:
"""Create object from a statistics."""
return cls( # type: ignore[call-arg,misc]
metadata_id=metadata_id,
**stats,
)
class Statistics(Base, StatisticsBase): # type: ignore[misc,valid-type]
"""Long term statistics."""
duration = timedelta(hours=1)
__table_args__ = (
# Used for fetching statistics for a certain entity at a specific time
Index("ix_statistics_statistic_id_start", "metadata_id", "start", unique=True),
)
__tablename__ = TABLE_STATISTICS
class StatisticsShortTerm(Base, StatisticsBase): # type: ignore[misc,valid-type]
"""Short term statistics."""
duration = timedelta(minutes=5)
__table_args__ = (
# Used for fetching statistics for a certain entity at a specific time
Index(
"ix_statistics_short_term_statistic_id_start",
"metadata_id",
"start",
unique=True,
),
)
__tablename__ = TABLE_STATISTICS_SHORT_TERM
class StatisticsMeta(Base): # type: ignore[misc,valid-type]
"""Statistics meta data."""
__table_args__ = (
{"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"},
)
__tablename__ = TABLE_STATISTICS_META
id = Column(Integer, Identity(), primary_key=True)
statistic_id = Column(String(255), index=True, unique=True)
source = Column(String(32))
unit_of_measurement = Column(String(255))
has_mean = Column(Boolean)
has_sum = Column(Boolean)
name = Column(String(255))
@staticmethod
def from_meta(meta: StatisticMetaData) -> StatisticsMeta:
"""Create object from meta data."""
return StatisticsMeta(**meta)
class RecorderRuns(Base): # type: ignore[misc,valid-type]
"""Representation of recorder run."""
__table_args__ = (Index("ix_recorder_runs_start_end", "start", "end"),)
__tablename__ = TABLE_RECORDER_RUNS
run_id = Column(Integer, Identity(), primary_key=True)
start = Column(DATETIME_TYPE, default=dt_util.utcnow)
end = Column(DATETIME_TYPE)
closed_incorrect = Column(Boolean, default=False)
created = Column(DATETIME_TYPE, default=dt_util.utcnow)
def __repr__(self) -> str:
"""Return string representation of instance for debugging."""
end = (
f"'{self.end.isoformat(sep=' ', timespec='seconds')}'" if self.end else None
)
return (
f"<recorder.RecorderRuns(id={self.run_id},"
f" start='{self.start.isoformat(sep=' ', timespec='seconds')}', end={end},"
f" closed_incorrect={self.closed_incorrect},"
f" created='{self.created.isoformat(sep=' ', timespec='seconds')}')>"
)
def entity_ids(self, point_in_time: datetime | None = None) -> list[str]:
"""Return the entity ids that existed in this run.
Specify point_in_time if you want to know which existed at that point
in time inside the run.
"""
session = Session.object_session(self)
assert session is not None, "RecorderRuns need to be persisted"
query = session.query(distinct(States.entity_id)).filter(
States.last_updated >= self.start
)
if point_in_time is not None:
query = query.filter(States.last_updated < point_in_time)
elif self.end is not None:
query = query.filter(States.last_updated < self.end)
return [row[0] for row in query]
def to_native(self, validate_entity_id: bool = True) -> RecorderRuns:
"""Return self, native format is this model."""
return self
class SchemaChanges(Base): # type: ignore[misc,valid-type]
"""Representation of schema version changes."""
__tablename__ = TABLE_SCHEMA_CHANGES
change_id = Column(Integer, Identity(), primary_key=True)
schema_version = Column(Integer)
changed = Column(DATETIME_TYPE, default=dt_util.utcnow)
def __repr__(self) -> str:
"""Return string representation of instance for debugging."""
return (
"<recorder.SchemaChanges("
f"id={self.change_id}, schema_version={self.schema_version}, "
f"changed='{self.changed.isoformat(sep=' ', timespec='seconds')}'"
")>"
)
class StatisticsRuns(Base): # type: ignore[misc,valid-type]
"""Representation of statistics run."""
__tablename__ = TABLE_STATISTICS_RUNS
run_id = Column(Integer, Identity(), primary_key=True)
start = Column(DATETIME_TYPE, index=True)
def __repr__(self) -> str:
"""Return string representation of instance for debugging."""
return (
f"<recorder.StatisticsRuns(id={self.run_id},"
f" start='{self.start.isoformat(sep=' ', timespec='seconds')}', )>"
)
EVENT_DATA_JSON = type_coerce(
EventData.shared_data.cast(JSONB_VARIANT_CAST), JSONLiteral(none_as_null=True)
)
OLD_FORMAT_EVENT_DATA_JSON = type_coerce(
Events.event_data.cast(JSONB_VARIANT_CAST), JSONLiteral(none_as_null=True)
)
SHARED_ATTRS_JSON = type_coerce(
StateAttributes.shared_attrs.cast(JSON_VARIANT_CAST), JSON(none_as_null=True)
)
OLD_FORMAT_ATTRS_JSON = type_coerce(
States.attributes.cast(JSON_VARIANT_CAST), JSON(none_as_null=True)
)
ENTITY_ID_IN_EVENT: Column = EVENT_DATA_JSON["entity_id"]
OLD_ENTITY_ID_IN_EVENT: Column = OLD_FORMAT_EVENT_DATA_JSON["entity_id"]
DEVICE_ID_IN_EVENT: Column = EVENT_DATA_JSON["device_id"]
OLD_STATE = aliased(States, name="old_state")
@overload
def process_timestamp(ts: None) -> None:
...
@overload
def process_timestamp(ts: datetime) -> datetime:
...
def process_timestamp(ts: datetime | None) -> datetime | None:
"""Process a timestamp into datetime object."""
if ts is None:
return None
if ts.tzinfo is None:
return ts.replace(tzinfo=dt_util.UTC)
return dt_util.as_utc(ts)


@@ -27,7 +27,7 @@ from tests.common import async_test_home_assistant
ORIG_TZ = dt_util.DEFAULT_TIME_ZONE
CREATE_ENGINE_TARGET = "homeassistant.components.recorder.core.create_engine"
SCHEMA_MODULE = "tests.components.recorder.db_schema_30"
SCHEMA_MODULE = "tests.components.recorder.db_schema_32"
def _create_engine_test(*args, **kwargs):
@@ -222,3 +222,144 @@ async def test_migrate_times(
await hass.async_stop()
dt_util.DEFAULT_TIME_ZONE = ORIG_TZ
async def test_migrate_can_resume_entity_id_post_migration(
caplog: pytest.LogCaptureFixture, tmpdir: py.path.local
) -> None:
"""Test we resume the entity id post migration after a restart."""
test_db_file = tmpdir.mkdir("sqlite").join("test_run_info.db")
dburl = f"{SQLITE_URL_PREFIX}//{test_db_file}"
importlib.import_module(SCHEMA_MODULE)
old_db_schema = sys.modules[SCHEMA_MODULE]
now = dt_util.utcnow()
one_second_past = now - timedelta(seconds=1)
mock_state = State(
"sensor.test",
"old",
{"last_reset": now.isoformat()},
last_changed=one_second_past,
last_updated=now,
)
state_changed_event = Event(
EVENT_STATE_CHANGED,
{
"entity_id": "sensor.test",
"old_state": None,
"new_state": mock_state,
},
EventOrigin.local,
time_fired=now,
)
custom_event = Event(
"custom_event",
{"entity_id": "sensor.custom"},
EventOrigin.local,
time_fired=now,
)
number_of_migrations = 5
def _get_states_index_names():
with session_scope(hass=hass) as session:
return inspect(session.connection()).get_indexes("states")
with patch.object(recorder, "db_schema", old_db_schema), patch.object(
recorder.migration, "SCHEMA_VERSION", old_db_schema.SCHEMA_VERSION
), patch.object(core, "StatesMeta", old_db_schema.StatesMeta), patch.object(
core, "EventTypes", old_db_schema.EventTypes
), patch.object(
core, "EventData", old_db_schema.EventData
), patch.object(
core, "States", old_db_schema.States
), patch.object(
core, "Events", old_db_schema.Events
), patch(
CREATE_ENGINE_TARGET, new=_create_engine_test
), patch(
"homeassistant.components.recorder.Recorder._migrate_events_context_ids",
), patch(
"homeassistant.components.recorder.Recorder._migrate_states_context_ids",
), patch(
"homeassistant.components.recorder.Recorder._migrate_event_type_ids",
), patch(
"homeassistant.components.recorder.Recorder._migrate_entity_ids",
), patch(
"homeassistant.components.recorder.Recorder._post_migrate_entity_ids"
), patch(
"homeassistant.components.recorder.Recorder._cleanup_legacy_states_event_ids"
):
hass = await async_test_home_assistant(asyncio.get_running_loop())
recorder_helper.async_initialize_recorder(hass)
assert await async_setup_component(
hass, "recorder", {"recorder": {"db_url": dburl}}
)
await hass.async_block_till_done()
await async_wait_recording_done(hass)
await async_wait_recording_done(hass)
def _add_data():
with session_scope(hass=hass) as session:
session.add(old_db_schema.Events.from_event(custom_event))
session.add(old_db_schema.States.from_event(state_changed_event))
await recorder.get_instance(hass).async_add_executor_job(_add_data)
await hass.async_block_till_done()
await recorder.get_instance(hass).async_block_till_done()
states_indexes = await recorder.get_instance(hass).async_add_executor_job(
_get_states_index_names
)
states_index_names = {index["name"] for index in states_indexes}
assert recorder.get_instance(hass).use_legacy_events_index is True
await hass.async_stop()
await hass.async_block_till_done()
assert "ix_states_event_id" in states_index_names
assert "ix_states_entity_id_last_updated_ts" in states_index_names
with patch("homeassistant.components.recorder.Recorder._post_migrate_entity_ids"):
hass = await async_test_home_assistant(asyncio.get_running_loop())
recorder_helper.async_initialize_recorder(hass)
assert await async_setup_component(
hass, "recorder", {"recorder": {"db_url": dburl}}
)
await hass.async_block_till_done()
# We need to wait for all the migration tasks to complete
# before we can check the database.
for _ in range(number_of_migrations):
await recorder.get_instance(hass).async_block_till_done()
await async_wait_recording_done(hass)
states_indexes = await recorder.get_instance(hass).async_add_executor_job(
_get_states_index_names
)
states_index_names = {index["name"] for index in states_indexes}
await hass.async_stop()
await hass.async_block_till_done()
assert "ix_states_entity_id_last_updated_ts" in states_index_names
hass = await async_test_home_assistant(asyncio.get_running_loop())
recorder_helper.async_initialize_recorder(hass)
assert await async_setup_component(
hass, "recorder", {"recorder": {"db_url": dburl}}
)
await hass.async_block_till_done()
# We need to wait for all the migration tasks to complete
# before we can check the database.
for _ in range(number_of_migrations):
await recorder.get_instance(hass).async_block_till_done()
await async_wait_recording_done(hass)
states_indexes = await recorder.get_instance(hass).async_add_executor_job(
_get_states_index_names
)
states_index_names = {index["name"] for index in states_indexes}
assert "ix_states_entity_id_last_updated_ts" not in states_index_names
await hass.async_stop()
dt_util.DEFAULT_TIME_ZONE = ORIG_TZ