Watchdog addon on successful but unexpected exit (#3815)

This commit is contained in:
Mike Degatano 2022-08-22 20:29:27 -04:00 committed by GitHub
parent 024b813865
commit f4811a0243
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 123 additions and 126 deletions

View File

@ -112,6 +112,9 @@ class Addon(AddonModel):
super().__init__(coresys, slug)
self.instance: DockerAddon = DockerAddon(coresys, self)
self._state: AddonState = AddonState.UNKNOWN
self._manual_stop: bool = (
self.sys_hardware.helper.last_boot == self.sys_config.last_boot
)
@Job(
name=f"addon_{slug}_restart_after_problem",
@ -682,6 +685,7 @@ class Addon(AddonModel):
async def stop(self) -> None:
"""Stop add-on."""
self._manual_stop = True
try:
await self.instance.stop()
except DockerError as err:
@ -950,6 +954,7 @@ class Addon(AddonModel):
ContainerState.HEALTHY,
ContainerState.UNHEALTHY,
]:
self._manual_stop = False
self.state = AddonState.STARTED
elif event.state == ContainerState.STOPPED:
self.state = AddonState.STOPPED
@ -958,8 +963,15 @@ class Addon(AddonModel):
async def watchdog_container(self, event: DockerContainerStateEvent) -> None:
"""Process state changes in addon container and restart if necessary."""
if not (event.name == self.instance.name and self.watchdog):
if (
not (event.name == self.instance.name and self.watchdog)
or self._manual_stop
):
return
if event.state in [ContainerState.FAILED, ContainerState.UNHEALTHY]:
if event.state in [
ContainerState.FAILED,
ContainerState.STOPPED,
ContainerState.UNHEALTHY,
]:
await self._restart_after_problem(self, event.state)

View File

@ -41,7 +41,7 @@ class HwHelper(CoreSysAttributes):
return bool(self.sys_hardware.filter_devices(subsystem=UdevSubsystem.USB))
@property
def last_boot(self) -> str | None:
def last_boot(self) -> datetime | None:
"""Return last boot time."""
try:
stats: str = _PROC_STAT.read_text(encoding="utf-8")

View File

@ -1,6 +1,7 @@
"""Test Home Assistant Add-ons."""
import asyncio
from datetime import timedelta
from unittest.mock import MagicMock, PropertyMock, patch
from docker.errors import DockerException
@ -9,13 +10,38 @@ import pytest
from supervisor.addons.addon import Addon
from supervisor.const import AddonState, BusEvent
from supervisor.coresys import CoreSys
from supervisor.docker.addon import DockerAddon
from supervisor.docker.const import ContainerState
from supervisor.docker.monitor import DockerContainerStateEvent
from supervisor.exceptions import AddonsJobError, AudioUpdateError
from supervisor.store.repository import Repository
from supervisor.utils.dt import utcnow
from ..const import TEST_ADDON_SLUG
def _fire_test_event(coresys: CoreSys, name: str, state: ContainerState):
"""Fire a test event."""
coresys.bus.fire_event(
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
DockerContainerStateEvent(
name=name,
state=state,
id="abc123",
time=1,
),
)
async def mock_current_state(state: ContainerState) -> ContainerState:
"""Mock for current state method."""
return state
async def mock_stop() -> None:
"""Mock for stop method."""
def test_options_merge(coresys: CoreSys, install_addon_ssh: Addon) -> None:
"""Test options merge."""
addon = coresys.addons.get(TEST_ADDON_SLUG)
@ -71,174 +97,107 @@ def test_options_merge(coresys: CoreSys, install_addon_ssh: Addon) -> None:
async def test_addon_state_listener(coresys: CoreSys, install_addon_ssh: Addon) -> None:
"""Test addon is setting state from docker events."""
with patch.object(type(install_addon_ssh.instance), "attach"):
with patch.object(DockerAddon, "attach"):
await install_addon_ssh.load()
assert install_addon_ssh.state == AddonState.UNKNOWN
with patch.object(type(install_addon_ssh), "watchdog_container"):
coresys.bus.fire_event(
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
DockerContainerStateEvent(
name=f"addon_{TEST_ADDON_SLUG}",
state=ContainerState.RUNNING,
id="abc123",
time=1,
),
)
with patch.object(Addon, "watchdog_container"):
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.RUNNING)
await asyncio.sleep(0)
assert install_addon_ssh.state == AddonState.STARTED
coresys.bus.fire_event(
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
DockerContainerStateEvent(
name=f"addon_{TEST_ADDON_SLUG}",
state=ContainerState.STOPPED,
id="abc123",
time=1,
),
)
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.STOPPED)
await asyncio.sleep(0)
assert install_addon_ssh.state == AddonState.STOPPED
coresys.bus.fire_event(
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
DockerContainerStateEvent(
name=f"addon_{TEST_ADDON_SLUG}",
state=ContainerState.HEALTHY,
id="abc123",
time=1,
),
)
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.HEALTHY)
await asyncio.sleep(0)
assert install_addon_ssh.state == AddonState.STARTED
coresys.bus.fire_event(
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
DockerContainerStateEvent(
name=f"addon_{TEST_ADDON_SLUG}",
state=ContainerState.FAILED,
id="abc123",
time=1,
),
)
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.FAILED)
await asyncio.sleep(0)
assert install_addon_ssh.state == AddonState.ERROR
# Test other addons are ignored
coresys.bus.fire_event(
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
DockerContainerStateEvent(
name="addon_local_non_installed",
state=ContainerState.RUNNING,
id="abc123",
time=1,
),
)
_fire_test_event(coresys, "addon_local_non_installed", ContainerState.RUNNING)
await asyncio.sleep(0)
assert install_addon_ssh.state == AddonState.ERROR
async def mock_current_state(state: ContainerState) -> ContainerState:
"""Mock for current state method."""
return state
async def mock_stop() -> None:
"""Mock for stop method."""
async def test_addon_watchdog(coresys: CoreSys, install_addon_ssh: Addon) -> None:
"""Test addon watchdog works correctly."""
with patch.object(type(install_addon_ssh.instance), "attach"):
with patch.object(DockerAddon, "attach"):
await install_addon_ssh.load()
install_addon_ssh.watchdog = True
with patch.object(Addon, "restart") as restart, patch.object(
Addon, "start"
) as start, patch.object(
type(install_addon_ssh.instance), "current_state"
) as current_state:
) as start, patch.object(DockerAddon, "current_state") as current_state:
# Restart if it becomes unhealthy
current_state.return_value = mock_current_state(ContainerState.UNHEALTHY)
coresys.bus.fire_event(
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
DockerContainerStateEvent(
name=f"addon_{TEST_ADDON_SLUG}",
state=ContainerState.UNHEALTHY,
id="abc123",
time=1,
),
)
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.UNHEALTHY)
await asyncio.sleep(0)
restart.assert_called_once()
start.assert_not_called()
restart.reset_mock()
current_state.return_value = mock_current_state(ContainerState.FAILED)
with patch.object(
type(install_addon_ssh.instance), "stop", return_value=mock_stop()
) as stop:
coresys.bus.fire_event(
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
DockerContainerStateEvent(
name=f"addon_{TEST_ADDON_SLUG}",
state=ContainerState.FAILED,
id="abc123",
time=1,
),
)
# Rebuild if it failed
current_state.return_value = mock_current_state(ContainerState.FAILED)
with patch.object(DockerAddon, "stop", return_value=mock_stop()) as stop:
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.FAILED)
await asyncio.sleep(0)
stop.assert_called_once_with(remove_container=True)
restart.assert_not_called()
start.assert_called_once()
start.reset_mock()
# Do not process event if container state has changed since fired
current_state.return_value = mock_current_state(ContainerState.HEALTHY)
coresys.bus.fire_event(
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
DockerContainerStateEvent(
name=f"addon_{TEST_ADDON_SLUG}",
state=ContainerState.FAILED,
id="abc123",
time=1,
),
)
await asyncio.sleep(0)
restart.assert_not_called()
start.assert_not_called()
# Do not restart when addon stopped normally
coresys.bus.fire_event(
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
DockerContainerStateEvent(
name=f"addon_{TEST_ADDON_SLUG}",
state=ContainerState.STOPPED,
id="abc123",
time=1,
),
)
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.FAILED)
await asyncio.sleep(0)
restart.assert_not_called()
start.assert_not_called()
# Other addons ignored
coresys.bus.fire_event(
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
DockerContainerStateEvent(
name="addon_local_non_installed",
state=ContainerState.UNHEALTHY,
id="abc123",
time=1,
),
)
current_state.return_value = mock_current_state(ContainerState.UNHEALTHY)
_fire_test_event(coresys, "addon_local_non_installed", ContainerState.UNHEALTHY)
await asyncio.sleep(0)
restart.assert_not_called()
start.assert_not_called()
async def test_watchdog_on_stop(coresys: CoreSys, install_addon_ssh: Addon) -> None:
"""Test addon watchdog restarts addon on stop if not manual."""
with patch.object(DockerAddon, "attach"):
await install_addon_ssh.load()
install_addon_ssh.watchdog = True
with patch.object(Addon, "restart") as restart, patch.object(
DockerAddon,
"current_state",
return_value=mock_current_state(ContainerState.STOPPED),
), patch.object(DockerAddon, "stop", return_value=mock_stop()):
# Do not restart when addon stopped by user
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.RUNNING)
await asyncio.sleep(0)
await install_addon_ssh.stop()
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.STOPPED)
await asyncio.sleep(0)
restart.assert_not_called()
# Do restart addon if it stops and user didn't do it
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.RUNNING)
await asyncio.sleep(0)
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.STOPPED)
await asyncio.sleep(0)
restart.assert_called_once()
async def test_listener_attached_on_install(coresys: CoreSys, repository):
"""Test events listener attached on addon install."""
container_collection = MagicMock()
@ -258,19 +217,45 @@ async def test_listener_attached_on_install(coresys: CoreSys, repository):
):
await coresys.addons.install.__wrapped__(coresys.addons, TEST_ADDON_SLUG)
coresys.bus.fire_event(
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
DockerContainerStateEvent(
name=f"addon_{TEST_ADDON_SLUG}",
state=ContainerState.RUNNING,
id="abc123",
time=1,
),
)
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.RUNNING)
await asyncio.sleep(0)
assert coresys.addons.get(TEST_ADDON_SLUG).state == AddonState.STARTED
@pytest.mark.parametrize(
"boot_timedelta,restart_count", [(timedelta(), 0), (timedelta(days=1), 1)]
)
async def test_watchdog_during_attach(
coresys: CoreSys,
repository: Repository,
boot_timedelta: timedelta,
restart_count: int,
):
"""Test host reboot treated as manual stop but not supervisor restart."""
store = coresys.addons.store[TEST_ADDON_SLUG]
coresys.addons.data.install(store)
with patch.object(Addon, "restart") as restart, patch.object(
type(coresys.hardware.helper),
"last_boot",
new=PropertyMock(return_value=utcnow()),
), patch.object(DockerAddon, "attach"), patch.object(
DockerAddon,
"current_state",
return_value=mock_current_state(ContainerState.STOPPED),
):
coresys.config.last_boot = coresys.hardware.helper.last_boot + boot_timedelta
addon = Addon(coresys, store.slug)
coresys.addons.local[addon.slug] = addon
addon.watchdog = True
await addon.load()
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.STOPPED)
await asyncio.sleep(0)
assert restart.call_count == restart_count
async def test_install_update_fails_if_out_of_date(
coresys: CoreSys, install_addon_ssh: Addon
):