From f4811a0243270954bda10bd2b21faabf45f761d9 Mon Sep 17 00:00:00 2001 From: Mike Degatano Date: Mon, 22 Aug 2022 20:29:27 -0400 Subject: [PATCH] Watchdog addon on successful but unexpected exit (#3815) --- supervisor/addons/addon.py | 16 ++- supervisor/hardware/helper.py | 2 +- tests/addons/test_addon.py | 231 ++++++++++++++++------------------ 3 files changed, 123 insertions(+), 126 deletions(-) diff --git a/supervisor/addons/addon.py b/supervisor/addons/addon.py index 523771752..ccb8ca1ba 100644 --- a/supervisor/addons/addon.py +++ b/supervisor/addons/addon.py @@ -112,6 +112,9 @@ class Addon(AddonModel): super().__init__(coresys, slug) self.instance: DockerAddon = DockerAddon(coresys, self) self._state: AddonState = AddonState.UNKNOWN + self._manual_stop: bool = ( + self.sys_hardware.helper.last_boot == self.sys_config.last_boot + ) @Job( name=f"addon_{slug}_restart_after_problem", @@ -682,6 +685,7 @@ class Addon(AddonModel): async def stop(self) -> None: """Stop add-on.""" + self._manual_stop = True try: await self.instance.stop() except DockerError as err: @@ -950,6 +954,7 @@ class Addon(AddonModel): ContainerState.HEALTHY, ContainerState.UNHEALTHY, ]: + self._manual_stop = False self.state = AddonState.STARTED elif event.state == ContainerState.STOPPED: self.state = AddonState.STOPPED @@ -958,8 +963,15 @@ class Addon(AddonModel): async def watchdog_container(self, event: DockerContainerStateEvent) -> None: """Process state changes in addon container and restart if necessary.""" - if not (event.name == self.instance.name and self.watchdog): + if ( + not (event.name == self.instance.name and self.watchdog) + or self._manual_stop + ): return - if event.state in [ContainerState.FAILED, ContainerState.UNHEALTHY]: + if event.state in [ + ContainerState.FAILED, + ContainerState.STOPPED, + ContainerState.UNHEALTHY, + ]: await self._restart_after_problem(self, event.state) diff --git a/supervisor/hardware/helper.py b/supervisor/hardware/helper.py index 156692848..3ac3c88f6 100644 --- a/supervisor/hardware/helper.py +++ b/supervisor/hardware/helper.py @@ -41,7 +41,7 @@ class HwHelper(CoreSysAttributes): return bool(self.sys_hardware.filter_devices(subsystem=UdevSubsystem.USB)) @property - def last_boot(self) -> str | None: + def last_boot(self) -> datetime | None: """Return last boot time.""" try: stats: str = _PROC_STAT.read_text(encoding="utf-8") diff --git a/tests/addons/test_addon.py b/tests/addons/test_addon.py index e59881bd5..9b7602e9c 100644 --- a/tests/addons/test_addon.py +++ b/tests/addons/test_addon.py @@ -1,6 +1,7 @@ """Test Home Assistant Add-ons.""" import asyncio +from datetime import timedelta from unittest.mock import MagicMock, PropertyMock, patch from docker.errors import DockerException @@ -9,13 +10,38 @@ import pytest from supervisor.addons.addon import Addon from supervisor.const import AddonState, BusEvent from supervisor.coresys import CoreSys +from supervisor.docker.addon import DockerAddon from supervisor.docker.const import ContainerState from supervisor.docker.monitor import DockerContainerStateEvent from supervisor.exceptions import AddonsJobError, AudioUpdateError +from supervisor.store.repository import Repository +from supervisor.utils.dt import utcnow from ..const import TEST_ADDON_SLUG +def _fire_test_event(coresys: CoreSys, name: str, state: ContainerState): + """Fire a test event.""" + coresys.bus.fire_event( + BusEvent.DOCKER_CONTAINER_STATE_CHANGE, + DockerContainerStateEvent( + name=name, + state=state, + id="abc123", + time=1, + ), + ) + + +async def mock_current_state(state: ContainerState) -> ContainerState: + """Mock for current state method.""" + return state + + +async def mock_stop() -> None: + """Mock for stop method.""" + + def test_options_merge(coresys: CoreSys, install_addon_ssh: Addon) -> None: """Test options merge.""" addon = coresys.addons.get(TEST_ADDON_SLUG) @@ -71,174 +97,107 @@ def test_options_merge(coresys: CoreSys, install_addon_ssh: Addon) -> None: async def test_addon_state_listener(coresys: CoreSys, install_addon_ssh: Addon) -> None: """Test addon is setting state from docker events.""" - with patch.object(type(install_addon_ssh.instance), "attach"): + with patch.object(DockerAddon, "attach"): await install_addon_ssh.load() assert install_addon_ssh.state == AddonState.UNKNOWN - with patch.object(type(install_addon_ssh), "watchdog_container"): - coresys.bus.fire_event( - BusEvent.DOCKER_CONTAINER_STATE_CHANGE, - DockerContainerStateEvent( - name=f"addon_{TEST_ADDON_SLUG}", - state=ContainerState.RUNNING, - id="abc123", - time=1, - ), - ) + with patch.object(Addon, "watchdog_container"): + _fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.RUNNING) await asyncio.sleep(0) assert install_addon_ssh.state == AddonState.STARTED - coresys.bus.fire_event( - BusEvent.DOCKER_CONTAINER_STATE_CHANGE, - DockerContainerStateEvent( - name=f"addon_{TEST_ADDON_SLUG}", - state=ContainerState.STOPPED, - id="abc123", - time=1, - ), - ) + _fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.STOPPED) await asyncio.sleep(0) assert install_addon_ssh.state == AddonState.STOPPED - coresys.bus.fire_event( - BusEvent.DOCKER_CONTAINER_STATE_CHANGE, - DockerContainerStateEvent( - name=f"addon_{TEST_ADDON_SLUG}", - state=ContainerState.HEALTHY, - id="abc123", - time=1, - ), - ) + _fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.HEALTHY) await asyncio.sleep(0) assert install_addon_ssh.state == AddonState.STARTED - coresys.bus.fire_event( - BusEvent.DOCKER_CONTAINER_STATE_CHANGE, - DockerContainerStateEvent( - name=f"addon_{TEST_ADDON_SLUG}", - state=ContainerState.FAILED, - id="abc123", - time=1, - ), - ) + _fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.FAILED) await asyncio.sleep(0) assert install_addon_ssh.state == AddonState.ERROR # Test other addons are ignored - coresys.bus.fire_event( - BusEvent.DOCKER_CONTAINER_STATE_CHANGE, - DockerContainerStateEvent( - name="addon_local_non_installed", - state=ContainerState.RUNNING, - id="abc123", - time=1, - ), - ) + _fire_test_event(coresys, "addon_local_non_installed", ContainerState.RUNNING) await asyncio.sleep(0) assert install_addon_ssh.state == AddonState.ERROR -async def mock_current_state(state: ContainerState) -> ContainerState: - """Mock for current state method.""" - return state - - -async def mock_stop() -> None: - """Mock for stop method.""" - - async def test_addon_watchdog(coresys: CoreSys, install_addon_ssh: Addon) -> None: """Test addon watchdog works correctly.""" - with patch.object(type(install_addon_ssh.instance), "attach"): + with patch.object(DockerAddon, "attach"): await install_addon_ssh.load() install_addon_ssh.watchdog = True with patch.object(Addon, "restart") as restart, patch.object( Addon, "start" - ) as start, patch.object( - type(install_addon_ssh.instance), "current_state" - ) as current_state: + ) as start, patch.object(DockerAddon, "current_state") as current_state: + # Restart if it becomes unhealthy current_state.return_value = mock_current_state(ContainerState.UNHEALTHY) - coresys.bus.fire_event( - BusEvent.DOCKER_CONTAINER_STATE_CHANGE, - DockerContainerStateEvent( - name=f"addon_{TEST_ADDON_SLUG}", - state=ContainerState.UNHEALTHY, - id="abc123", - time=1, - ), - ) + _fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.UNHEALTHY) await asyncio.sleep(0) restart.assert_called_once() start.assert_not_called() restart.reset_mock() - current_state.return_value = mock_current_state(ContainerState.FAILED) - with patch.object( - type(install_addon_ssh.instance), "stop", return_value=mock_stop() - ) as stop: - coresys.bus.fire_event( - BusEvent.DOCKER_CONTAINER_STATE_CHANGE, - DockerContainerStateEvent( - name=f"addon_{TEST_ADDON_SLUG}", - state=ContainerState.FAILED, - id="abc123", - time=1, - ), - ) + # Rebuild if it failed + current_state.return_value = mock_current_state(ContainerState.FAILED) + with patch.object(DockerAddon, "stop", return_value=mock_stop()) as stop: + _fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.FAILED) await asyncio.sleep(0) stop.assert_called_once_with(remove_container=True) restart.assert_not_called() start.assert_called_once() start.reset_mock() + # Do not process event if container state has changed since fired current_state.return_value = mock_current_state(ContainerState.HEALTHY) - coresys.bus.fire_event( - BusEvent.DOCKER_CONTAINER_STATE_CHANGE, - DockerContainerStateEvent( - name=f"addon_{TEST_ADDON_SLUG}", - state=ContainerState.FAILED, - id="abc123", - time=1, - ), - ) - await asyncio.sleep(0) - restart.assert_not_called() - start.assert_not_called() - - # Do not restart when addon stopped normally - coresys.bus.fire_event( - BusEvent.DOCKER_CONTAINER_STATE_CHANGE, - DockerContainerStateEvent( - name=f"addon_{TEST_ADDON_SLUG}", - state=ContainerState.STOPPED, - id="abc123", - time=1, - ), - ) + _fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.FAILED) await asyncio.sleep(0) restart.assert_not_called() start.assert_not_called() # Other addons ignored - coresys.bus.fire_event( - BusEvent.DOCKER_CONTAINER_STATE_CHANGE, - DockerContainerStateEvent( - name="addon_local_non_installed", - state=ContainerState.UNHEALTHY, - id="abc123", - time=1, - ), - ) + current_state.return_value = mock_current_state(ContainerState.UNHEALTHY) + _fire_test_event(coresys, "addon_local_non_installed", ContainerState.UNHEALTHY) await asyncio.sleep(0) restart.assert_not_called() start.assert_not_called() +async def test_watchdog_on_stop(coresys: CoreSys, install_addon_ssh: Addon) -> None: + """Test addon watchdog restarts addon on stop if not manual.""" + with patch.object(DockerAddon, "attach"): + await install_addon_ssh.load() + + install_addon_ssh.watchdog = True + + with patch.object(Addon, "restart") as restart, patch.object( + DockerAddon, + "current_state", + return_value=mock_current_state(ContainerState.STOPPED), + ), patch.object(DockerAddon, "stop", return_value=mock_stop()): + # Do not restart when addon stopped by user + _fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.RUNNING) + await asyncio.sleep(0) + await install_addon_ssh.stop() + _fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.STOPPED) + await asyncio.sleep(0) + restart.assert_not_called() + + # Do restart addon if it stops and user didn't do it + _fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.RUNNING) + await asyncio.sleep(0) + _fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.STOPPED) + await asyncio.sleep(0) + restart.assert_called_once() + + async def test_listener_attached_on_install(coresys: CoreSys, repository): """Test events listener attached on addon install.""" container_collection = MagicMock() @@ -258,19 +217,45 @@ async def test_listener_attached_on_install(coresys: CoreSys, repository): ): await coresys.addons.install.__wrapped__(coresys.addons, TEST_ADDON_SLUG) - coresys.bus.fire_event( - BusEvent.DOCKER_CONTAINER_STATE_CHANGE, - DockerContainerStateEvent( - name=f"addon_{TEST_ADDON_SLUG}", - state=ContainerState.RUNNING, - id="abc123", - time=1, - ), - ) + _fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.RUNNING) await asyncio.sleep(0) assert coresys.addons.get(TEST_ADDON_SLUG).state == AddonState.STARTED +@pytest.mark.parametrize( + "boot_timedelta,restart_count", [(timedelta(), 0), (timedelta(days=1), 1)] +) +async def test_watchdog_during_attach( + coresys: CoreSys, + repository: Repository, + boot_timedelta: timedelta, + restart_count: int, +): + """Test host reboot treated as manual stop but not supervisor restart.""" + store = coresys.addons.store[TEST_ADDON_SLUG] + coresys.addons.data.install(store) + + with patch.object(Addon, "restart") as restart, patch.object( + type(coresys.hardware.helper), + "last_boot", + new=PropertyMock(return_value=utcnow()), + ), patch.object(DockerAddon, "attach"), patch.object( + DockerAddon, + "current_state", + return_value=mock_current_state(ContainerState.STOPPED), + ): + coresys.config.last_boot = coresys.hardware.helper.last_boot + boot_timedelta + addon = Addon(coresys, store.slug) + coresys.addons.local[addon.slug] = addon + addon.watchdog = True + + await addon.load() + _fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.STOPPED) + await asyncio.sleep(0) + + assert restart.call_count == restart_count + + async def test_install_update_fails_if_out_of_date( coresys: CoreSys, install_addon_ssh: Addon ):