Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions supervisor/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ def run_os_startup_check_cleanup() -> None:

# Create startup task that can be cancelled gracefully
startup_task = loop.create_task(coresys.core.start())
shutdown_tasks: list[asyncio.Task] = []

async def host_is_shutting_down() -> bool:
"""Return True if systemd is shutting the host down.
Expand Down Expand Up @@ -103,10 +102,7 @@ def shutdown_handler() -> None:
_LOGGER.warning("Supervisor startup interrupted by shutdown signal")
startup_task.cancel()

if shutdown_tasks and not shutdown_tasks[0].done():
return

shutdown_tasks[:] = [coresys.create_task(stop_supervisor())]
coresys.create_task(stop_supervisor())

bootstrap.register_signal_handlers(loop, shutdown_handler)

Expand Down
60 changes: 45 additions & 15 deletions supervisor/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def __init__(self, coresys: CoreSys) -> None:
self.coresys: CoreSys = coresys
self._state: CoreState = CoreState.INITIALIZE
self.exit_code: int = 0
self._shutdown_event: asyncio.Event = asyncio.Event()

@property
def state(self) -> CoreState:
Expand Down Expand Up @@ -364,28 +365,57 @@ async def stop(self) -> None:
self.sys_loop.stop()

async def shutdown(self, *, remove_homeassistant_container: bool = False) -> None:
"""Shutdown all running containers in correct order."""
"""Shutdown all running containers in correct order.

Reentrant: if a shutdown is already in progress, additional callers
await completion of the in-flight shutdown instead of starting a
second one.
"""
# Nothing coherent to gracefully shut down before startup completes;
# the caller (e.g. signal handler) is expected to follow up with stop().
if self.state in STARTING_STATES:
_LOGGER.warning(
"Ignoring shutdown request, Supervisor has not finished starting"
)
return

# Supervisor is already tearing itself down, no point running shutdown
if self.state in (CoreState.STOPPING, CoreState.CLOSE):
_LOGGER.warning("Ignoring shutdown request, Supervisor is already stopping")
return

# Another shutdown is in progress, wait for it to complete
if self.state == CoreState.SHUTDOWN:
await self._shutdown_event.wait()
return

# Reset event for this shutdown cycle (supports repeated use, e.g. backup restore)
self._shutdown_event.clear()
Comment on lines +392 to +393

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The concept of repeated use doesn't really make sense here. Shutdown eventually leads to the Python process stopping, which obviously clears the event, and there's no way to get back to CoreState.RUNNING from those closing states. So I'm not sure when this reset would ever have an effect. At the very least, the comment should be adjusted, since its example isn't a real use case.


# don't process scheduler anymore
if self.state == CoreState.RUNNING:
await self.set_state(CoreState.SHUTDOWN)

# Shutdown Application Apps, using Home Assistant API
await self.sys_apps.shutdown(AppStartup.APPLICATION)
try:
# Shutdown Application Apps, using Home Assistant API
await self.sys_apps.shutdown(AppStartup.APPLICATION)

# Close Home Assistant
with suppress(HassioError):
await self.sys_homeassistant.core.stop(
remove_container=remove_homeassistant_container
)
# Close Home Assistant
with suppress(HassioError):
await self.sys_homeassistant.core.stop(
remove_container=remove_homeassistant_container
)

# Shutdown System Apps
await self.sys_apps.shutdown(AppStartup.SERVICES)
await self.sys_apps.shutdown(AppStartup.SYSTEM)
await self.sys_apps.shutdown(AppStartup.INITIALIZE)
# Shutdown System Apps
await self.sys_apps.shutdown(AppStartup.SERVICES)
await self.sys_apps.shutdown(AppStartup.SYSTEM)
await self.sys_apps.shutdown(AppStartup.INITIALIZE)

# Shutdown all Plugins
if self.state in (CoreState.STOPPING, CoreState.SHUTDOWN):
await self.sys_plugins.shutdown()
# Shutdown all Plugins
if self.state in (CoreState.STOPPING, CoreState.SHUTDOWN):
await self.sys_plugins.shutdown()
finally:
self._shutdown_event.set()

async def _update_last_boot(self) -> None:
"""Update last boot time."""
Expand Down
95 changes: 95 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,98 @@ async def test_setup_unhandled_exception_captured(
capture_mock.assert_called_once()
assert "Fatal error happening on load Task" in caplog.text
assert UnhealthyReason.SETUP in coresys.resolution.unhealthy


async def test_shutdown_reentrant_waits(coresys: CoreSys):
    """Concurrent shutdown() calls await the in-flight shutdown rather than re-running.

    The first shutdown() is parked inside a patched ``coresys.apps.shutdown``
    until ``proceed`` is set. A second shutdown() started in the meantime must
    stay pending and must not trigger any additional app shutdown calls.
    """
    call_count = 0
    shutdown_started = asyncio.Event()
    proceed = asyncio.Event()

    # Keep a handle on the unpatched method so the wrapper can delegate to it.
    original_shutdown = coresys.apps.shutdown

    async def slow_app_shutdown(startup):
        """Count the call, signal that shutdown began, then block on ``proceed``."""
        nonlocal call_count
        call_count += 1
        shutdown_started.set()
        await proceed.wait()
        return await original_shutdown(startup)

    await coresys.core.set_state(CoreState.RUNNING)

    with patch.object(coresys.apps, "shutdown", side_effect=slow_app_shutdown):
        task1 = asyncio.create_task(coresys.core.shutdown())
        await shutdown_started.wait()

        # Second call should wait, not start a new shutdown
        task2 = asyncio.create_task(coresys.core.shutdown())
        await asyncio.sleep(0.05)

        # Without these checks the test would also pass if the second call
        # simply returned early instead of waiting for the first one.
        assert not task1.done()
        assert not task2.done()
        assert call_count == 1

        proceed.set()
        await asyncio.gather(task1, task2)

    # AppStartup has 4 levels (APPLICATION/SERVICES/SYSTEM/INITIALIZE); a single
    # shutdown call iterates them. A re-entered shutdown would double the count.
    assert call_count == 4
    assert coresys.core._shutdown_event.is_set()


async def test_shutdown_event_reset_between_cycles(coresys: CoreSys):
    """A second shutdown() runs again once the state is back at RUNNING.

    The test forces the state back to RUNNING by hand between the two calls and
    checks that the second call reaches the app shutdown and leaves the
    shutdown event set.
    """
    core = coresys.core

    # First cycle: plain shutdown from RUNNING.
    await core.set_state(CoreState.RUNNING)
    await core.shutdown()
    assert core._shutdown_event.is_set()

    # Second cycle: put the state back to RUNNING manually and shut down again.
    await core.set_state(CoreState.RUNNING)

    real_app_shutdown = coresys.apps.shutdown
    levels_seen = []

    async def recording_app_shutdown(startup):
        """Record the requested level, then delegate to the real method."""
        levels_seen.append(startup)
        return await real_app_shutdown(startup)

    with patch.object(coresys.apps, "shutdown", side_effect=recording_app_shutdown):
        await core.shutdown()

    # A non-empty record means the second shutdown actually entered app shutdown.
    assert levels_seen
    assert core._shutdown_event.is_set()
Comment on lines +272 to +294

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is very much a fabricated use case; it can't ever happen in production. CoreState cannot get back to RUNNING from SHUTDOWN. Is there a plan to support some kind of live backup and restore for Supervisor that I'm unaware of?



@pytest.mark.parametrize(
    "state",
    [CoreState.STOPPING, CoreState.CLOSE],
    ids=["stopping", "close"],
)
async def test_shutdown_ignored_during_stop(
    coresys: CoreSys, caplog: pytest.LogCaptureFixture, state: CoreState
):
    """shutdown() does nothing but log a warning in STOPPING or CLOSE state."""
    expected_warning = "Ignoring shutdown request, Supervisor is already stopping"

    await coresys.core.set_state(state)

    with patch.object(coresys.apps, "shutdown") as apps_shutdown_mock:
        await coresys.core.shutdown()

    # No app shutdown may be attempted, and the skip must be logged.
    assert not apps_shutdown_mock.called
    assert expected_warning in caplog.text


@pytest.mark.parametrize(
    "state",
    [CoreState.INITIALIZE, CoreState.STARTUP, CoreState.SETUP],
    ids=["initialize", "startup", "setup"],
)
async def test_shutdown_skipped_during_startup(
    coresys: CoreSys, caplog: pytest.LogCaptureFixture, state: CoreState
):
    """shutdown() returns early and logs a warning in the parametrized startup states."""
    expected_warning = (
        "Ignoring shutdown request, Supervisor has not finished starting"
    )

    await coresys.core.set_state(state)

    with patch.object(coresys.apps, "shutdown") as apps_shutdown_mock:
        await coresys.core.shutdown()

    # No app shutdown may be attempted, and the skip must be logged.
    assert not apps_shutdown_mock.called
    assert expected_warning in caplog.text