From ff6ce7b0917a079c43a3022cba536d58fe2f3ea6 Mon Sep 17 00:00:00 2001 From: mwiegand Date: Sat, 9 May 2026 13:09:52 +0200 Subject: [PATCH] =?UTF-8?q?refactor(l4d2-host):=20unmount=20via=20ExecStop?= =?UTF-8?q?Post=20=E2=80=94=20single=20code=20path=20mirroring=20mount?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symmetric with the earlier mount cleanup (commits 519567e..a982995). Until now, the unit's ExecStartPre handled mount but the Python side still drove unmount: stop_instance and _purge_instance both called _mounter.unmount, which wrapped sudo + the helper. Two code paths for two halves of the same lifecycle. Move unmount into the unit: - ExecStopPost=+/usr/local/libexec/left4me/left4me-overlay umount %i (ExecStopPost, not ExecStop, so it runs after the cgroup is cleared; ExecStop runs while srcds is alive and would EBUSY the umount syscall.) - Helper's umount verb is now idempotent (mirrors mount): if merged isn't a mount point, return early. PRINT_ONLY mode bypasses both short-circuits so the unit tests still exercise the full nsenter argv. Drop the dead Python machinery: - _mounter.unmount(...) calls in stop_instance and _purge_instance - _mounter global + KernelOverlayFSMounter import - The whole l4d2host/fs/ package (OverlayMounter ABC + KernelOverlayFSMounter class) — no production callers, just self-tests - l4d2host/tests/test_kernel_overlayfs.py - test_stop_succeeds_when_unmount_fails / test_delete_succeeds_when_unmount_fails (tested Python-side unmount-failure tolerance that no longer exists) - The l4d2host.fs.kernel_overlayfs.run_command monkeypatches in lifecycle tests After this, the only thing start_instance does beyond cfg-staging is ask systemd to enable+start the unit. stop/delete/reset only ask systemd to disable; the overlay lifecycle lives entirely in the unit file. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../systemd/system/left4me-server@.service | 15 ++-- .../usr/local/libexec/left4me/left4me-overlay | 24 ++++-- deploy/tests/test_deploy_artifacts.py | 17 ++++- l4d2host/fs/__init__.py | 0 l4d2host/fs/base.py | 30 -------- l4d2host/fs/kernel_overlayfs.py | 53 ------------- l4d2host/instances.py | 34 ++------- l4d2host/tests/test_kernel_overlayfs.py | 76 ------------------- l4d2host/tests/test_lifecycle.py | 65 +--------------- 9 files changed, 54 insertions(+), 260 deletions(-) delete mode 100644 l4d2host/fs/__init__.py delete mode 100644 l4d2host/fs/base.py delete mode 100644 l4d2host/fs/kernel_overlayfs.py delete mode 100644 l4d2host/tests/test_kernel_overlayfs.py diff --git a/deploy/files/usr/local/lib/systemd/system/left4me-server@.service b/deploy/files/usr/local/lib/systemd/system/left4me-server@.service index e9d226b..d5d4698 100644 --- a/deploy/files/usr/local/lib/systemd/system/left4me-server@.service +++ b/deploy/files/usr/local/lib/systemd/system/left4me-server@.service @@ -20,18 +20,21 @@ EnvironmentFile=/var/lib/left4me/instances/%i/instance.env # runs in the unit's home (cwd doesn't matter for the mount helper); the # ExecStart re-applies WorkingDirectory after the mount and finds the dir. WorkingDirectory=-/var/lib/left4me/runtime/%i/merged/left4dead2 -# Single source of truth for the kernel-overlayfs mount: the web app's -# start_instance only stages cfg files and asks systemd to enable+start -# this unit; the actual `mount -t overlay` lives here so reboot auto-start -# works the same as a UI-driven start. Helper is idempotent. +# Single source of truth for the kernel-overlayfs mount lifecycle: the web +# app's start_instance only stages cfg files and asks systemd to enable+ +# start this unit; the actual `mount -t overlay` lives here so reboot +# auto-start works the same as a UI-driven start. ExecStopPost mirrors it +# so the unmount lives in the same place — no Python-side _mounter needed +# in stop/delete/reset paths. Both helper verbs are idempotent. # # `+` prefix runs the helper as PID 1 (root, no sandbox). Required because # the unit has NoNewPrivileges=true, which blocks sudo's setuid escalation # — and the helper itself needs root to nsenter into PID 1's mnt namespace -# anyway, so bypassing sandbox here is no looser than the sudoers rule -# the web app uses for its own helper invocations. +# anyway. ExecStopPost (not ExecStop) so unmount runs after the cgroup is +# cleared; ExecStop runs while srcds is still alive and would EBUSY. ExecStartPre=+/usr/local/libexec/left4me/left4me-overlay mount %i ExecStart=/var/lib/left4me/installation/srcds_run -game left4dead2 +hostport ${L4D2_PORT} $L4D2_ARGS +ExecStopPost=+/usr/local/libexec/left4me/left4me-overlay umount %i Restart=on-failure RestartSec=5 diff --git a/deploy/files/usr/local/libexec/left4me/left4me-overlay b/deploy/files/usr/local/libexec/left4me/left4me-overlay index 8b9203b..ba27134 100644 --- a/deploy/files/usr/local/libexec/left4me/left4me-overlay +++ b/deploy/files/usr/local/libexec/left4me/left4me-overlay @@ -134,12 +134,12 @@ def cmd_mount(name: str) -> None: # successfully but ExecStart failed afterwards (and Restart=on-failure # fires another cycle), the second ExecStartPre would otherwise refuse # to mount-on-top. Short-circuit here so the second cycle just gets - # straight to ExecStart. This also handles the dual-path case where - # both the web app's start_instance and the unit's ExecStartPre call - # the helper. - if os.path.ismount(merged_for_check): - if os.environ.get("LEFT4ME_OVERLAY_PRINT_ONLY") == "1": - print("ALREADY_MOUNTED") + # straight to ExecStart. PRINT_ONLY (test mode) bypasses this so the + # tests can exercise the full nsenter argv regardless of mount state. + if ( + os.environ.get("LEFT4ME_OVERLAY_PRINT_ONLY") != "1" + and os.path.ismount(merged_for_check) + ): return instance_env = r / "instances" / name / "instance.env" @@ -178,6 +178,18 @@ def cmd_umount(name: str) -> None: merged = (runtime_name_dir / "merged").resolve(strict=True) if merged.parent != runtime_name_dir: die(f"merged resolved outside runtime/{name}: {merged}") + + # Idempotency: if merged isn't a mount point right now, we have nothing + # to do. Mirrors cmd_mount's symmetric check. ExecStopPost on the unit + # is the one canonical caller, but a manual `systemctl reset-failed` + # cycle or a redundant cleanup pass should still be a no-op. PRINT_ONLY + # bypasses for the same reason as cmd_mount above. + if ( + os.environ.get("LEFT4ME_OVERLAY_PRINT_ONLY") != "1" + and not os.path.ismount(merged) + ): + return + argv = [ NSENTER, "--mount=/proc/1/ns/mnt", diff --git a/deploy/tests/test_deploy_artifacts.py b/deploy/tests/test_deploy_artifacts.py index efd15d5..38c3e9e 100644 --- a/deploy/tests/test_deploy_artifacts.py +++ b/deploy/tests/test_deploy_artifacts.py @@ -101,6 +101,19 @@ def test_server_unit_mounts_overlay_via_exec_start_pre(): assert "StartLimitIntervalSec=60s" in unit +def test_server_unit_unmounts_overlay_via_exec_stop_post(): + """Single source of truth for unmount, mirroring the mount path. + ExecStopPost (not ExecStop) so it runs after srcds has fully exited + and the cgroup is cleared — otherwise the open files in merged/ would + EBUSY the umount syscall. + """ + unit = SERVER_UNIT.read_text() + assert ( + "ExecStopPost=+/usr/local/libexec/left4me/left4me-overlay umount %i" + in unit + ) + + def test_overlay_helper_mount_is_idempotent_when_already_mounted(): """ExecStartPre runs on every Restart=on-failure cycle. If a previous start mounted successfully but ExecStart failed afterwards, the next @@ -108,7 +121,9 @@ def test_overlay_helper_mount_is_idempotent_when_already_mounted(): short-circuit when merged is already a mount point. """ text = OVERLAY_HELPER.read_text() - assert "os.path.ismount" in text + # Two ismount checks now: one in cmd_mount (skip if mounted), + # one in cmd_umount (skip if not mounted). + assert text.count("os.path.ismount") >= 2 def test_server_unit_contains_perf_baseline_directives(): diff --git a/l4d2host/fs/__init__.py b/l4d2host/fs/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/l4d2host/fs/base.py b/l4d2host/fs/base.py deleted file mode 100644 index c34f8d4..0000000 --- a/l4d2host/fs/base.py +++ /dev/null @@ -1,30 +0,0 @@ -from abc import ABC, abstractmethod -from pathlib import Path -from typing import Callable - - -class OverlayMounter(ABC): - @abstractmethod - def mount( - self, - *, - lowerdirs: str, - upperdir: Path, - workdir: Path, - merged: Path, - on_stdout: Callable[[str], None] | None = None, - on_stderr: Callable[[str], None] | None = None, - passthrough: bool = False, - ) -> None: - raise NotImplementedError - - @abstractmethod - def unmount( - self, - *, - merged: Path, - on_stdout: Callable[[str], None] | None = None, - on_stderr: Callable[[str], None] | None = None, - passthrough: bool = False, - ) -> None: - raise NotImplementedError diff --git a/l4d2host/fs/kernel_overlayfs.py b/l4d2host/fs/kernel_overlayfs.py deleted file mode 100644 index 164eec0..0000000 --- a/l4d2host/fs/kernel_overlayfs.py +++ /dev/null @@ -1,53 +0,0 @@ -from pathlib import Path -from typing import Callable - -from l4d2host.fs.base import OverlayMounter -from l4d2host.process import run_command - - -HELPER_PATH = "/usr/local/libexec/left4me/left4me-overlay" - - -class KernelOverlayFSMounter(OverlayMounter): - # Delegates the actual mount/umount syscalls to the privileged - # left4me-overlay helper. The helper takes only the instance name and - # rederives lowerdirs/upper/work/merged from disk; the OverlayMounter - # ABC accepts those args for compatibility, so we extract the name - # from the merged path's parent directory. - def mount( - self, - *, - lowerdirs: str, - upperdir: Path, - workdir: Path, - merged: Path, - on_stdout: Callable[[str], None] | None = None, - on_stderr: Callable[[str], None] | None = None, - passthrough: bool = False, - should_cancel: Callable[[], bool] | None = None, - ) -> None: - del lowerdirs, upperdir, workdir - run_command( - ["sudo", "-n", HELPER_PATH, "mount", merged.parent.name], - on_stdout=on_stdout, - on_stderr=on_stderr, - passthrough=passthrough, - should_cancel=should_cancel, - ) - - def unmount( - self, - *, - merged: Path, - on_stdout: Callable[[str], None] | None = None, - on_stderr: Callable[[str], None] | None = None, - passthrough: bool = False, - should_cancel: Callable[[], bool] | None = None, - ) -> None: - run_command( - ["sudo", "-n", HELPER_PATH, "umount", merged.parent.name], - on_stdout=on_stdout, - on_stderr=on_stderr, - passthrough=passthrough, - should_cancel=should_cancel, - ) diff --git a/l4d2host/instances.py b/l4d2host/instances.py index 053cbde..e88d96f 100644 --- a/l4d2host/instances.py +++ b/l4d2host/instances.py @@ -3,7 +3,6 @@ import shutil import subprocess from typing import Callable -from l4d2host.fs.kernel_overlayfs import KernelOverlayFSMounter from l4d2host.paths import DEFAULT_LEFT4ME_ROOT, get_left4me_root, overlay_path, validate_instance_name from l4d2host.service_control import disable_service, enable_service from l4d2host.spec import load_spec @@ -12,9 +11,6 @@ from l4d2host.spec import load_spec from l4d2host.logging import emit_step -_mounter = KernelOverlayFSMounter() - - DEFAULT_ROOT = DEFAULT_LEFT4ME_ROOT @@ -121,6 +117,9 @@ def stop_instance( ) -> None: name = validate_instance_name(name) root = get_left4me_root() if root is None else Path(root) + # `disable --now` triggers the unit's ExecStopPost, which unmounts the + # overlay. Single source of truth for unmount lives in the unit file; + # no Python-side unmount needed. emit_step("disabling + stopping systemd service...", on_stdout, passthrough) disable_service( name, @@ -129,17 +128,6 @@ def stop_instance( passthrough=passthrough, should_cancel=should_cancel, ) - emit_step("unmounting runtime overlay (if mounted)...", on_stdout, passthrough) - try: - _mounter.unmount( - merged=root / "runtime" / name / "merged", - on_stdout=on_stdout, - on_stderr=on_stderr, - passthrough=passthrough, - should_cancel=should_cancel, - ) - except subprocess.CalledProcessError: - pass emit_step("stop complete.", on_stdout, passthrough) @@ -155,6 +143,10 @@ def _purge_instance( instance_dir = root / "instances" / name runtime_dir = root / "runtime" / name + # disable --now triggers ExecStopPost which unmounts. The try/except + # tolerates the unit-not-loaded case (e.g., delete on an instance that + # was initialized but never started — no unit, nothing to disable, no + # mount to clean up either). emit_step("disabling + stopping systemd service (if running)...", on_stdout, passthrough) try: disable_service( @@ -167,18 +159,6 @@ def _purge_instance( except subprocess.CalledProcessError: pass - emit_step("unmounting runtime overlay (if mounted)...", on_stdout, passthrough) - try: - _mounter.unmount( - merged=runtime_dir / "merged", - on_stdout=on_stdout, - on_stderr=on_stderr, - passthrough=passthrough, - should_cancel=should_cancel, - ) - except subprocess.CalledProcessError: - pass - emit_step("removing instance files...", on_stdout, passthrough) if instance_dir.exists(): shutil.rmtree(instance_dir) diff --git a/l4d2host/tests/test_kernel_overlayfs.py b/l4d2host/tests/test_kernel_overlayfs.py deleted file mode 100644 index 296e880..0000000 --- a/l4d2host/tests/test_kernel_overlayfs.py +++ /dev/null @@ -1,76 +0,0 @@ -from pathlib import Path - -import pytest - - -HELPER_PATH = "/usr/local/libexec/left4me/left4me-overlay" - - -def test_mount_invokes_helper_with_name_only(monkeypatch: pytest.MonkeyPatch) -> None: - from l4d2host.fs.kernel_overlayfs import KernelOverlayFSMounter - - calls: list[list[str]] = [] - - def fake_run_command(cmd, **kwargs): - del kwargs - calls.append(list(cmd)) - - monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command) - - KernelOverlayFSMounter().mount( - lowerdirs="/var/lib/left4me/installation", - upperdir=Path("/var/lib/left4me/runtime/alpha/upper"), - workdir=Path("/var/lib/left4me/runtime/alpha/work"), - merged=Path("/var/lib/left4me/runtime/alpha/merged"), - ) - - assert calls == [["sudo", "-n", HELPER_PATH, "mount", "alpha"]] - - -def test_unmount_invokes_helper_with_umount_verb(monkeypatch: pytest.MonkeyPatch) -> None: - from l4d2host.fs.kernel_overlayfs import KernelOverlayFSMounter - - calls: list[list[str]] = [] - - def fake_run_command(cmd, **kwargs): - del kwargs - calls.append(list(cmd)) - - monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command) - - KernelOverlayFSMounter().unmount(merged=Path("/var/lib/left4me/runtime/alpha/merged")) - - assert calls == [["sudo", "-n", HELPER_PATH, "umount", "alpha"]] - - -def test_mount_propagates_run_command_kwargs(monkeypatch: pytest.MonkeyPatch) -> None: - from l4d2host.fs.kernel_overlayfs import KernelOverlayFSMounter - - captured: dict = {} - - def fake_run_command(cmd, **kwargs): - captured["cmd"] = list(cmd) - captured["kwargs"] = kwargs - - monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command) - - out: list[str] = [] - err: list[str] = [] - KernelOverlayFSMounter().mount( - lowerdirs="/var/lib/left4me/installation", - upperdir=Path("/var/lib/left4me/runtime/alpha/upper"), - workdir=Path("/var/lib/left4me/runtime/alpha/work"), - merged=Path("/var/lib/left4me/runtime/alpha/merged"), - on_stdout=out.append, - on_stderr=err.append, - passthrough=False, - should_cancel=lambda: False, - ) - - assert captured["cmd"][0:3] == ["sudo", "-n", HELPER_PATH] - captured["kwargs"]["on_stdout"]("hi") - captured["kwargs"]["on_stderr"]("oops") - assert out == ["hi"] - assert err == ["oops"] - assert captured["kwargs"]["passthrough"] is False - assert callable(captured["kwargs"]["should_cancel"]) diff --git a/l4d2host/tests/test_lifecycle.py b/l4d2host/tests/test_lifecycle.py index 6fb28b1..1c4c3ca 100644 --- a/l4d2host/tests/test_lifecycle.py +++ b/l4d2host/tests/test_lifecycle.py @@ -29,7 +29,6 @@ def test_start_order(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: (instance_dir / "server.cfg").write_text("sv_consistency 1") (instance_dir / "spec.yaml").write_text("port: 27015\noverlays: [x, y]\n") - monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command) monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command) start_instance("alpha", root=tmp_path) @@ -73,7 +72,6 @@ def test_start_copies_per_overlay_aliases_and_sweeps_stale( (src_7 / "server.cfg").write_text("ignored: alias not set\n") (upper_cfg_dir / "server_orphan.cfg").write_text("from previous start\n") - monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command) monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command) start_instance("alpha", root=tmp_path) @@ -105,7 +103,6 @@ def test_delete_succeeds_when_stop_service_fails(tmp_path: Path, monkeypatch: py (tmp_path / "instances" / "alpha").mkdir(parents=True) (tmp_path / "runtime" / "alpha" / "merged").mkdir(parents=True) - monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command) monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command) delete_instance("alpha", root=tmp_path) @@ -140,7 +137,6 @@ def test_reset_stops_unmounts_and_removes_dirs(tmp_path: Path, monkeypatch: pyte (runtime_dir / "upper" / "logs").mkdir(parents=True) (runtime_dir / "upper" / "logs" / "console.log").write_text("noise") - monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command) monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command) reset_instance("alpha", root=tmp_path) @@ -159,7 +155,6 @@ def test_reset_on_never_initialized_is_noop(tmp_path: Path, monkeypatch: pytest. if "disable" in cmd: raise subprocess.CalledProcessError(returncode=5, cmd=list(cmd), stderr="not loaded") - monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command) monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command) reset_instance("alpha", root=tmp_path) @@ -178,7 +173,6 @@ def test_delete_stopped_instance_removes_dirs(tmp_path: Path, monkeypatch: pytes (tmp_path / "instances" / "alpha").mkdir(parents=True) (tmp_path / "runtime" / "alpha" / "merged").mkdir(parents=True) - monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command) monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command) delete_instance("alpha", root=tmp_path) @@ -188,58 +182,7 @@ def test_delete_stopped_instance_removes_dirs(tmp_path: Path, monkeypatch: pytes assert ["sudo", "-n", "/usr/local/libexec/left4me/left4me-systemctl", "disable", "alpha"] in calls -def test_stop_succeeds_when_unmount_fails(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: - umount_calls: list[list[str]] = [] - - def fake_run_command(cmd, **kwargs): - del kwargs - if cmd[:4] == [ - "sudo", - "-n", - "/usr/local/libexec/left4me/left4me-overlay", - "umount", - ]: - umount_calls.append(list(cmd)) - raise subprocess.CalledProcessError( - returncode=1, - cmd=list(cmd), - stderr="umount: /var/lib/left4me/runtime/alpha/merged: not mounted", - ) - - monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command) - monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command) - - stop_instance("alpha", root=tmp_path) - - assert umount_calls, "stop must always attempt the overlay helper (no preflight)" - - -def test_delete_succeeds_when_unmount_fails(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: - umount_calls: list[list[str]] = [] - - def fake_run_command(cmd, **kwargs): - del kwargs - if cmd[:4] == [ - "sudo", - "-n", - "/usr/local/libexec/left4me/left4me-overlay", - "umount", - ]: - umount_calls.append(list(cmd)) - raise subprocess.CalledProcessError( - returncode=1, - cmd=list(cmd), - stderr="umount: /var/lib/left4me/runtime/alpha/merged: not mounted", - ) - - (tmp_path / "instances" / "alpha").mkdir(parents=True) - (tmp_path / "runtime" / "alpha" / "merged").mkdir(parents=True) - - monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command) - monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command) - - delete_instance("alpha", root=tmp_path) - - assert umount_calls, "delete must always attempt the overlay helper (no preflight)" - assert not (tmp_path / "instances" / "alpha").exists() - assert not (tmp_path / "runtime" / "alpha").exists() +# test_stop_succeeds_when_unmount_fails / test_delete_succeeds_when_unmount_fails +# were removed when the Python-side unmount was dropped: the unit's +# ExecStopPost is now the single code path for unmount, so there's no +# Python-side failure to tolerate.