refactor(l4d2-host): unmount via ExecStopPost — single code path mirroring mount

Symmetric with the earlier mount cleanup (commits 519567e..a982995). Until
now, the unit's ExecStartPre handled mount but the Python side still drove
unmount: stop_instance and _purge_instance both called _mounter.unmount,
which wrapped sudo + the helper. Two code paths for two halves of the
same lifecycle.

Move unmount into the unit:

- ExecStopPost=+/usr/local/libexec/left4me/left4me-overlay umount %i
  (ExecStopPost, not ExecStop, so it runs after the cgroup is cleared;
  ExecStop runs while srcds is alive and would EBUSY the umount syscall.)
- Helper's umount verb is now idempotent (mirrors mount): if merged
  isn't a mount point, return early. PRINT_ONLY mode bypasses both
  short-circuits so the unit tests still exercise the full nsenter argv.

Drop the dead Python machinery:

- _mounter.unmount(...) calls in stop_instance and _purge_instance
- _mounter global + KernelOverlayFSMounter import
- The whole l4d2host/fs/ package (OverlayMounter ABC + KernelOverlayFSMounter
  class) — no production callers, just self-tests
- l4d2host/tests/test_kernel_overlayfs.py
- test_stop_succeeds_when_unmount_fails / test_delete_succeeds_when_unmount_fails
  (tested Python-side unmount-failure tolerance that no longer exists)
- The l4d2host.fs.kernel_overlayfs.run_command monkeypatches in lifecycle tests

After this, the only thing start_instance does beyond cfg-staging is ask
systemd to enable+start the unit. stop/delete/reset only ask systemd to
disable; the overlay lifecycle lives entirely in the unit file.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
mwiegand 2026-05-09 13:09:52 +02:00
parent fc371711ec
commit ff6ce7b091
No known key found for this signature in database
9 changed files with 54 additions and 260 deletions

View file

@ -20,18 +20,21 @@ EnvironmentFile=/var/lib/left4me/instances/%i/instance.env
# runs in the unit's home (cwd doesn't matter for the mount helper); the
# ExecStart re-applies WorkingDirectory after the mount and finds the dir.
WorkingDirectory=-/var/lib/left4me/runtime/%i/merged/left4dead2
# Single source of truth for the kernel-overlayfs mount: the web app's
# start_instance only stages cfg files and asks systemd to enable+start
# this unit; the actual `mount -t overlay` lives here so reboot auto-start
# works the same as a UI-driven start. Helper is idempotent.
# Single source of truth for the kernel-overlayfs mount lifecycle: the web
# app's start_instance only stages cfg files and asks systemd to enable+
# start this unit; the actual `mount -t overlay` lives here so reboot
# auto-start works the same as a UI-driven start. ExecStopPost mirrors it
# so the unmount lives in the same place — no Python-side _mounter needed
# in stop/delete/reset paths. Both helper verbs are idempotent.
#
# `+` prefix runs the helper as PID 1 (root, no sandbox). Required because
# the unit has NoNewPrivileges=true, which blocks sudo's setuid escalation
# — and the helper itself needs root to nsenter into PID 1's mnt namespace
# anyway, so bypassing sandbox here is no looser than the sudoers rule
# the web app uses for its own helper invocations.
# anyway. ExecStopPost (not ExecStop) so unmount runs after the cgroup is
# cleared; ExecStop runs while srcds is still alive and would EBUSY.
ExecStartPre=+/usr/local/libexec/left4me/left4me-overlay mount %i
ExecStart=/var/lib/left4me/installation/srcds_run -game left4dead2 +hostport ${L4D2_PORT} $L4D2_ARGS
ExecStopPost=+/usr/local/libexec/left4me/left4me-overlay umount %i
Restart=on-failure
RestartSec=5

View file

@ -134,12 +134,12 @@ def cmd_mount(name: str) -> None:
# successfully but ExecStart failed afterwards (and Restart=on-failure
# fires another cycle), the second ExecStartPre would otherwise refuse
# to mount-on-top. Short-circuit here so the second cycle just gets
# straight to ExecStart. This also handles the dual-path case where
# both the web app's start_instance and the unit's ExecStartPre call
# the helper.
if os.path.ismount(merged_for_check):
if os.environ.get("LEFT4ME_OVERLAY_PRINT_ONLY") == "1":
print("ALREADY_MOUNTED")
# straight to ExecStart. PRINT_ONLY (test mode) bypasses this so the
# tests can exercise the full nsenter argv regardless of mount state.
if (
os.environ.get("LEFT4ME_OVERLAY_PRINT_ONLY") != "1"
and os.path.ismount(merged_for_check)
):
return
instance_env = r / "instances" / name / "instance.env"
@ -178,6 +178,18 @@ def cmd_umount(name: str) -> None:
merged = (runtime_name_dir / "merged").resolve(strict=True)
if merged.parent != runtime_name_dir:
die(f"merged resolved outside runtime/{name}: {merged}")
# Idempotency: if merged isn't a mount point right now, we have nothing
# to do. Mirrors cmd_mount's symmetric check. ExecStopPost on the unit
# is the one canonical caller, but a manual `systemctl reset-failed`
# cycle or a redundant cleanup pass should still be a no-op. PRINT_ONLY
# bypasses for the same reason as cmd_mount above.
if (
os.environ.get("LEFT4ME_OVERLAY_PRINT_ONLY") != "1"
and not os.path.ismount(merged)
):
return
argv = [
NSENTER,
"--mount=/proc/1/ns/mnt",

View file

@ -101,6 +101,19 @@ def test_server_unit_mounts_overlay_via_exec_start_pre():
assert "StartLimitIntervalSec=60s" in unit
def test_server_unit_unmounts_overlay_via_exec_stop_post():
"""Single source of truth for unmount, mirroring the mount path.
ExecStopPost (not ExecStop) so it runs after srcds has fully exited
and the cgroup is cleared otherwise the open files in merged/ would
EBUSY the umount syscall.
"""
unit = SERVER_UNIT.read_text()
assert (
"ExecStopPost=+/usr/local/libexec/left4me/left4me-overlay umount %i"
in unit
)
def test_overlay_helper_mount_is_idempotent_when_already_mounted():
"""ExecStartPre runs on every Restart=on-failure cycle. If a previous
start mounted successfully but ExecStart failed afterwards, the next
@ -108,7 +121,9 @@ def test_overlay_helper_mount_is_idempotent_when_already_mounted():
short-circuit when merged is already a mount point.
"""
text = OVERLAY_HELPER.read_text()
assert "os.path.ismount" in text
# Two ismount checks now: one in cmd_mount (skip if mounted),
# one in cmd_umount (skip if not mounted).
assert text.count("os.path.ismount") >= 2
def test_server_unit_contains_perf_baseline_directives():

View file

@ -1,30 +0,0 @@
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Callable
class OverlayMounter(ABC):
@abstractmethod
def mount(
self,
*,
lowerdirs: str,
upperdir: Path,
workdir: Path,
merged: Path,
on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False,
) -> None:
raise NotImplementedError
@abstractmethod
def unmount(
self,
*,
merged: Path,
on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False,
) -> None:
raise NotImplementedError

View file

@ -1,53 +0,0 @@
from pathlib import Path
from typing import Callable
from l4d2host.fs.base import OverlayMounter
from l4d2host.process import run_command
HELPER_PATH = "/usr/local/libexec/left4me/left4me-overlay"
class KernelOverlayFSMounter(OverlayMounter):
# Delegates the actual mount/umount syscalls to the privileged
# left4me-overlay helper. The helper takes only the instance name and
# rederives lowerdirs/upper/work/merged from disk; the OverlayMounter
# ABC accepts those args for compatibility, so we extract the name
# from the merged path's parent directory.
def mount(
self,
*,
lowerdirs: str,
upperdir: Path,
workdir: Path,
merged: Path,
on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False,
should_cancel: Callable[[], bool] | None = None,
) -> None:
del lowerdirs, upperdir, workdir
run_command(
["sudo", "-n", HELPER_PATH, "mount", merged.parent.name],
on_stdout=on_stdout,
on_stderr=on_stderr,
passthrough=passthrough,
should_cancel=should_cancel,
)
def unmount(
self,
*,
merged: Path,
on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False,
should_cancel: Callable[[], bool] | None = None,
) -> None:
run_command(
["sudo", "-n", HELPER_PATH, "umount", merged.parent.name],
on_stdout=on_stdout,
on_stderr=on_stderr,
passthrough=passthrough,
should_cancel=should_cancel,
)

View file

@ -3,7 +3,6 @@ import shutil
import subprocess
from typing import Callable
from l4d2host.fs.kernel_overlayfs import KernelOverlayFSMounter
from l4d2host.paths import DEFAULT_LEFT4ME_ROOT, get_left4me_root, overlay_path, validate_instance_name
from l4d2host.service_control import disable_service, enable_service
from l4d2host.spec import load_spec
@ -12,9 +11,6 @@ from l4d2host.spec import load_spec
from l4d2host.logging import emit_step
_mounter = KernelOverlayFSMounter()
DEFAULT_ROOT = DEFAULT_LEFT4ME_ROOT
@ -121,6 +117,9 @@ def stop_instance(
) -> None:
name = validate_instance_name(name)
root = get_left4me_root() if root is None else Path(root)
# `disable --now` triggers the unit's ExecStopPost, which unmounts the
# overlay. Single source of truth for unmount lives in the unit file;
# no Python-side unmount needed.
emit_step("disabling + stopping systemd service...", on_stdout, passthrough)
disable_service(
name,
@ -129,17 +128,6 @@ def stop_instance(
passthrough=passthrough,
should_cancel=should_cancel,
)
emit_step("unmounting runtime overlay (if mounted)...", on_stdout, passthrough)
try:
_mounter.unmount(
merged=root / "runtime" / name / "merged",
on_stdout=on_stdout,
on_stderr=on_stderr,
passthrough=passthrough,
should_cancel=should_cancel,
)
except subprocess.CalledProcessError:
pass
emit_step("stop complete.", on_stdout, passthrough)
@ -155,6 +143,10 @@ def _purge_instance(
instance_dir = root / "instances" / name
runtime_dir = root / "runtime" / name
# disable --now triggers ExecStopPost which unmounts. The try/except
# tolerates the unit-not-loaded case (e.g., delete on an instance that
# was initialized but never started — no unit, nothing to disable, no
# mount to clean up either).
emit_step("disabling + stopping systemd service (if running)...", on_stdout, passthrough)
try:
disable_service(
@ -167,18 +159,6 @@ def _purge_instance(
except subprocess.CalledProcessError:
pass
emit_step("unmounting runtime overlay (if mounted)...", on_stdout, passthrough)
try:
_mounter.unmount(
merged=runtime_dir / "merged",
on_stdout=on_stdout,
on_stderr=on_stderr,
passthrough=passthrough,
should_cancel=should_cancel,
)
except subprocess.CalledProcessError:
pass
emit_step("removing instance files...", on_stdout, passthrough)
if instance_dir.exists():
shutil.rmtree(instance_dir)

View file

@ -1,76 +0,0 @@
from pathlib import Path
import pytest
HELPER_PATH = "/usr/local/libexec/left4me/left4me-overlay"
def test_mount_invokes_helper_with_name_only(monkeypatch: pytest.MonkeyPatch) -> None:
from l4d2host.fs.kernel_overlayfs import KernelOverlayFSMounter
calls: list[list[str]] = []
def fake_run_command(cmd, **kwargs):
del kwargs
calls.append(list(cmd))
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
KernelOverlayFSMounter().mount(
lowerdirs="/var/lib/left4me/installation",
upperdir=Path("/var/lib/left4me/runtime/alpha/upper"),
workdir=Path("/var/lib/left4me/runtime/alpha/work"),
merged=Path("/var/lib/left4me/runtime/alpha/merged"),
)
assert calls == [["sudo", "-n", HELPER_PATH, "mount", "alpha"]]
def test_unmount_invokes_helper_with_umount_verb(monkeypatch: pytest.MonkeyPatch) -> None:
from l4d2host.fs.kernel_overlayfs import KernelOverlayFSMounter
calls: list[list[str]] = []
def fake_run_command(cmd, **kwargs):
del kwargs
calls.append(list(cmd))
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
KernelOverlayFSMounter().unmount(merged=Path("/var/lib/left4me/runtime/alpha/merged"))
assert calls == [["sudo", "-n", HELPER_PATH, "umount", "alpha"]]
def test_mount_propagates_run_command_kwargs(monkeypatch: pytest.MonkeyPatch) -> None:
from l4d2host.fs.kernel_overlayfs import KernelOverlayFSMounter
captured: dict = {}
def fake_run_command(cmd, **kwargs):
captured["cmd"] = list(cmd)
captured["kwargs"] = kwargs
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
out: list[str] = []
err: list[str] = []
KernelOverlayFSMounter().mount(
lowerdirs="/var/lib/left4me/installation",
upperdir=Path("/var/lib/left4me/runtime/alpha/upper"),
workdir=Path("/var/lib/left4me/runtime/alpha/work"),
merged=Path("/var/lib/left4me/runtime/alpha/merged"),
on_stdout=out.append,
on_stderr=err.append,
passthrough=False,
should_cancel=lambda: False,
)
assert captured["cmd"][0:3] == ["sudo", "-n", HELPER_PATH]
captured["kwargs"]["on_stdout"]("hi")
captured["kwargs"]["on_stderr"]("oops")
assert out == ["hi"]
assert err == ["oops"]
assert captured["kwargs"]["passthrough"] is False
assert callable(captured["kwargs"]["should_cancel"])

View file

@ -29,7 +29,6 @@ def test_start_order(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
(instance_dir / "server.cfg").write_text("sv_consistency 1")
(instance_dir / "spec.yaml").write_text("port: 27015\noverlays: [x, y]\n")
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
start_instance("alpha", root=tmp_path)
@ -73,7 +72,6 @@ def test_start_copies_per_overlay_aliases_and_sweeps_stale(
(src_7 / "server.cfg").write_text("ignored: alias not set\n")
(upper_cfg_dir / "server_orphan.cfg").write_text("from previous start\n")
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
start_instance("alpha", root=tmp_path)
@ -105,7 +103,6 @@ def test_delete_succeeds_when_stop_service_fails(tmp_path: Path, monkeypatch: py
(tmp_path / "instances" / "alpha").mkdir(parents=True)
(tmp_path / "runtime" / "alpha" / "merged").mkdir(parents=True)
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
delete_instance("alpha", root=tmp_path)
@ -140,7 +137,6 @@ def test_reset_stops_unmounts_and_removes_dirs(tmp_path: Path, monkeypatch: pyte
(runtime_dir / "upper" / "logs").mkdir(parents=True)
(runtime_dir / "upper" / "logs" / "console.log").write_text("noise")
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
reset_instance("alpha", root=tmp_path)
@ -159,7 +155,6 @@ def test_reset_on_never_initialized_is_noop(tmp_path: Path, monkeypatch: pytest.
if "disable" in cmd:
raise subprocess.CalledProcessError(returncode=5, cmd=list(cmd), stderr="not loaded")
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
reset_instance("alpha", root=tmp_path)
@ -178,7 +173,6 @@ def test_delete_stopped_instance_removes_dirs(tmp_path: Path, monkeypatch: pytes
(tmp_path / "instances" / "alpha").mkdir(parents=True)
(tmp_path / "runtime" / "alpha" / "merged").mkdir(parents=True)
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
delete_instance("alpha", root=tmp_path)
@ -188,58 +182,7 @@ def test_delete_stopped_instance_removes_dirs(tmp_path: Path, monkeypatch: pytes
assert ["sudo", "-n", "/usr/local/libexec/left4me/left4me-systemctl", "disable", "alpha"] in calls
def test_stop_succeeds_when_unmount_fails(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
umount_calls: list[list[str]] = []
def fake_run_command(cmd, **kwargs):
del kwargs
if cmd[:4] == [
"sudo",
"-n",
"/usr/local/libexec/left4me/left4me-overlay",
"umount",
]:
umount_calls.append(list(cmd))
raise subprocess.CalledProcessError(
returncode=1,
cmd=list(cmd),
stderr="umount: /var/lib/left4me/runtime/alpha/merged: not mounted",
)
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
stop_instance("alpha", root=tmp_path)
assert umount_calls, "stop must always attempt the overlay helper (no preflight)"
def test_delete_succeeds_when_unmount_fails(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
umount_calls: list[list[str]] = []
def fake_run_command(cmd, **kwargs):
del kwargs
if cmd[:4] == [
"sudo",
"-n",
"/usr/local/libexec/left4me/left4me-overlay",
"umount",
]:
umount_calls.append(list(cmd))
raise subprocess.CalledProcessError(
returncode=1,
cmd=list(cmd),
stderr="umount: /var/lib/left4me/runtime/alpha/merged: not mounted",
)
(tmp_path / "instances" / "alpha").mkdir(parents=True)
(tmp_path / "runtime" / "alpha" / "merged").mkdir(parents=True)
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
delete_instance("alpha", root=tmp_path)
assert umount_calls, "delete must always attempt the overlay helper (no preflight)"
assert not (tmp_path / "instances" / "alpha").exists()
assert not (tmp_path / "runtime" / "alpha").exists()
# test_stop_succeeds_when_unmount_fails / test_delete_succeeds_when_unmount_fails
# were removed when the Python-side unmount was dropped: the unit's
# ExecStopPost is now the single code path for unmount, so there's no
# Python-side failure to tolerate.