refactor(l4d2-host): unmount via ExecStopPost — single code path mirroring mount

Symmetric with the earlier mount cleanup (commits 519567e..a982995). Until
now, the unit's ExecStartPre handled mount but the Python side still drove
unmount: stop_instance and _purge_instance both called _mounter.unmount,
which wrapped sudo + the helper. Two code paths for two halves of the
same lifecycle.

Move unmount into the unit:

- ExecStopPost=+/usr/local/libexec/left4me/left4me-overlay umount %i
  (ExecStopPost, not ExecStop, so it runs after the cgroup is cleared;
  ExecStop runs while srcds is alive and would EBUSY the umount syscall.)
- Helper's umount verb is now idempotent (mirrors mount): if merged
  isn't a mount point, return early. PRINT_ONLY mode bypasses both
  short-circuits so the unit tests still exercise the full nsenter argv.

Drop the dead Python machinery:

- _mounter.unmount(...) calls in stop_instance and _purge_instance
- _mounter global + KernelOverlayFSMounter import
- The whole l4d2host/fs/ package (OverlayMounter ABC + KernelOverlayFSMounter
  class) — no production callers, just self-tests
- l4d2host/tests/test_kernel_overlayfs.py
- test_stop_succeeds_when_unmount_fails / test_delete_succeeds_when_unmount_fails
  (tested Python-side unmount-failure tolerance that no longer exists)
- The l4d2host.fs.kernel_overlayfs.run_command monkeypatches in lifecycle tests

After this, the only thing start_instance does beyond cfg-staging is ask
systemd to enable+start the unit. stop/delete/reset only ask systemd to
disable; the overlay lifecycle lives entirely in the unit file.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
mwiegand 2026-05-09 13:09:52 +02:00
parent fc371711ec
commit ff6ce7b091
No known key found for this signature in database
9 changed files with 54 additions and 260 deletions

View file

@ -20,18 +20,21 @@ EnvironmentFile=/var/lib/left4me/instances/%i/instance.env
# runs in the unit's home (cwd doesn't matter for the mount helper); the # runs in the unit's home (cwd doesn't matter for the mount helper); the
# ExecStart re-applies WorkingDirectory after the mount and finds the dir. # ExecStart re-applies WorkingDirectory after the mount and finds the dir.
WorkingDirectory=-/var/lib/left4me/runtime/%i/merged/left4dead2 WorkingDirectory=-/var/lib/left4me/runtime/%i/merged/left4dead2
# Single source of truth for the kernel-overlayfs mount: the web app's # Single source of truth for the kernel-overlayfs mount lifecycle: the web
# start_instance only stages cfg files and asks systemd to enable+start # app's start_instance only stages cfg files and asks systemd to enable+
# this unit; the actual `mount -t overlay` lives here so reboot auto-start # start this unit; the actual `mount -t overlay` lives here so reboot
# works the same as a UI-driven start. Helper is idempotent. # auto-start works the same as a UI-driven start. ExecStopPost mirrors it
# so the unmount lives in the same place — no Python-side _mounter needed
# in stop/delete/reset paths. Both helper verbs are idempotent.
# #
# `+` prefix runs the helper as PID 1 (root, no sandbox). Required because # `+` prefix runs the helper as PID 1 (root, no sandbox). Required because
# the unit has NoNewPrivileges=true, which blocks sudo's setuid escalation # the unit has NoNewPrivileges=true, which blocks sudo's setuid escalation
# — and the helper itself needs root to nsenter into PID 1's mnt namespace # — and the helper itself needs root to nsenter into PID 1's mnt namespace
# anyway, so bypassing sandbox here is no looser than the sudoers rule # anyway. ExecStopPost (not ExecStop) so unmount runs after the cgroup is
# the web app uses for its own helper invocations. # cleared; ExecStop runs while srcds is still alive and would EBUSY.
ExecStartPre=+/usr/local/libexec/left4me/left4me-overlay mount %i ExecStartPre=+/usr/local/libexec/left4me/left4me-overlay mount %i
ExecStart=/var/lib/left4me/installation/srcds_run -game left4dead2 +hostport ${L4D2_PORT} $L4D2_ARGS ExecStart=/var/lib/left4me/installation/srcds_run -game left4dead2 +hostport ${L4D2_PORT} $L4D2_ARGS
ExecStopPost=+/usr/local/libexec/left4me/left4me-overlay umount %i
Restart=on-failure Restart=on-failure
RestartSec=5 RestartSec=5

View file

@ -134,12 +134,12 @@ def cmd_mount(name: str) -> None:
# successfully but ExecStart failed afterwards (and Restart=on-failure # successfully but ExecStart failed afterwards (and Restart=on-failure
# fires another cycle), the second ExecStartPre would otherwise refuse # fires another cycle), the second ExecStartPre would otherwise refuse
# to mount-on-top. Short-circuit here so the second cycle just gets # to mount-on-top. Short-circuit here so the second cycle just gets
# straight to ExecStart. This also handles the dual-path case where # straight to ExecStart. PRINT_ONLY (test mode) bypasses this so the
# both the web app's start_instance and the unit's ExecStartPre call # tests can exercise the full nsenter argv regardless of mount state.
# the helper. if (
if os.path.ismount(merged_for_check): os.environ.get("LEFT4ME_OVERLAY_PRINT_ONLY") != "1"
if os.environ.get("LEFT4ME_OVERLAY_PRINT_ONLY") == "1": and os.path.ismount(merged_for_check)
print("ALREADY_MOUNTED") ):
return return
instance_env = r / "instances" / name / "instance.env" instance_env = r / "instances" / name / "instance.env"
@ -178,6 +178,18 @@ def cmd_umount(name: str) -> None:
merged = (runtime_name_dir / "merged").resolve(strict=True) merged = (runtime_name_dir / "merged").resolve(strict=True)
if merged.parent != runtime_name_dir: if merged.parent != runtime_name_dir:
die(f"merged resolved outside runtime/{name}: {merged}") die(f"merged resolved outside runtime/{name}: {merged}")
# Idempotency: if merged isn't a mount point right now, we have nothing
# to do. Mirrors cmd_mount's symmetric check. ExecStopPost on the unit
# is the one canonical caller, but a manual `systemctl reset-failed`
# cycle or a redundant cleanup pass should still be a no-op. PRINT_ONLY
# bypasses for the same reason as cmd_mount above.
if (
os.environ.get("LEFT4ME_OVERLAY_PRINT_ONLY") != "1"
and not os.path.ismount(merged)
):
return
argv = [ argv = [
NSENTER, NSENTER,
"--mount=/proc/1/ns/mnt", "--mount=/proc/1/ns/mnt",

View file

@ -101,6 +101,19 @@ def test_server_unit_mounts_overlay_via_exec_start_pre():
assert "StartLimitIntervalSec=60s" in unit assert "StartLimitIntervalSec=60s" in unit
def test_server_unit_unmounts_overlay_via_exec_stop_post():
"""Single source of truth for unmount, mirroring the mount path.
ExecStopPost (not ExecStop) so it runs after srcds has fully exited
and the cgroup is cleared otherwise the open files in merged/ would
EBUSY the umount syscall.
"""
unit = SERVER_UNIT.read_text()
assert (
"ExecStopPost=+/usr/local/libexec/left4me/left4me-overlay umount %i"
in unit
)
def test_overlay_helper_mount_is_idempotent_when_already_mounted(): def test_overlay_helper_mount_is_idempotent_when_already_mounted():
"""ExecStartPre runs on every Restart=on-failure cycle. If a previous """ExecStartPre runs on every Restart=on-failure cycle. If a previous
start mounted successfully but ExecStart failed afterwards, the next start mounted successfully but ExecStart failed afterwards, the next
@ -108,7 +121,9 @@ def test_overlay_helper_mount_is_idempotent_when_already_mounted():
short-circuit when merged is already a mount point. short-circuit when merged is already a mount point.
""" """
text = OVERLAY_HELPER.read_text() text = OVERLAY_HELPER.read_text()
assert "os.path.ismount" in text # Two ismount checks now: one in cmd_mount (skip if mounted),
# one in cmd_umount (skip if not mounted).
assert text.count("os.path.ismount") >= 2
def test_server_unit_contains_perf_baseline_directives(): def test_server_unit_contains_perf_baseline_directives():

View file

@ -1,30 +0,0 @@
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Callable
class OverlayMounter(ABC):
@abstractmethod
def mount(
self,
*,
lowerdirs: str,
upperdir: Path,
workdir: Path,
merged: Path,
on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False,
) -> None:
raise NotImplementedError
@abstractmethod
def unmount(
self,
*,
merged: Path,
on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False,
) -> None:
raise NotImplementedError

View file

@ -1,53 +0,0 @@
from pathlib import Path
from typing import Callable
from l4d2host.fs.base import OverlayMounter
from l4d2host.process import run_command
HELPER_PATH = "/usr/local/libexec/left4me/left4me-overlay"
class KernelOverlayFSMounter(OverlayMounter):
# Delegates the actual mount/umount syscalls to the privileged
# left4me-overlay helper. The helper takes only the instance name and
# rederives lowerdirs/upper/work/merged from disk; the OverlayMounter
# ABC accepts those args for compatibility, so we extract the name
# from the merged path's parent directory.
def mount(
self,
*,
lowerdirs: str,
upperdir: Path,
workdir: Path,
merged: Path,
on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False,
should_cancel: Callable[[], bool] | None = None,
) -> None:
del lowerdirs, upperdir, workdir
run_command(
["sudo", "-n", HELPER_PATH, "mount", merged.parent.name],
on_stdout=on_stdout,
on_stderr=on_stderr,
passthrough=passthrough,
should_cancel=should_cancel,
)
def unmount(
self,
*,
merged: Path,
on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False,
should_cancel: Callable[[], bool] | None = None,
) -> None:
run_command(
["sudo", "-n", HELPER_PATH, "umount", merged.parent.name],
on_stdout=on_stdout,
on_stderr=on_stderr,
passthrough=passthrough,
should_cancel=should_cancel,
)

View file

@ -3,7 +3,6 @@ import shutil
import subprocess import subprocess
from typing import Callable from typing import Callable
from l4d2host.fs.kernel_overlayfs import KernelOverlayFSMounter
from l4d2host.paths import DEFAULT_LEFT4ME_ROOT, get_left4me_root, overlay_path, validate_instance_name from l4d2host.paths import DEFAULT_LEFT4ME_ROOT, get_left4me_root, overlay_path, validate_instance_name
from l4d2host.service_control import disable_service, enable_service from l4d2host.service_control import disable_service, enable_service
from l4d2host.spec import load_spec from l4d2host.spec import load_spec
@ -12,9 +11,6 @@ from l4d2host.spec import load_spec
from l4d2host.logging import emit_step from l4d2host.logging import emit_step
_mounter = KernelOverlayFSMounter()
DEFAULT_ROOT = DEFAULT_LEFT4ME_ROOT DEFAULT_ROOT = DEFAULT_LEFT4ME_ROOT
@ -121,6 +117,9 @@ def stop_instance(
) -> None: ) -> None:
name = validate_instance_name(name) name = validate_instance_name(name)
root = get_left4me_root() if root is None else Path(root) root = get_left4me_root() if root is None else Path(root)
# `disable --now` triggers the unit's ExecStopPost, which unmounts the
# overlay. Single source of truth for unmount lives in the unit file;
# no Python-side unmount needed.
emit_step("disabling + stopping systemd service...", on_stdout, passthrough) emit_step("disabling + stopping systemd service...", on_stdout, passthrough)
disable_service( disable_service(
name, name,
@ -129,17 +128,6 @@ def stop_instance(
passthrough=passthrough, passthrough=passthrough,
should_cancel=should_cancel, should_cancel=should_cancel,
) )
emit_step("unmounting runtime overlay (if mounted)...", on_stdout, passthrough)
try:
_mounter.unmount(
merged=root / "runtime" / name / "merged",
on_stdout=on_stdout,
on_stderr=on_stderr,
passthrough=passthrough,
should_cancel=should_cancel,
)
except subprocess.CalledProcessError:
pass
emit_step("stop complete.", on_stdout, passthrough) emit_step("stop complete.", on_stdout, passthrough)
@ -155,6 +143,10 @@ def _purge_instance(
instance_dir = root / "instances" / name instance_dir = root / "instances" / name
runtime_dir = root / "runtime" / name runtime_dir = root / "runtime" / name
# disable --now triggers ExecStopPost which unmounts. The try/except
# tolerates the unit-not-loaded case (e.g., delete on an instance that
# was initialized but never started — no unit, nothing to disable, no
# mount to clean up either).
emit_step("disabling + stopping systemd service (if running)...", on_stdout, passthrough) emit_step("disabling + stopping systemd service (if running)...", on_stdout, passthrough)
try: try:
disable_service( disable_service(
@ -167,18 +159,6 @@ def _purge_instance(
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
pass pass
emit_step("unmounting runtime overlay (if mounted)...", on_stdout, passthrough)
try:
_mounter.unmount(
merged=runtime_dir / "merged",
on_stdout=on_stdout,
on_stderr=on_stderr,
passthrough=passthrough,
should_cancel=should_cancel,
)
except subprocess.CalledProcessError:
pass
emit_step("removing instance files...", on_stdout, passthrough) emit_step("removing instance files...", on_stdout, passthrough)
if instance_dir.exists(): if instance_dir.exists():
shutil.rmtree(instance_dir) shutil.rmtree(instance_dir)

View file

@ -1,76 +0,0 @@
from pathlib import Path
import pytest
HELPER_PATH = "/usr/local/libexec/left4me/left4me-overlay"
def test_mount_invokes_helper_with_name_only(monkeypatch: pytest.MonkeyPatch) -> None:
from l4d2host.fs.kernel_overlayfs import KernelOverlayFSMounter
calls: list[list[str]] = []
def fake_run_command(cmd, **kwargs):
del kwargs
calls.append(list(cmd))
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
KernelOverlayFSMounter().mount(
lowerdirs="/var/lib/left4me/installation",
upperdir=Path("/var/lib/left4me/runtime/alpha/upper"),
workdir=Path("/var/lib/left4me/runtime/alpha/work"),
merged=Path("/var/lib/left4me/runtime/alpha/merged"),
)
assert calls == [["sudo", "-n", HELPER_PATH, "mount", "alpha"]]
def test_unmount_invokes_helper_with_umount_verb(monkeypatch: pytest.MonkeyPatch) -> None:
from l4d2host.fs.kernel_overlayfs import KernelOverlayFSMounter
calls: list[list[str]] = []
def fake_run_command(cmd, **kwargs):
del kwargs
calls.append(list(cmd))
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
KernelOverlayFSMounter().unmount(merged=Path("/var/lib/left4me/runtime/alpha/merged"))
assert calls == [["sudo", "-n", HELPER_PATH, "umount", "alpha"]]
def test_mount_propagates_run_command_kwargs(monkeypatch: pytest.MonkeyPatch) -> None:
from l4d2host.fs.kernel_overlayfs import KernelOverlayFSMounter
captured: dict = {}
def fake_run_command(cmd, **kwargs):
captured["cmd"] = list(cmd)
captured["kwargs"] = kwargs
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
out: list[str] = []
err: list[str] = []
KernelOverlayFSMounter().mount(
lowerdirs="/var/lib/left4me/installation",
upperdir=Path("/var/lib/left4me/runtime/alpha/upper"),
workdir=Path("/var/lib/left4me/runtime/alpha/work"),
merged=Path("/var/lib/left4me/runtime/alpha/merged"),
on_stdout=out.append,
on_stderr=err.append,
passthrough=False,
should_cancel=lambda: False,
)
assert captured["cmd"][0:3] == ["sudo", "-n", HELPER_PATH]
captured["kwargs"]["on_stdout"]("hi")
captured["kwargs"]["on_stderr"]("oops")
assert out == ["hi"]
assert err == ["oops"]
assert captured["kwargs"]["passthrough"] is False
assert callable(captured["kwargs"]["should_cancel"])

View file

@ -29,7 +29,6 @@ def test_start_order(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
(instance_dir / "server.cfg").write_text("sv_consistency 1") (instance_dir / "server.cfg").write_text("sv_consistency 1")
(instance_dir / "spec.yaml").write_text("port: 27015\noverlays: [x, y]\n") (instance_dir / "spec.yaml").write_text("port: 27015\noverlays: [x, y]\n")
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command) monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
start_instance("alpha", root=tmp_path) start_instance("alpha", root=tmp_path)
@ -73,7 +72,6 @@ def test_start_copies_per_overlay_aliases_and_sweeps_stale(
(src_7 / "server.cfg").write_text("ignored: alias not set\n") (src_7 / "server.cfg").write_text("ignored: alias not set\n")
(upper_cfg_dir / "server_orphan.cfg").write_text("from previous start\n") (upper_cfg_dir / "server_orphan.cfg").write_text("from previous start\n")
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command) monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
start_instance("alpha", root=tmp_path) start_instance("alpha", root=tmp_path)
@ -105,7 +103,6 @@ def test_delete_succeeds_when_stop_service_fails(tmp_path: Path, monkeypatch: py
(tmp_path / "instances" / "alpha").mkdir(parents=True) (tmp_path / "instances" / "alpha").mkdir(parents=True)
(tmp_path / "runtime" / "alpha" / "merged").mkdir(parents=True) (tmp_path / "runtime" / "alpha" / "merged").mkdir(parents=True)
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command) monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
delete_instance("alpha", root=tmp_path) delete_instance("alpha", root=tmp_path)
@ -140,7 +137,6 @@ def test_reset_stops_unmounts_and_removes_dirs(tmp_path: Path, monkeypatch: pyte
(runtime_dir / "upper" / "logs").mkdir(parents=True) (runtime_dir / "upper" / "logs").mkdir(parents=True)
(runtime_dir / "upper" / "logs" / "console.log").write_text("noise") (runtime_dir / "upper" / "logs" / "console.log").write_text("noise")
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command) monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
reset_instance("alpha", root=tmp_path) reset_instance("alpha", root=tmp_path)
@ -159,7 +155,6 @@ def test_reset_on_never_initialized_is_noop(tmp_path: Path, monkeypatch: pytest.
if "disable" in cmd: if "disable" in cmd:
raise subprocess.CalledProcessError(returncode=5, cmd=list(cmd), stderr="not loaded") raise subprocess.CalledProcessError(returncode=5, cmd=list(cmd), stderr="not loaded")
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command) monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
reset_instance("alpha", root=tmp_path) reset_instance("alpha", root=tmp_path)
@ -178,7 +173,6 @@ def test_delete_stopped_instance_removes_dirs(tmp_path: Path, monkeypatch: pytes
(tmp_path / "instances" / "alpha").mkdir(parents=True) (tmp_path / "instances" / "alpha").mkdir(parents=True)
(tmp_path / "runtime" / "alpha" / "merged").mkdir(parents=True) (tmp_path / "runtime" / "alpha" / "merged").mkdir(parents=True)
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command) monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
delete_instance("alpha", root=tmp_path) delete_instance("alpha", root=tmp_path)
@ -188,58 +182,7 @@ def test_delete_stopped_instance_removes_dirs(tmp_path: Path, monkeypatch: pytes
assert ["sudo", "-n", "/usr/local/libexec/left4me/left4me-systemctl", "disable", "alpha"] in calls assert ["sudo", "-n", "/usr/local/libexec/left4me/left4me-systemctl", "disable", "alpha"] in calls
def test_stop_succeeds_when_unmount_fails(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: # test_stop_succeeds_when_unmount_fails / test_delete_succeeds_when_unmount_fails
umount_calls: list[list[str]] = [] # were removed when the Python-side unmount was dropped: the unit's
# ExecStopPost is now the single code path for unmount, so there's no
def fake_run_command(cmd, **kwargs): # Python-side failure to tolerate.
del kwargs
if cmd[:4] == [
"sudo",
"-n",
"/usr/local/libexec/left4me/left4me-overlay",
"umount",
]:
umount_calls.append(list(cmd))
raise subprocess.CalledProcessError(
returncode=1,
cmd=list(cmd),
stderr="umount: /var/lib/left4me/runtime/alpha/merged: not mounted",
)
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
stop_instance("alpha", root=tmp_path)
assert umount_calls, "stop must always attempt the overlay helper (no preflight)"
def test_delete_succeeds_when_unmount_fails(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
umount_calls: list[list[str]] = []
def fake_run_command(cmd, **kwargs):
del kwargs
if cmd[:4] == [
"sudo",
"-n",
"/usr/local/libexec/left4me/left4me-overlay",
"umount",
]:
umount_calls.append(list(cmd))
raise subprocess.CalledProcessError(
returncode=1,
cmd=list(cmd),
stderr="umount: /var/lib/left4me/runtime/alpha/merged: not mounted",
)
(tmp_path / "instances" / "alpha").mkdir(parents=True)
(tmp_path / "runtime" / "alpha" / "merged").mkdir(parents=True)
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
delete_instance("alpha", root=tmp_path)
assert umount_calls, "delete must always attempt the overlay helper (no preflight)"
assert not (tmp_path / "instances" / "alpha").exists()
assert not (tmp_path / "runtime" / "alpha").exists()