refactor(l4d2-host): unit's ExecStartPre is the sole code path to the mount

Before this change there were two callers of left4me-overlay mount:
the web app's start_instance (Python, in-process) and the unit's
ExecStartPre (shell, via sudo). The duplication invited divergence; the
helper's recently-added idempotency made both paths technically work
but at the cost of a "first wins" race and dead-code retry logic in
start_instance.

Drop the in-process _mounter.mount() call from start_instance. The web
app now only stages cfg files (which still must happen on the host
filesystem before mount, to avoid overlayfs copy-up changing ownership),
then asks systemd to enable+start the unit; the unit's ExecStartPre
does the mount.

Removed:
- os.path.ismount(merged) refusal in start_instance and its test
  (test_start_refuses_to_double_mount). The race the check guarded
  against is now handled by the helper's idempotency.
- _load_instance_env helper and the `os` import (both became dead).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
mwiegand 2026-05-09 12:54:05 +02:00
parent 3d9b7ef771
commit 56f5c30296
No known key found for this signature in database
3 changed files with 17 additions and 82 deletions

View file

@ -15,9 +15,10 @@ EnvironmentFile=/var/lib/left4me/instances/%i/instance.env
# runs in the unit's home (cwd doesn't matter for the mount helper); the
# ExecStart re-applies WorkingDirectory after the mount and finds the dir.
WorkingDirectory=-/var/lib/left4me/runtime/%i/merged/left4dead2
# At boot the kernel-overlayfs mount is gone (mounts are volatile); the
# web app's start_instance also pre-mounts but doesn't run on auto-start.
# The helper is idempotent — a no-op if already mounted by the web app.
# Single source of truth for the kernel-overlayfs mount: the web app's
# start_instance only stages cfg files and asks systemd to enable+start
# this unit; the actual `mount -t overlay` lives here so reboot auto-start
# works the same as a UI-driven start. Helper is idempotent.
ExecStartPre=/usr/bin/sudo -n /usr/local/libexec/left4me/left4me-overlay mount %i
ExecStart=/var/lib/left4me/installation/srcds_run -game left4dead2 +hostport ${L4D2_PORT} $L4D2_ARGS
Restart=on-failure

View file

@ -1,4 +1,3 @@
import os
from pathlib import Path
import shutil
import subprocess
@ -63,16 +62,6 @@ def initialize_instance(
emit_step("initialization complete.", on_stdout, passthrough)
def _load_instance_env(path: Path) -> dict[str, str]:
result: dict[str, str] = {}
for line in path.read_text().splitlines():
if "=" not in line:
continue
key, value = line.split("=", 1)
result[key] = value
return result
def start_instance(
name: str,
*,
@ -87,25 +76,14 @@ def start_instance(
instance_dir = root / "instances" / name
runtime_dir = root / "runtime" / name
env = _load_instance_env(instance_dir / "instance.env")
merged = runtime_dir / "merged"
if os.path.ismount(merged):
# Kernel overlayfs mounts persist when the web worker dies (unlike
# fuse daemons, which were reaped with their cgroup). Refuse rather
# than double-mount.
raise subprocess.CalledProcessError(
returncode=1,
cmd=["start_instance"],
stderr=f"runtime overlay already mounted at {merged}; refusing to double-mount",
)
# Stage cfg files in the upper layer BEFORE mounting. Writing through
# merged after the mount triggers overlayfs copy-up, which preserves the
# lower file's ownership — and a script-sandbox-built `server.cfg` is
# owned by `l4d2-sandbox`, not the worker. Pre-mount writes go straight to
# upper with the worker's uid; the kernel just shows them at the top of
# the merged stack once mounted.
# Stage cfg files in the upper layer. Writing here goes straight to the
# upper dir on the host filesystem with the worker's uid; the unit's
# ExecStartPre then mounts the overlay (single source of truth for the
# mount), and the kernel surfaces these files at the top of the merged
# stack. A script-sandbox-built lower-layer `server.cfg` is owned by
# `l4d2-sandbox`, not the worker — staging in upper sidesteps the
# ownership-preserving copy-up that would happen if we wrote through
# merged post-mount.
emit_step("staging server.cfg + per-overlay aliases in upper layer...", on_stdout, passthrough)
upper_cfg_dir = runtime_dir / "upper" / "left4dead2" / "cfg"
upper_cfg_dir.mkdir(parents=True, exist_ok=True)
@ -121,18 +99,6 @@ def start_instance(
continue
shutil.copy2(src, upper_cfg_dir / f"server_{o.alias}.cfg")
emit_step("mounting runtime overlay...", on_stdout, passthrough)
_mounter.mount(
lowerdirs=env["L4D2_LOWERDIRS"],
upperdir=runtime_dir / "upper",
workdir=runtime_dir / "work",
merged=merged,
on_stdout=on_stdout,
on_stderr=on_stderr,
passthrough=passthrough,
should_cancel=should_cancel,
)
emit_step("enabling + starting systemd service...", on_stdout, passthrough)
enable_service(
name,

View file

@ -34,14 +34,12 @@ def test_start_order(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
start_instance("alpha", root=tmp_path)
assert calls[0] == [
"sudo",
"-n",
"/usr/local/libexec/left4me/left4me-overlay",
"mount",
"alpha",
# The mount is now driven by the unit's ExecStartPre (single source of
# truth), so start_instance only stages the cfgs and asks systemd to
# enable+start the unit.
assert calls == [
["sudo", "-n", "/usr/local/libexec/left4me/left4me-systemctl", "enable", "alpha"],
]
assert calls[1] == ["sudo", "-n", "/usr/local/libexec/left4me/left4me-systemctl", "enable", "alpha"]
def test_start_copies_per_overlay_aliases_and_sweeps_stale(
@ -87,36 +85,6 @@ def test_start_copies_per_overlay_aliases_and_sweeps_stale(
assert not (upper_cfg_dir / "server_overlay_7.cfg").exists(), "no alias in spec → no copy"
def test_start_refuses_to_double_mount(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
calls: list[list[str]] = []
def fake_run_command(cmd, **kwargs):
del kwargs
calls.append(list(cmd))
instance_dir = tmp_path / "instances" / "alpha"
runtime_dir = tmp_path / "runtime" / "alpha"
(runtime_dir / "merged").mkdir(parents=True)
instance_dir.mkdir(parents=True)
(instance_dir / "instance.env").write_text("L4D2_PORT=27015\nL4D2_ARGS=\nL4D2_LOWERDIRS=/x\n")
(instance_dir / "server.cfg").write_text("")
merged = runtime_dir / "merged"
def fake_ismount(path):
return Path(path) == merged
monkeypatch.setattr("l4d2host.fs.kernel_overlayfs.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.service_control.run_command", fake_run_command)
monkeypatch.setattr("l4d2host.instances.os.path.ismount", fake_ismount)
with pytest.raises(subprocess.CalledProcessError) as exc_info:
start_instance("alpha", root=tmp_path)
assert "already mounted" in (exc_info.value.stderr or "")
assert calls == [], "no mount/start commands must be issued when refusing"
def test_delete_missing_is_noop(tmp_path: Path) -> None:
delete_instance("missing", root=tmp_path)