diff --git a/deploy/files/usr/local/lib/systemd/system/left4me-server@.service b/deploy/files/usr/local/lib/systemd/system/left4me-server@.service index e7414bd..9031f79 100644 --- a/deploy/files/usr/local/lib/systemd/system/left4me-server@.service +++ b/deploy/files/usr/local/lib/systemd/system/left4me-server@.service @@ -10,9 +10,18 @@ Group=left4me EnvironmentFile=/etc/left4me/host.env EnvironmentFile=/var/lib/left4me/instances/%i/instance.env WorkingDirectory=/var/lib/left4me/runtime/%i/merged/left4dead2 +# At boot the kernel-overlayfs mount is gone (mounts are volatile); the +# web app's start_instance also pre-mounts but doesn't run on auto-start. +# The helper is idempotent — a no-op if already mounted by the web app. +ExecStartPre=/usr/bin/sudo -n /usr/local/libexec/left4me/left4me-overlay mount %i ExecStart=/var/lib/left4me/installation/srcds_run -game left4dead2 +hostport ${L4D2_PORT} $L4D2_ARGS Restart=on-failure RestartSec=5 +# Bound the restart loop. Without these, a persistent ExecStartPre or +# ExecStart failure spins indefinitely: systemd's default limit (5 starts +# per 10s) is never reached when RestartSec=5 spaces the attempts out. 
+StartLimitBurst=5 +StartLimitIntervalSec=60s # Resource control baseline — see docs/superpowers/specs/2026-05-09-l4d2-server-host-perf-baseline-design.md Slice=l4d2-game.slice diff --git a/deploy/files/usr/local/libexec/left4me/left4me-overlay b/deploy/files/usr/local/libexec/left4me/left4me-overlay index e53eeb5..8b9203b 100644 --- a/deploy/files/usr/local/libexec/left4me/left4me-overlay +++ b/deploy/files/usr/local/libexec/left4me/left4me-overlay @@ -127,16 +127,30 @@ def exec_or_print(argv: list[str]) -> None: def cmd_mount(name: str) -> None: name = validate_name(name) r = root() + runtime_name_dir = (r / "runtime" / name).resolve(strict=True) + merged_for_check = (runtime_name_dir / "merged").resolve(strict=True) + + # Idempotency for unit restart cycles: if a previous start mounted + # successfully but ExecStart failed afterwards (and Restart=on-failure + # fires another cycle), the second ExecStartPre would otherwise refuse + # to mount-on-top. Short-circuit here so the second cycle just gets + # straight to ExecStart. This also handles the dual-path case where + # both the web app's start_instance and the unit's ExecStartPre call + # the helper. 
+ if os.path.ismount(merged_for_check): + if os.environ.get("LEFT4ME_OVERLAY_PRINT_ONLY") == "1": + print("ALREADY_MOUNTED") + return + instance_env = r / "instances" / name / "instance.env" raw_lowerdirs = parse_lowerdirs(instance_env) allowed_roots = [(r / sub).resolve() for sub in LOWERDIR_ALLOWLIST] canonical_lowerdirs = [str(canonical_under(allowed_roots, Path(p))) for p in raw_lowerdirs] - runtime_name_dir = (r / "runtime" / name).resolve(strict=True) upper = (runtime_name_dir / "upper").resolve(strict=True) work = (runtime_name_dir / "work").resolve(strict=True) - merged = (runtime_name_dir / "merged").resolve(strict=True) + merged = merged_for_check for label, path in (("upper", upper), ("work", work), ("merged", merged)): if path.parent != runtime_name_dir: die(f"{label} resolved outside runtime/{name}: {path}") diff --git a/deploy/tests/test_deploy_artifacts.py b/deploy/tests/test_deploy_artifacts.py index 96d0300..d0a307a 100644 --- a/deploy/tests/test_deploy_artifacts.py +++ b/deploy/tests/test_deploy_artifacts.py @@ -78,6 +78,33 @@ def test_server_unit_contains_required_runtime_contract(): assert "LockPersonality=true" in unit +def test_server_unit_mounts_overlay_via_exec_start_pre(): + """At boot, systemd auto-starts enabled units before the web app gets a + chance to run start_instance's pre-start mount. The unit itself must + re-mount the overlay so reboots are transparent. Pairs with the helper's + idempotency check (test_overlay_helper_mount_is_idempotent_when_already_mounted). + """ + unit = SERVER_UNIT.read_text() + assert ( + "ExecStartPre=/usr/bin/sudo -n /usr/local/libexec/left4me/left4me-overlay mount %i" + in unit + ) + # Bound the restart loop; without these, a CHDIR-failure (or any other + # pre-start error) spins indefinitely. + assert "StartLimitBurst=5" in unit + assert "StartLimitIntervalSec=60s" in unit + + +def test_overlay_helper_mount_is_idempotent_when_already_mounted(): + """ExecStartPre runs on every Restart=on-failure cycle. 
If a previous + start mounted successfully but ExecStart failed afterwards, the next + ExecStartPre would re-mount on top -- which fails. The helper must + short-circuit when merged is already a mount point. + """ + text = OVERLAY_HELPER.read_text() + assert "os.path.ismount" in text + + def test_server_unit_contains_perf_baseline_directives(): unit = SERVER_UNIT.read_text()