fix(l4d2-host): mount overlay via ExecStartPre so enabled units boot cleanly
The lifecycle change to `systemctl enable --now` (commit 8552c55) made
units auto-start at boot. But the kernel overlayfs mount is volatile
(a reboot drops it), and the web app's start_instance only re-mounts in
response to a UI click. Result: at boot, systemd starts the unit, finds
an empty merged/ directory, the CHDIR into WorkingDirectory fails, and
Restart=on-failure spins forever (the restart counter had hit 65 on ckn
before this fix landed).
Fix:
- Unit gets `ExecStartPre=/usr/bin/sudo -n .../left4me-overlay mount %i`
so the overlay is established before the main process starts.
- Helper is now idempotent: if merged is already a mount point, exit 0.
Required because Restart=on-failure re-runs ExecStartPre on each
cycle, and the web-app's start_instance also calls the helper, so
both paths would otherwise collide on "already mounted".
- StartLimitBurst=5 + StartLimitIntervalSec=60s caps the restart loop
instead of letting it spin indefinitely on a fundamental failure.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b62fc08127
commit
519567e156
3 changed files with 52 additions and 2 deletions
|
|
@ -10,9 +10,18 @@ Group=left4me
|
|||
EnvironmentFile=/etc/left4me/host.env
|
||||
EnvironmentFile=/var/lib/left4me/instances/%i/instance.env
|
||||
WorkingDirectory=/var/lib/left4me/runtime/%i/merged/left4dead2
|
||||
# At boot the kernel-overlayfs mount is gone (mounts are volatile); the
|
||||
# web app's start_instance also pre-mounts but doesn't run on auto-start.
|
||||
# The helper is idempotent — a no-op if already mounted by the web app.
|
||||
ExecStartPre=/usr/bin/sudo -n /usr/local/libexec/left4me/left4me-overlay mount %i
|
||||
ExecStart=/var/lib/left4me/installation/srcds_run -game left4dead2 +hostport ${L4D2_PORT} $L4D2_ARGS
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
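# Bound the restart loop. systemd's default start rate limit (5 starts
|
||||
# within 10s) is never reached with RestartSec=5, so without an explicit
|
||||
# limit a persistent ExecStartPre or ExecStart failure restarts forever.
|
||||
# NOTE(review): StartLimitIntervalSec= is a [Unit]-section setting; if
|
||||
# these two lines sit under [Service] the interval is ignored and the
|
||||
# default 10s window still applies -- confirm they are placed in [Unit].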
|
||||
StartLimitBurst=5
|
||||
StartLimitIntervalSec=60s
|
||||
|
||||
# Resource control baseline — see docs/superpowers/specs/2026-05-09-l4d2-server-host-perf-baseline-design.md
|
||||
Slice=l4d2-game.slice
|
||||
|
|
|
|||
|
|
@ -127,16 +127,30 @@ def exec_or_print(argv: list[str]) -> None:
|
|||
def cmd_mount(name: str) -> None:
|
||||
name = validate_name(name)
|
||||
r = root()
|
||||
runtime_name_dir = (r / "runtime" / name).resolve(strict=True)
|
||||
merged_for_check = (runtime_name_dir / "merged").resolve(strict=True)
|
||||
|
||||
# Idempotency for unit restart cycles: if a previous start mounted
|
||||
# successfully but ExecStart failed afterwards (and Restart=on-failure
|
||||
# fires another cycle), the second ExecStartPre would otherwise refuse
|
||||
# to mount-on-top. Short-circuit here so the second cycle just gets
|
||||
# straight to ExecStart. This also handles the dual-path case where
|
||||
# both the web app's start_instance and the unit's ExecStartPre call
|
||||
# the helper.
|
||||
if os.path.ismount(merged_for_check):
|
||||
if os.environ.get("LEFT4ME_OVERLAY_PRINT_ONLY") == "1":
|
||||
print("ALREADY_MOUNTED")
|
||||
return
|
||||
|
||||
instance_env = r / "instances" / name / "instance.env"
|
||||
raw_lowerdirs = parse_lowerdirs(instance_env)
|
||||
|
||||
allowed_roots = [(r / sub).resolve() for sub in LOWERDIR_ALLOWLIST]
|
||||
canonical_lowerdirs = [str(canonical_under(allowed_roots, Path(p))) for p in raw_lowerdirs]
|
||||
|
||||
runtime_name_dir = (r / "runtime" / name).resolve(strict=True)
|
||||
upper = (runtime_name_dir / "upper").resolve(strict=True)
|
||||
work = (runtime_name_dir / "work").resolve(strict=True)
|
||||
merged = (runtime_name_dir / "merged").resolve(strict=True)
|
||||
merged = merged_for_check
|
||||
for label, path in (("upper", upper), ("work", work), ("merged", merged)):
|
||||
if path.parent != runtime_name_dir:
|
||||
die(f"{label} resolved outside runtime/{name}: {path}")
|
||||
|
|
|
|||
|
|
@ -78,6 +78,33 @@ def test_server_unit_contains_required_runtime_contract():
|
|||
assert "LockPersonality=true" in unit
|
||||
|
||||
|
||||
def test_server_unit_mounts_overlay_via_exec_start_pre():
    """The server unit must mount the overlay itself, before ExecStart.

    Enabled units are auto-started by systemd at boot, before the web app
    has any chance to run start_instance's pre-start mount, so the unit
    has to re-establish the overlay on its own for reboots to be
    transparent. Companion to the helper-side check in
    test_overlay_helper_mount_is_idempotent_when_already_mounted.

    This is a textual check of the unit file: it only verifies that the
    expected directives appear somewhere in the file.
    """
    unit_text = SERVER_UNIT.read_text()

    expected_pre_start = "ExecStartPre=/usr/bin/sudo -n /usr/local/libexec/left4me/left4me-overlay mount %i"
    assert expected_pre_start in unit_text

    # The restart loop must be bounded; otherwise a CHDIR failure (or any
    # other pre-start error) keeps restarting indefinitely.
    for directive in ("StartLimitBurst=5", "StartLimitIntervalSec=60s"):
        assert directive in unit_text
|
||||
|
||||
|
||||
def test_overlay_helper_mount_is_idempotent_when_already_mounted():
    """The overlay helper must short-circuit when merged is already mounted.

    ExecStartPre is re-run on every Restart=on-failure cycle. When an
    earlier cycle mounted the overlay but ExecStart then failed, the next
    ExecStartPre would try to mount on top of the existing mount, which
    fails. The helper therefore needs a mount-point check.

    This is a source-level check: it only verifies that the helper's text
    contains a call to os.path.ismount.
    """
    helper_source = OVERLAY_HELPER.read_text()
    assert "os.path.ismount" in helper_source
|
||||
|
||||
|
||||
def test_server_unit_contains_perf_baseline_directives():
|
||||
unit = SERVER_UNIT.read_text()
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue