Symmetric with the earlier mount cleanup (commits 519567e..a982995).

Until now, the unit's ExecStartPre handled mount but the Python side still drove unmount: stop_instance and _purge_instance both called _mounter.unmount, which wrapped sudo + the helper. Two code paths for two halves of the same lifecycle.

Move unmount into the unit:
- ExecStopPost=+/usr/local/libexec/left4me/left4me-overlay umount %i (ExecStopPost, not ExecStop, so it runs after the cgroup is cleared; ExecStop runs while srcds is alive and would EBUSY the umount syscall.)
- Helper's umount verb is now idempotent (mirrors mount): if merged isn't a mount point, return early. PRINT_ONLY mode bypasses both short-circuits so the unit tests still exercise the full nsenter argv.

Drop the dead Python machinery:
- _mounter.unmount(...) calls in stop_instance and _purge_instance
- _mounter global + KernelOverlayFSMounter import
- The whole l4d2host/fs/ package (OverlayMounter ABC + KernelOverlayFSMounter class) — no production callers, just self-tests
- l4d2host/tests/test_kernel_overlayfs.py
- test_stop_succeeds_when_unmount_fails / test_delete_succeeds_when_unmount_fails (tested Python-side unmount-failure tolerance that no longer exists)
- The l4d2host.fs.kernel_overlayfs.run_command monkeypatches in lifecycle tests

After this, the only thing start_instance does beyond cfg-staging is ask systemd to enable+start the unit. stop/delete/reset only ask systemd to disable; the overlay lifecycle lives entirely in the unit file.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
216 lines
7.3 KiB
Python
216 lines
7.3 KiB
Python
from pathlib import Path
|
|
import shutil
|
|
import subprocess
|
|
from typing import Callable
|
|
|
|
from l4d2host.paths import DEFAULT_LEFT4ME_ROOT, get_left4me_root, overlay_path, validate_instance_name
|
|
from l4d2host.service_control import disable_service, enable_service
|
|
from l4d2host.spec import load_spec
|
|
|
|
|
|
from l4d2host.logging import emit_step
|
|
|
|
|
|
# Module-level alias for DEFAULT_LEFT4ME_ROOT (imported from l4d2host.paths).
# NOTE(review): nothing in this module reads it; presumably kept so existing
# importers of `DEFAULT_ROOT` keep working — confirm before removing.
DEFAULT_ROOT = DEFAULT_LEFT4ME_ROOT
|
|
|
|
|
|
def initialize_instance(
    name: str,
    spec_path: Path,
    *,
    root: Path | None = None,
    on_stdout: Callable[[str], None] | None = None,
    on_stderr: Callable[[str], None] | None = None,
    passthrough: bool = False,
    should_cancel: Callable[[], bool] | None = None,
) -> None:
    """Create the on-disk layout and generated files for one instance.

    The instance name is validated and the spec is loaded before anything is
    written. The function then creates ``runtime/<name>/{upper,work,merged}``
    and ``instances/<name>`` under the root, writes ``instance.env`` and
    ``server.cfg`` into the instance directory, and copies the spec file next
    to them as ``spec.yaml``.

    Args:
        name: Instance name; passed through ``validate_instance_name``.
        spec_path: Path of the spec file to load and persist.
        root: Base directory; ``get_left4me_root()`` is used when ``None``.
        on_stdout: Forwarded to ``emit_step`` for progress messages.
        on_stderr: Accepted for signature parity; not used in this function.
        passthrough: Forwarded to ``emit_step``.
        should_cancel: Accepted for signature parity; not used in this
            function.
    """
    name = validate_instance_name(name)
    root = Path(root) if root is not None else get_left4me_root()
    spec = load_spec(spec_path)

    emit_step("creating instance directories...", on_stdout, passthrough)
    instance_dir = root / "instances" / name
    runtime_dir = root / "runtime" / name
    # Directories that instance.env's L4D2_LOWERDIRS will later be stacked
    # under; created up front so they exist before the first start.
    for layer in ("upper", "work", "merged"):
        (runtime_dir / layer).mkdir(parents=True, exist_ok=True)
    instance_dir.mkdir(parents=True, exist_ok=True)

    # Spec overlays come first, in spec order; the base installation is
    # always the last entry.
    lower_layers = [
        *(str(overlay_path(entry.path, root=root)) for entry in spec.overlays),
        str(root / "installation"),
    ]

    emit_step("writing instance.env...", on_stdout, passthrough)
    env_lines = (
        f"L4D2_PORT={spec.port}",
        f"L4D2_ARGS={' '.join(spec.arguments)}",
        f"L4D2_LOWERDIRS={':'.join(lower_layers)}",
    )
    # One KEY=VALUE per line, each newline-terminated.
    env_text = "".join(f"{line}\n" for line in env_lines)
    (instance_dir / "instance.env").write_text(env_text)

    emit_step("writing server.cfg...", on_stdout, passthrough)
    if spec.config:
        cfg_text = "\n".join(spec.config)
    else:
        # Empty/missing config still produces an (empty) server.cfg.
        cfg_text = ""
    (instance_dir / "server.cfg").write_text(cfg_text)

    emit_step("persisting spec...", on_stdout, passthrough)
    shutil.copy2(spec_path, instance_dir / "spec.yaml")
    emit_step("initialization complete.", on_stdout, passthrough)
|
|
|
|
|
|
def start_instance(
    name: str,
    *,
    root: Path | None = None,
    on_stdout: Callable[[str], None] | None = None,
    on_stderr: Callable[[str], None] | None = None,
    passthrough: bool = False,
    should_cancel: Callable[[], bool] | None = None,
) -> None:
    """Stage cfg files into the upper layer, then enable + start the service.

    Steps, in order:
      1. Validate the name and resolve the root.
      2. Ensure ``runtime/<name>/upper/left4dead2/cfg`` exists and delete any
         ``server*.cfg`` left there by a previous start.
      3. Copy ``instances/<name>/server.cfg`` into that directory.
      4. For every overlay in the persisted ``spec.yaml`` that has an alias
         and ships ``left4dead2/cfg/server.cfg``, copy that file in as
         ``server_<alias>.cfg``.
      5. Call ``enable_service``.

    Args:
        name: Instance name; passed through ``validate_instance_name``.
        root: Base directory; ``get_left4me_root()`` is used when ``None``.
        on_stdout: Forwarded to ``emit_step`` and ``enable_service``.
        on_stderr: Forwarded to ``enable_service``.
        passthrough: Forwarded to ``emit_step`` and ``enable_service``.
        should_cancel: Forwarded to ``enable_service``.
    """
    name = validate_instance_name(name)
    root = get_left4me_root() if root is None else Path(root)
    instance_dir = root / "instances" / name
    runtime_dir = root / "runtime" / name

    # Stage cfg files in the upper layer. Writing here goes straight to the
    # upper dir on the host filesystem with the worker's uid; the unit's
    # ExecStartPre then mounts the overlay (single source of truth for the
    # mount), and the kernel surfaces these files at the top of the merged
    # stack. A script-sandbox-built lower-layer `server.cfg` is owned by
    # `l4d2-sandbox`, not the worker — staging in upper sidesteps the
    # ownership-preserving copy-up that would happen if we wrote through
    # merged post-mount.
    emit_step("staging server.cfg + per-overlay aliases in upper layer...", on_stdout, passthrough)
    upper_cfg_dir = runtime_dir / "upper" / "left4dead2" / "cfg"
    upper_cfg_dir.mkdir(parents=True, exist_ok=True)
    # Clear anything staged by an earlier start so removed aliases do not
    # linger in the upper layer.
    for stale in upper_cfg_dir.glob("server*.cfg"):
        stale.unlink()
    shutil.copy2(instance_dir / "server.cfg", upper_cfg_dir / "server.cfg")

    spec = load_spec(instance_dir / "spec.yaml")
    for o in spec.overlays:
        if not o.alias:
            continue
        # Resolve the overlay directory with the same helper that
        # initialize_instance uses to build L4D2_LOWERDIRS, so the alias cfg
        # is read from the directory that actually gets mounted. Joining
        # `root / "overlays" / o.path` by hand would silently discard the
        # prefix if o.path were absolute.
        overlay_dir = Path(overlay_path(o.path, root=root))
        src = overlay_dir / "left4dead2" / "cfg" / "server.cfg"
        if not src.exists():
            # Overlay has no server.cfg of its own; nothing to alias.
            continue
        shutil.copy2(src, upper_cfg_dir / f"server_{o.alias}.cfg")

    emit_step("enabling + starting systemd service...", on_stdout, passthrough)
    enable_service(
        name,
        on_stdout=on_stdout,
        on_stderr=on_stderr,
        passthrough=passthrough,
        should_cancel=should_cancel,
    )
    emit_step("start complete.", on_stdout, passthrough)
|
|
|
|
|
|
def stop_instance(
    name: str,
    *,
    root: Path | None = None,
    on_stdout: Callable[[str], None] | None = None,
    on_stderr: Callable[[str], None] | None = None,
    passthrough: bool = False,
    should_cancel: Callable[[], bool] | None = None,
) -> None:
    """Stop an instance by disabling its systemd service.

    No unmount is performed from Python: `disable --now` triggers the unit's
    ExecStopPost, which unmounts the overlay, so the unit file remains the
    single source of truth for unmounting.

    Args:
        name: Instance name; passed through ``validate_instance_name``.
        root: Base directory; ``get_left4me_root()`` is used when ``None``.
        on_stdout: Forwarded to ``emit_step`` and ``disable_service``.
        on_stderr: Forwarded to ``disable_service``.
        passthrough: Forwarded to ``emit_step`` and ``disable_service``.
        should_cancel: Forwarded to ``disable_service``.
    """
    name = validate_instance_name(name)
    # The root is still resolved even though nothing below reads it, so that
    # get_left4me_root() keeps running on this path.
    # NOTE(review): confirm whether this resolution can be dropped.
    root = Path(root) if root is not None else get_left4me_root()

    service_options = {
        "on_stdout": on_stdout,
        "on_stderr": on_stderr,
        "passthrough": passthrough,
        "should_cancel": should_cancel,
    }
    emit_step("disabling + stopping systemd service...", on_stdout, passthrough)
    disable_service(name, **service_options)
    emit_step("stop complete.", on_stdout, passthrough)
|
|
|
|
|
|
def _purge_instance(
    name: str,
    *,
    root: Path,
    on_stdout: Callable[[str], None] | None,
    on_stderr: Callable[[str], None] | None,
    passthrough: bool,
    should_cancel: Callable[[], bool] | None,
) -> None:
    """Disable the instance's service (best effort) and delete its files.

    Shared by ``delete_instance`` and ``reset_instance``. The caller is
    expected to have validated ``name`` and resolved ``root`` already; this
    helper does neither.

    Args:
        name: Instance name, used as the directory name under ``instances``
            and ``runtime``.
        root: Base directory containing ``instances/`` and ``runtime/``.
        on_stdout: Forwarded to ``emit_step`` and ``disable_service``.
        on_stderr: Forwarded to ``disable_service``.
        passthrough: Forwarded to ``emit_step`` and ``disable_service``.
        should_cancel: Forwarded to ``disable_service``.
    """
    # Built up front, before any service call, in removal order.
    doomed = tuple(root / subtree / name for subtree in ("instances", "runtime"))

    # disable --now triggers ExecStopPost which unmounts. A failing disable is
    # tolerated to cover the unit-not-loaded case (e.g., delete on an instance
    # that was initialized but never started — no unit, nothing to disable,
    # no mount to clean up either).
    emit_step("disabling + stopping systemd service (if running)...", on_stdout, passthrough)
    try:
        disable_service(
            name,
            on_stdout=on_stdout,
            on_stderr=on_stderr,
            passthrough=passthrough,
            should_cancel=should_cancel,
        )
    except subprocess.CalledProcessError:
        # Deliberately best-effort; file removal proceeds regardless.
        pass

    emit_step("removing instance files...", on_stdout, passthrough)
    for target in doomed:
        if target.exists():
            shutil.rmtree(target)
|
|
|
|
|
|
def delete_instance(
    name: str,
    *,
    root: Path | None = None,
    on_stdout: Callable[[str], None] | None = None,
    on_stderr: Callable[[str], None] | None = None,
    passthrough: bool = False,
    should_cancel: Callable[[], bool] | None = None,
) -> None:
    """Delete an instance's service registration and on-disk state.

    Returns silently, without emitting any step or touching systemd, when
    neither ``instances/<name>`` nor ``runtime/<name>`` exists under the
    root. Otherwise delegates to ``_purge_instance`` and reports completion.

    Args:
        name: Instance name; passed through ``validate_instance_name``.
        root: Base directory; ``get_left4me_root()`` is used when ``None``.
        on_stdout: Forwarded to ``_purge_instance`` and ``emit_step``.
        on_stderr: Forwarded to ``_purge_instance``.
        passthrough: Forwarded to ``_purge_instance`` and ``emit_step``.
        should_cancel: Forwarded to ``_purge_instance``.
    """
    name = validate_instance_name(name)
    base = Path(root) if root is not None else get_left4me_root()

    # Nothing on disk for this name: treat the delete as already done.
    footprint = (base / "instances" / name, base / "runtime" / name)
    if not any(path.exists() for path in footprint):
        return

    purge_options = {
        "root": base,
        "on_stdout": on_stdout,
        "on_stderr": on_stderr,
        "passthrough": passthrough,
        "should_cancel": should_cancel,
    }
    _purge_instance(name, **purge_options)
    emit_step("delete complete.", on_stdout, passthrough)
|
|
|
|
|
|
def reset_instance(
    name: str,
    *,
    root: Path | None = None,
    on_stdout: Callable[[str], None] | None = None,
    on_stderr: Callable[[str], None] | None = None,
    passthrough: bool = False,
    should_cancel: Callable[[], bool] | None = None,
) -> None:
    """Wipe an instance's state so the next start rebuilds it.

    Unlike ``delete_instance`` there is no existence check: the purge
    (service disable + directory removal) always runs, and the completion
    message is always emitted.

    Args:
        name: Instance name; passed through ``validate_instance_name``.
        root: Base directory; ``get_left4me_root()`` is used when ``None``.
        on_stdout: Forwarded to ``_purge_instance`` and ``emit_step``.
        on_stderr: Forwarded to ``_purge_instance``.
        passthrough: Forwarded to ``_purge_instance`` and ``emit_step``.
        should_cancel: Forwarded to ``_purge_instance``.
    """
    name = validate_instance_name(name)
    base = Path(root) if root is not None else get_left4me_root()

    purge_options = {
        "root": base,
        "on_stdout": on_stdout,
        "on_stderr": on_stderr,
        "passthrough": passthrough,
        "should_cancel": should_cancel,
    }
    _purge_instance(name, **purge_options)
    emit_step("reset complete; next start will reinitialize from blueprint.", on_stdout, passthrough)
|