left4me/deploy/tests/test_deploy_artifacts.py
mwiegand 5eac51a93e
fix(deploy): wrap overlay helper with nsenter so it doesn't pin the unit's mount namespace
systemd's `+` Exec prefix removes sandbox/credentials but does NOT
detach from the unit's per-service mount namespace (created by
PrivateTmp/Protect*). The Python interpreter for the helper was
launched inside that namespace, and even though the helper internally
nsenter'd into PID 1 for the umount syscall, the calling Python
process itself never left the unit's namespace. Its existence pinned
the namespace alive, which kept the slave mount tree alive, which
made PID 1's umount return EBUSY for the entire duration of the
helper's run. The mount became unmountable the moment the helper
exited — empirically verified by polling /proc/*/ns/mnt during stop:
the only PID holding the dying namespace was the helper itself.

Wrap both ExecStartPre and ExecStopPost with `/usr/bin/nsenter
--mount=/proc/1/ns/mnt --` so the helper Python interpreter runs in
PID 1's mount namespace from the start. With the helper out of the
unit's namespace, umount succeeds first try once the cgroup empties.
Reset went from ~25 s with retry/lazy-fallback workarounds to ~0.5 s
clean.

Knock-on cleanups:
- Helper drops internal nsenter for the syscalls (already in PID 1's
  namespace), and drops the eager-retry loop + lazy-umount fallback +
  inner work_inner retry (no race left to ride out).
- Revert TimeoutStopSec=60s back to 15s.
- Tests updated to expect the new argv shapes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-09 15:13:59 +02:00

710 lines
28 KiB
Python

import os
import subprocess
from pathlib import Path
# Repository root, resolved relative to this test file (two levels up from
# the directory that contains it).
ROOT = Path(__file__).resolve().parents[2]
DEPLOY = ROOT / "deploy"

# Private base directories inside the deploy tree; the public constants below
# are composed from them.
_FILES_DIR = DEPLOY / "files"
_TEMPLATES_DIR = DEPLOY / "templates"
_LIBEXEC_DIR = _FILES_DIR / "usr/local/libexec/left4me"

# systemd unit, slice and timer files shipped by the deploy tree.
SANDBOX_UNIT_DIR = _FILES_DIR / "usr/local/lib/systemd/system"
WEB_UNIT = SANDBOX_UNIT_DIR / "left4me-web.service"
SERVER_UNIT = SANDBOX_UNIT_DIR / "left4me-server@.service"
GAME_SLICE = SANDBOX_UNIT_DIR / "l4d2-game.slice"
BUILD_SLICE = SANDBOX_UNIT_DIR / "l4d2-build.slice"
GLOBAL_REFRESH_SERVICE = SANDBOX_UNIT_DIR / "left4me-refresh-global-overlays.service"
GLOBAL_REFRESH_TIMER = SANDBOX_UNIT_DIR / "left4me-refresh-global-overlays.timer"

# Helper executables installed under libexec.
SYSTEMCTL_HELPER = _LIBEXEC_DIR / "left4me-systemctl"
JOURNALCTL_HELPER = _LIBEXEC_DIR / "left4me-journalctl"
OVERLAY_HELPER = _LIBEXEC_DIR / "left4me-overlay"
SCRIPT_SANDBOX_HELPER = _LIBEXEC_DIR / "left4me-script-sandbox"

# Static configuration files and templates.
SYSCTL_CONF = _FILES_DIR / "etc/sysctl.d/99-left4me.conf"
SANDBOX_RESOLV_CONF = _FILES_DIR / "etc/left4me/sandbox-resolv.conf"
SUDOERS = _FILES_DIR / "etc/sudoers.d/left4me"
HOST_ENV = _TEMPLATES_DIR / "etc/left4me/host.env"
WEB_ENV_TEMPLATE = _TEMPLATES_DIR / "etc/left4me/web.env.template"

# The deploy entry-point script itself.
DEPLOY_SCRIPT = DEPLOY / "deploy-test-server.sh"
def test_global_unit_files_exist_at_product_level_paths():
    """The web and server unit files must both exist in the deploy tree."""
    for unit_path in (WEB_UNIT, SERVER_UNIT):
        assert unit_path.is_file()
def test_web_unit_contains_required_runtime_contract():
    """Pin the directives the web unit must carry and the ones it must omit."""
    contents = WEB_UNIT.read_text()

    must_contain = (
        "User=left4me",
        "Group=left4me",
        "WorkingDirectory=/opt/left4me",
        "Environment=PATH=/opt/left4me/.venv/bin:",
        "EnvironmentFile=/etc/left4me/host.env",
        "EnvironmentFile=/etc/left4me/web.env",
        "ExecStart=/opt/left4me/.venv/bin/gunicorn",
        "--workers 1",
        "--threads 32",
        # Sandboxing restored now that fuse-overlayfs propagation is no
        # longer the mount mechanism.
        "PrivateTmp=true",
        "ProtectSystem=full",
        "ReadWritePaths=/var/lib/left4me",
    )
    must_not_contain = (
        # sudo (used by the overlay, systemctl and journalctl helpers) is
        # setuid, so NoNewPrivileges has to stay unset on this unit.
        "NoNewPrivileges=true",
        # Mounts happen in PID 1's namespace through the left4me-overlay
        # helper, so MountFlags propagation is irrelevant; the earlier
        # assumption that MountFlags=shared made it work was incorrect.
        "MountFlags=",
    )

    for directive in must_contain:
        assert directive in contents
    for directive in must_not_contain:
        assert directive not in contents
def test_server_unit_contains_required_runtime_contract():
    """Pin identity, paths, launch line and sandboxing of the server unit."""
    contents = SERVER_UNIT.read_text()

    must_contain = (
        "User=left4me",
        "Group=left4me",
        "EnvironmentFile=/etc/left4me/host.env",
        "EnvironmentFile=/var/lib/left4me/instances/%i/instance.env",
        # The `-` prefix makes a chdir failure non-fatal, so ExecStartPre can
        # run the mount helper before the merged dir exists. ExecStart
        # re-applies the directory and finds it once the mount has landed.
        "WorkingDirectory=-/var/lib/left4me/runtime/%i/merged/left4dead2",
        # srcds_run must be launched from the *merged* overlay tree, not from
        # installation/. srcds_run cds to its own dirname; pointed at
        # installation/, the engine would read gameinfo.txt and addons from
        # the lower layer and never see overlay plugins (Metamod/SourceMod)
        # or cfgs.
        "ExecStart=/var/lib/left4me/runtime/%i/merged/srcds_run",
        "$L4D2_ARGS",
        "NoNewPrivileges=true",
        "PrivateTmp=true",
        "PrivateDevices=true",
        "ProtectHome=true",
        "ProtectSystem=strict",
        "ReadOnlyPaths=/var/lib/left4me/installation /var/lib/left4me/overlays",
        "ReadWritePaths=/var/lib/left4me/runtime/%i",
        "RestrictSUIDSGID=true",
        "LockPersonality=true",
    )
    for directive in must_contain:
        assert directive in contents

    # Only the unbraced spelling of the argument variable is allowed.
    assert "${L4D2_ARGS}" not in contents
def test_server_unit_mounts_overlay_via_exec_start_pre():
    """The server unit must mount its own overlay before starting.

    At boot, systemd auto-starts enabled units before the web app has a
    chance to run start_instance's pre-start mount, so the unit has to
    re-mount the overlay itself for reboots to be transparent. This pairs
    with the helper's idempotency check
    (test_overlay_helper_mount_is_idempotent_when_already_mounted).

    The unit-level `nsenter --mount=/proc/1/ns/mnt --` is what keeps umount
    fast: without it the helper's Python process would inherit the unit's
    per-service mount namespace and pin it alive, blocking PID 1's umount
    until the helper exited. Wrapping at the Exec line places the helper
    itself in PID 1's namespace.
    """
    contents = SERVER_UNIT.read_text()

    # `+` prefix: the command runs as root outside the unit's sandbox. That
    # is required because the unit sets NoNewPrivileges=true, which blocks
    # sudo's setuid escalation, and the helper needs root for the mount
    # syscall.
    expected_exec_line = (
        "ExecStartPre=+/usr/bin/nsenter --mount=/proc/1/ns/mnt -- "
        "/usr/local/libexec/left4me/left4me-overlay mount %i"
    )
    assert expected_exec_line in contents

    # Bound the restart loop; otherwise a CHDIR failure (or any other
    # pre-start error) would spin indefinitely.
    for limit in ("StartLimitBurst=5", "StartLimitIntervalSec=60s"):
        assert limit in contents
def test_server_unit_unmounts_overlay_via_exec_stop_post():
    """The server unit must unmount its overlay in ExecStopPost.

    This is the single source of truth for unmounting, mirroring the mount
    path. ExecStopPost (rather than ExecStop) is used so it runs after srcds
    has fully exited and the cgroup is cleared.

    The same nsenter-at-Exec-line wrapping as ExecStartPre applies: without
    it the helper process would itself hold a reference to the unit's
    per-service mount namespace and umount in PID 1 would loop on EBUSY
    until the helper gave up. With it, umount succeeds on the first try.
    """
    contents = SERVER_UNIT.read_text()
    expected_exec_line = (
        "ExecStopPost=+/usr/bin/nsenter --mount=/proc/1/ns/mnt -- "
        "/usr/local/libexec/left4me/left4me-overlay umount %i"
    )
    assert expected_exec_line in contents
def test_overlay_helper_mount_is_idempotent_when_already_mounted():
    """The overlay helper must guard both verbs with a mount-point check.

    ExecStartPre runs on every Restart=on-failure cycle. If an earlier start
    mounted successfully but ExecStart failed afterwards, the next
    ExecStartPre would try to mount on top, which fails. The helper therefore
    has to short-circuit when merged is already a mount point.
    """
    helper_source = OVERLAY_HELPER.read_text()
    # Expect at least two checks: cmd_mount skips when already mounted,
    # cmd_umount skips when not mounted.
    ismount_calls = helper_source.count("os.path.ismount")
    assert ismount_calls >= 2
def test_server_unit_contains_perf_baseline_directives():
    """Pin the scheduling, memory, limit and shutdown settings of the server unit."""
    contents = SERVER_UNIT.read_text()

    must_contain = (
        # Slice membership.
        "Slice=l4d2-game.slice",
        # CFS priority bump.
        "Nice=-5",
        # I/O priority.
        "IOSchedulingClass=best-effort",
        "IOSchedulingPriority=4",
        # OOM ordering: game servers survive, the sandbox dies first.
        "OOMScoreAdjust=-200",
        # Memory caps with headroom for map-load spikes.
        "MemoryHigh=1.5G",
        "MemoryMax=2G",
        # Bounded fork surface.
        "TasksMax=256",
        # Plenty of fds for plugin-heavy setups.
        "LimitNOFILE=65536",
        # srcds shuts down cleanly on SIGINT and needs time to flush. Because
        # the helper runs in PID 1's mount namespace (unit-level nsenter on
        # ExecStopPost), umount has no race window and 15 s is plenty for
        # the whole stop transition.
        "KillSignal=SIGINT",
        "TimeoutStopSec=15s",
        # Per-unit override of journald rate limiting (the default drops
        # srcds output).
        "LogRateLimitIntervalSec=0",
    )
    for directive in must_contain:
        assert directive in contents

    # No explicit scheduling policy (i.e. no SCHED_FIFO); Nice alone is used.
    assert "CPUSchedulingPolicy=" not in contents
def test_l4d2_game_slice_exists_with_high_weights():
    """The game slice file must exist and declare CPU/IO weights of 1000."""
    assert GAME_SLICE.is_file()
    contents = GAME_SLICE.read_text()
    for fragment in ("[Slice]", "CPUWeight=1000", "IOWeight=1000"):
        assert fragment in contents
def test_l4d2_build_slice_exists_with_low_weights():
    """The build slice file must exist and declare CPU/IO weights of 10."""
    assert BUILD_SLICE.is_file()
    contents = BUILD_SLICE.read_text()
    for fragment in ("[Slice]", "CPUWeight=10", "IOWeight=10"):
        assert fragment in contents
def test_sysctl_conf_present_with_perf_settings():
    """The sysctl drop-in must exist and contain every tuned setting verbatim."""
    assert SYSCTL_CONF.is_file()
    contents = SYSCTL_CONF.read_text()
    expected_settings = (
        "net.core.rmem_max = 8388608",
        "net.core.wmem_max = 8388608",
        "net.core.rmem_default = 524288",
        "net.core.wmem_default = 524288",
        "net.core.netdev_max_backlog = 5000",
        "net.core.netdev_budget = 600",
        "vm.swappiness = 10",
    )
    for setting in expected_settings:
        assert setting in contents, f"missing {setting!r} in 99-left4me.conf"
def test_script_sandbox_in_build_slice_with_oom_adjust():
    """The sandbox helper must use the build slice and a high OOM score."""
    helper_source = SCRIPT_SANDBOX_HELPER.read_text()
    expected_flags = (
        # The transient unit goes into the low-weight build slice so it
        # yields to game-server instances under CPU/IO contention.
        "--slice=l4d2-build.slice",
        # The sandbox dies first under host memory pressure; servers
        # (OOMScoreAdjust=-200) survive.
        "-p OOMScoreAdjust=500",
    )
    for flag in expected_flags:
        assert flag in helper_source
def test_deploy_script_installs_perf_artifacts():
    """The deploy script must reference the slice files and sysctl drop-in."""
    deploy_source = DEPLOY_SCRIPT.read_text()
    expected_fragments = (
        # Slice files copied into the system-wide systemd unit dir.
        "/usr/local/lib/systemd/system/l4d2-game.slice",
        "/usr/local/lib/systemd/system/l4d2-build.slice",
        # Sysctl drop-in installed under /etc/sysctl.d/.
        "/etc/sysctl.d/99-left4me.conf",
        # Values applied immediately, not on next boot.
        "sysctl --system",
    )
    for fragment in expected_fragments:
        assert fragment in deploy_source
def test_deploy_script_writes_cpuset_drop_ins():
    """The deploy script must compute CPU sets and write all four slice drop-ins."""
    deploy_source = DEPLOY_SCRIPT.read_text()

    # Reads nproc and binds defaults via ${VAR:-...}.
    for fragment in (
        "nproc",
        "LEFT4ME_SYSTEM_CPUS",
        "LEFT4ME_GAME_CPUS",
        "${LEFT4ME_SYSTEM_CPUS:-0}",
    ):
        assert fragment in deploy_source

    # The default game-core upper bound is computed from nproc; accept any
    # of the NPROC-1 spellings or the LEFT4ME_GAME_CPUS:-1- prefix.
    upper_bound_spellings = (
        "1-$((NPROC - 1))",
        "1-$((NPROC-1))",
        "1-$((nproc-1))",
        "LEFT4ME_GAME_CPUS:-1-",
    )
    assert any(spelling in deploy_source for spelling in upper_bound_spellings)

    # All four drop-in paths.
    for slice_name in ("system", "user", "l4d2-build", "l4d2-game"):
        drop_in = f"/etc/systemd/system/{slice_name}.slice.d/99-left4me-cpuset.conf"
        assert drop_in in deploy_source

    # Drop-ins use the existing install pattern.
    assert "install -m 0644 -o root -g root" in deploy_source

    # Single-core host: skip with a warning.
    core_count_guards = ("-lt 2", "< 2", "-ge 2")
    assert any(guard in deploy_source for guard in core_count_guards)
    assert "skipping CPU isolation" in deploy_source
def _fake_command(tmp_path, command_name):
marker = tmp_path / f"{command_name}.args"
command = tmp_path / command_name
command.write_text(f"#!/bin/sh\nprintf '%s\n' \"$*\" > '{marker}'\nexit 0\n")
command.chmod(0o755)
return marker
def _env_with_fake_commands(tmp_path):
env = os.environ.copy()
env["PATH"] = f"{tmp_path}{os.pathsep}{env.get('PATH', '')}"
return env
def test_helpers_use_fixed_system_tool_paths_not_sudo_path():
    """Helpers must hard-code tool paths instead of resolving them via PATH."""
    for helper_path, tool in (
        (SYSTEMCTL_HELPER, "systemctl"),
        (JOURNALCTL_HELPER, "journalctl"),
    ):
        helper_source = helper_path.read_text()
        assert f"command -v {tool}" not in helper_source
        assert f"/bin/{tool}" in helper_source or f"/usr/bin/{tool}" in helper_source
def test_systemctl_helper_passes_shell_syntax_check_and_rejects_bad_args(tmp_path):
    """The systemctl helper must parse, reject bad input and use fixed verbs.

    Every rejected invocation has to exit non-zero *without* reaching the
    (fake) systemctl placed first on PATH.
    """
    subprocess.run(["sh", "-n", str(SYSTEMCTL_HELPER)], check=True)

    marker = _fake_command(tmp_path, "systemctl")
    env = _env_with_fake_commands(tmp_path)
    rejected_invocations = [
        ["bad/action", "alpha"],
        # `start` and `stop` are no longer accepted verbs: the lifecycle now
        # uses `enable`/`disable` for reboot survival via WantedBy= symlinks.
        ["start", "alpha"],
        ["stop", "alpha"],
        ["enable", ""],
        ["enable", ".hidden"],
        ["enable", "bad..name"],
        ["enable", "bad/name"],
        ["enable", "bad\\name"],
        ["enable", "bad name"],
    ]
    for argv in rejected_invocations:
        completed = subprocess.run(
            ["sh", str(SYSTEMCTL_HELPER), *argv],
            env=env,
            check=False,
        )
        assert completed.returncode != 0
        # The fake systemctl records its args when invoked; it must not be.
        assert not marker.exists()

    helper_source = SYSTEMCTL_HELPER.read_text()
    for fragment in (
        'unit="left4me-server@${name}.service"',
        'enable) exec "$systemctl" enable --now "$unit"',
        'disable) exec "$systemctl" disable --now "$unit"',
        "--property=ActiveState",
        "--property=SubState",
    ):
        assert fragment in helper_source
def test_journalctl_helper_passes_shell_syntax_check_and_rejects_bad_args(tmp_path):
    """The journalctl helper must parse, reject bad input and use fixed argv.

    Every rejected invocation has to exit non-zero *without* reaching the
    (fake) journalctl placed first on PATH.
    """
    subprocess.run(["sh", "-n", str(JOURNALCTL_HELPER)], check=True)

    marker = _fake_command(tmp_path, "journalctl")
    env = _env_with_fake_commands(tmp_path)
    rejected_invocations = [
        ["../evil", "--lines", "25", "--no-follow"],
        ["alpha", "--bad", "25", "--no-follow"],
        ["alpha", "--lines", "not-number", "--no-follow"],
        ["alpha", "--lines", "25", "--bad-follow"],
        ["bad/name", "--lines", "25", "--no-follow"],
    ]
    for argv in rejected_invocations:
        completed = subprocess.run(
            ["sh", str(JOURNALCTL_HELPER), *argv],
            env=env,
            check=False,
        )
        assert completed.returncode != 0
        # The fake journalctl records its args when invoked; it must not be.
        assert not marker.exists()

    helper_source = JOURNALCTL_HELPER.read_text()
    for fragment in (
        'unit="left4me-server@${name}.service"',
        'exec "$journalctl" -u "$unit" -n "$lines" -o cat "$follow_arg"',
        'exec "$journalctl" -u "$unit" -n "$lines" -o cat',
    ):
        assert fragment in helper_source
def test_sudoers_allows_only_left4me_helpers_not_raw_system_tools():
    """sudoers must grant the left4me helpers and nothing lower-level."""
    contents = SUDOERS.read_text()

    grant_prefix = "left4me ALL=(root) NOPASSWD: "
    for command_spec in (
        "/usr/local/libexec/left4me/left4me-systemctl *",
        "/usr/local/libexec/left4me/left4me-journalctl *",
        "/usr/local/libexec/left4me/left4me-script-sandbox",
    ):
        assert (grant_prefix + command_spec) in contents

    # The overlay verbs are checked without the grant prefix.
    for command_spec in (
        "/usr/local/libexec/left4me/left4me-overlay mount *",
        "/usr/local/libexec/left4me/left4me-overlay umount *",
    ):
        assert command_spec in contents

    # Raw system tools must never be reachable through sudo.
    for raw_tool in (
        "/bin/systemctl",
        "/usr/bin/systemctl",
        "/bin/journalctl",
        "/usr/bin/journalctl",
        "/bin/mount",
        "/bin/umount",
    ):
        assert raw_tool not in contents
def test_overlay_helper_is_python_with_strict_validation():
    """The overlay helper must be a python3 script with its validation surface."""
    helper_source = OVERLAY_HELPER.read_text()
    assert helper_source.startswith("#!/usr/bin/python3")

    for fragment in (
        # Validation surface.
        "NAME_RE = re.compile",
        "LOWERDIR_ALLOWLIST",
        "user.fuseoverlayfs.",
        "MAX_LOWERDIRS = 500",
        # Mounts via PID 1's mount namespace.
        "/proc/1/ns/mnt",
        "nsenter",
        # The verbs are spelled `mount` and `umount`.
        '"mount"',
        '"umount"',
    ):
        assert fragment in helper_source

    # `unmount` is not an accepted spelling.
    assert '"unmount"' not in helper_source
def test_deploy_script_installs_overlay_helper_with_executable_mode():
    """The deploy script must mention the overlay helper and a 0755 chmod."""
    deploy_source = DEPLOY_SCRIPT.read_text()
    for fragment in (
        "/usr/local/libexec/left4me/left4me-overlay",
        "chmod 0755",
        "left4me-overlay",
    ):
        assert fragment in deploy_source
def test_deploy_script_does_not_install_fuse_overlayfs_apt_dep():
    """No package-install line may pull in fuse-overlayfs or fuse3.

    fuse-overlayfs / fuse3 were the previous mount engine and kernel
    overlayfs replaces them. Comments in the migration block may still
    mention the names, so only the actual apt-get / dnf install lines are
    inspected.
    """
    install_lines = []
    for script_line in DEPLOY_SCRIPT.read_text().splitlines():
        if "apt-get install" in script_line or "dnf install" in script_line:
            install_lines.append(script_line)

    assert install_lines, "expected at least one apt/dnf install line"
    for install_line in install_lines:
        for package in ("fuse-overlayfs", "fuse3"):
            assert package not in install_line, install_line
def test_deploy_script_runs_one_shot_kernel_overlay_migration():
    """The deploy script must contain the one-shot kernel-overlay migration."""
    deploy_source = DEPLOY_SCRIPT.read_text()
    expected_fragments = (
        # Marker file for the one-shot migration.
        "/var/lib/left4me/.kernel-overlay-migrated",
        # The migration stops services, force-unmounts stale mounts and
        # wipes upper/work.
        "systemctl stop 'left4me-server@",
        "systemctl stop left4me-web.service",
        "findmnt -t overlay",
        "/runtime/",
        "rm -rf",
        'upper"',
        'work"',
    )
    for fragment in expected_fragments:
        assert fragment in deploy_source
def test_env_templates_contain_required_defaults():
    """host.env and the web.env template must carry the expected defaults."""
    host_env_text = HOST_ENV.read_text()
    assert "Deployment units use fixed /var/lib/left4me paths" in host_env_text
    assert host_env_text.endswith("LEFT4ME_ROOT=/var/lib/left4me\n")

    # The web.env template is compared in full, not by substring.
    expected_web_env = "".join(
        [
            "DATABASE_URL=sqlite:////var/lib/left4me/left4me.db\n",
            "SECRET_KEY=replace-with-generated-secret\n",
            "JOB_WORKER_THREADS=4\n",
        ]
    )
    assert WEB_ENV_TEMPLATE.read_text() == expected_web_env
def test_deploy_script_has_safe_defaults_and_preserves_state() -> None:
    """Pin the provisioning, install and admin-seeding steps of the deploy script."""
    deploy_source = DEPLOY_SCRIPT.read_text()

    # Note: assertions about web.env's lifecycle (create-only-if-missing /
    # never-sourced-from-deploy) used to live in this test. They went stale
    # in commit caa8b83, which switched to rewriting web.env on every deploy
    # with a machine-id-derived SECRET_KEY and started sourcing web.env in
    # the alembic + seed helper subprocesses. They were removed entirely;
    # current behaviour is covered by `install -m 0640 ... /etc/left4me/web.env`,
    # checked indirectly through the SECRET_KEY rewrite and the
    # run_left4me_with_env plumbing asserted at the end of the list.
    expected_fragments = (
        "useradd --system --home-dir /var/lib/left4me",
        "/var/lib/left4me/installation",
        "/var/lib/left4me/overlays",
        "/var/lib/left4me/instances",
        "/var/lib/left4me/runtime",
        "tar",
        "--exclude .venv",
        "--exclude .claude",
        "pip install -e /opt/left4me/l4d2host -e /opt/left4me/l4d2web",
        "systemctl enable --now left4me-web.service",
        "for attempt in",
        "/opt/left4me/.venv",
        "visudo -cf /etc/sudoers.d/left4me",
        "run_left4me_with_env",
        "LEFT4ME_ADMIN_USERNAME",
        "LEFT4ME_ADMIN_PASSWORD",
        "user already exists",
        "deploy/files",
    )
    for fragment in expected_fragments:
        assert fragment in deploy_source
def test_deploy_script_does_not_recurse_into_runtime_state_mounts() -> None:
    """chown must be recursive for /opt/left4me only, never for /var/lib/left4me."""
    deploy_source = DEPLOY_SCRIPT.read_text()

    recursive_state_chown = "$sudo_cmd chown -R left4me:left4me /var/lib/left4me"
    assert recursive_state_chown not in deploy_source

    for fragment in (
        "$sudo_cmd chown left4me:left4me \\",
        "/var/lib/left4me/runtime \\",
        "$sudo_cmd chown -R left4me:left4me /opt/left4me",
    ):
        assert fragment in deploy_source
def test_deploy_script_runs_migrations_before_app_initialization() -> None:
    """The deploy script must run alembic and must not call create_app() inline."""
    deploy_source = DEPLOY_SCRIPT.read_text()
    migration_command = "alembic -c /opt/left4me/l4d2web/alembic.ini upgrade head"
    inline_app_init = "from l4d2web.app import create_app; create_app()"
    assert migration_command in deploy_source
    assert inline_app_init not in deploy_source
def test_deploy_script_shell_syntax() -> None:
    """The deploy script must pass `sh -n` (parse without executing)."""
    syntax_check = ["sh", "-n", str(DEPLOY_SCRIPT)]
    subprocess.run(syntax_check, check=True)
def test_globals_refresh_units_removed():
    """Global-overlays subsystem deleted in favor of script overlays."""
    for removed_unit in (GLOBAL_REFRESH_SERVICE, GLOBAL_REFRESH_TIMER):
        assert not removed_unit.exists()
def test_deploy_script_does_not_provision_globals_subsystem():
    """The deleted global-overlay cache may only appear in `rm -rf` cleanup lines."""
    deploy_source = DEPLOY_SCRIPT.read_text()

    # No mkdir/install of the deleted cache dir; a mention inside a one-shot
    # `rm -rf` cleanup is fine.
    cache_mentions = [
        script_line
        for script_line in deploy_source.splitlines()
        if "/var/lib/left4me/global_overlay_cache" in script_line
    ]
    for script_line in cache_mentions:
        assert "rm -rf" in script_line, script_line

    assert "left4me-refresh-global-overlays" not in deploy_source
def test_deploy_script_provisions_sandbox_user():
    """The deploy script must create the l4d2-sandbox system user."""
    useradd_command = (
        "useradd --system --no-create-home --shell /usr/sbin/nologin l4d2-sandbox"
    )
    assert useradd_command in DEPLOY_SCRIPT.read_text()
def test_deploy_script_does_not_install_bubblewrap():
    """No apt-get / dnf install line may pull in bubblewrap."""
    install_lines = []
    for script_line in DEPLOY_SCRIPT.read_text().splitlines():
        if "apt-get install" in script_line or "dnf install" in script_line:
            install_lines.append(script_line)

    assert install_lines, "expected at least one apt/dnf install line"
    for install_line in install_lines:
        for package in ("bubblewrap", "bwrap"):
            assert package not in install_line, install_line
def test_deploy_script_installs_script_overlay_tooling():
    """Both the apt and dnf install lines must list 7z tooling and coreutils.

    Script overlays commonly need 7z and md5sum (e.g. l4d2center map sync).
    coreutils ships md5sum and is technically essential, but listing it
    explicitly makes the contract obvious and survives slim base images.
    """
    script_lines = DEPLOY_SCRIPT.read_text().splitlines()
    apt_lines = [entry for entry in script_lines if "apt-get install" in entry]
    dnf_lines = [entry for entry in script_lines if "dnf install" in entry]
    assert apt_lines, "expected an apt-get install line"
    assert dnf_lines, "expected a dnf install line"

    for apt_line in apt_lines:
        for package in ("p7zip-full", "coreutils"):
            assert package in apt_line, apt_line

    for dnf_line in dnf_lines:
        # Fedora/RHEL split: p7zip provides 7za, p7zip-plugins provides 7z.
        for package in ("p7zip", "p7zip-plugins", "coreutils"):
            assert package in dnf_line, dnf_line
def test_deploy_script_tightens_left4me_db_permissions():
    """The deploy script must chown/chmod the DB and its WAL/SHM sidecars.

    The DB and its sidecars must be left4me:left4me 0640: the owner (web
    service) keeps rw, the group is read-only and "other" (including
    l4d2-sandbox) gets nothing. The sidecars matter because SQLite in WAL
    mode needs write access to all three files; if a sidecar ends up
    root-owned (e.g. from ad-hoc root-side inspection), the next write fails
    as "readonly db".
    """
    deploy_source = DEPLOY_SCRIPT.read_text()
    for command in ("chown left4me:left4me", "chmod 0640"):
        assert command in deploy_source

    db_path = "/var/lib/left4me/left4me.db"
    for db_file in (db_path, db_path + "-wal", db_path + "-shm"):
        assert db_file in deploy_source, f"deploy script must touch {db_file}"
def test_deploy_script_installs_script_sandbox_helper():
    """The deploy script must mention the sandbox helper and a 0755 chmod."""
    deploy_source = DEPLOY_SCRIPT.read_text()
    for fragment in (
        "/usr/local/libexec/left4me/left4me-script-sandbox",
        "chmod 0755",
        "left4me-script-sandbox",
    ):
        assert fragment in deploy_source
def test_script_sandbox_helper_present():
    """The sandbox helper must exist, be a bash script and be mode 0755 in the repo."""
    assert SCRIPT_SANDBOX_HELPER.is_file()
    helper_source = SCRIPT_SANDBOX_HELPER.read_text()
    assert helper_source.startswith("#!/bin/bash")
    # Keep only the permission bits of the stat mode.
    mode = SCRIPT_SANDBOX_HELPER.stat().st_mode & 0o777
    assert mode == 0o755, f"expected 0755, got {oct(mode)}"
def test_script_sandbox_helper_passes_shell_syntax_check():
    """The sandbox helper must pass `bash -n` (parse without executing)."""
    syntax_check = ["bash", "-n", str(SCRIPT_SANDBOX_HELPER)]
    subprocess.run(syntax_check, check=True)
def test_script_sandbox_helper_invokes_systemd_run_with_hardening():
    """Pin the systemd-run invocation, hardening and egress filter of the sandbox helper."""
    helper_source = SCRIPT_SANDBOX_HELPER.read_text()

    # systemd-run in service mode (no --scope), with synchronous I/O to the
    # caller.
    assert "systemd-run" in helper_source
    assert "--scope" not in helper_source, "v2 uses transient service units, not scopes"

    must_contain = (
        "--pipe",
        "--wait",
        "--collect",
        "--unit=",
        # UID drop via systemd directives.
        "User=l4d2-sandbox",
        "Group=l4d2-sandbox",
        # Cgroup limits unchanged from v1.
        "MemoryMax=4G",
        "MemorySwapMax=0",
        "TasksMax=512",
        "CPUQuota=200%",
        "RuntimeMaxSec=3600",
        # Hardening directives that v1 (scope mode) could not carry.
        "NoNewPrivileges=yes",
        "ProtectSystem=strict",
        "ProtectHome=yes",
        "PrivateTmp=yes",
        "PrivateDevices=yes",
        "PrivateIPC=yes",
        "ProtectKernelTunables=yes",
        "ProtectKernelModules=yes",
        "ProtectKernelLogs=yes",
        "ProtectControlGroups=yes",
        "RestrictNamespaces=yes",
        "RestrictSUIDSGID=yes",
        "LockPersonality=yes",
        "MemoryDenyWriteExecute=yes",
        "SystemCallFilter=",
        "@system-service",
        "@network-io",
        "CapabilityBoundingSet=",
        "AmbientCapabilities=",
        'RestrictAddressFamilies="AF_INET AF_INET6 AF_UNIX"',
        # Mount setup: /etc and /var/lib are masked with tmpfs, then
        # selected paths are bound back in.
        'TemporaryFileSystem="/etc /var/lib"',
        "BindReadOnlyPaths=",
        # The resolv.conf bind points at the sandbox-only file, not the
        # host's /etc/resolv.conf, which typically references a private-IP
        # DNS server that IPAddressDeny= blocks.
        "/etc/left4me/sandbox-resolv.conf:/etc/resolv.conf",
        "/etc/ssl",
        "/etc/ca-certificates",
        "/etc/nsswitch.conf",
        "/etc/alternatives",
        "${SCRIPT}:/script.sh",
        'BindPaths="${OVERLAY_DIR}:/overlay"',
        # IP egress filter: deny localhost / RFC1918 / link-local /
        # multicast / CGNAT / ULA and rely on the kernel's default
        # "allow all" for everything not listed.
        "IPAddressDeny=",
    )
    for fragment in must_contain:
        assert fragment in helper_source

    must_not_contain = (
        # No bwrap.
        "bwrap",
        "bubblewrap",
        # The network namespace stays shared with the host.
        "PrivateNetwork=",
        # IPAddressDeny alone, without IPAddressAllow=any: empirically,
        # setting both makes the allow win on this systemd/kernel combo
        # regardless of the documented "more specific rule wins" behaviour.
        "IPAddressAllow=any",
    )
    for fragment in must_not_contain:
        assert fragment not in helper_source

    # Explicit CIDRs: systemd-run's -p parser does not accept the
    # `localhost` / `link-local` / `multicast` shorthand keywords that work
    # in unit files (only the full strings parse).
    denied_ranges = (
        "127.0.0.0/8",
        "::1/128",
        "169.254.0.0/16",
        "fe80::/10",
        "224.0.0.0/4",
        "ff00::/8",
        "10.0.0.0/8",
        "172.16.0.0/12",
        "192.168.0.0/16",
        "100.64.0.0/10",
        "fc00::/7",
    )
    for token in denied_ranges:
        assert token in helper_source, f"missing {token!r} in IPAddressDeny set"
def test_sandbox_resolv_conf_exists():
    """The sandbox resolv.conf must exist and list at least two public resolvers.

    Addresses are classified with the standard ``ipaddress`` module rather
    than by splitting on dots, so IPv6 nameserver entries are checked instead
    of crashing the test with ``ValueError``, and loopback / private ranges
    are rejected for both address families.
    """
    # Function-scope import: only this test needs the module.
    import ipaddress

    assert SANDBOX_RESOLV_CONF.is_file()
    text = SANDBOX_RESOLV_CONF.read_text()
    nameservers = [
        line.split()[1]
        for line in text.splitlines()
        if line.startswith("nameserver ")
    ]
    assert len(nameservers) >= 2, "expected at least two nameservers for redundancy"
    # Sanity: the resolvers must be public (not RFC1918 / loopback). The
    # exact IPs are not pinned; Cloudflare/Google/Quad9 are all acceptable.
    for ns in nameservers:
        # Raises ValueError if the entry is not a valid IPv4/IPv6 literal,
        # which fails the test, as a malformed entry should.
        address = ipaddress.ip_address(ns)
        assert not address.is_loopback, ns
        assert not address.is_private, ns
def test_deploy_script_installs_sandbox_resolv_conf():
    """The deploy script must reference the resolv.conf source and target paths."""
    deploy_source = DEPLOY_SCRIPT.read_text()
    for path_fragment in (
        "deploy/files/etc/left4me/sandbox-resolv.conf",
        "/etc/left4me/sandbox-resolv.conf",
    ):
        assert path_fragment in deploy_source
def test_script_sandbox_helper_validates_overlay_id():
    """The sandbox helper must validate the overlay id, overlay dir and script path."""
    helper_source = SCRIPT_SANDBOX_HELPER.read_text()
    expected_guards = (
        # Numeric-only overlay id.
        '[[ "$OVERLAY_ID" =~ ^[0-9]+$ ]]',
        # Overlay dir must exist.
        "/var/lib/left4me/overlays/",
        "[[ -d $OVERLAY_DIR ]]",
        # Script path must exist.
        "[[ -f $SCRIPT ]]",
    )
    for guard in expected_guards:
        assert guard in helper_source
def test_script_sandbox_helper_dry_run_mode(tmp_path):
    """The sandbox helper must contain a dry-run guard.

    This is a static check on the helper's source text only; the helper is
    not executed here. The ``tmp_path`` fixture is kept in the signature for
    compatibility but is unused: earlier scaffolding that created a fake
    overlay directory and script under it was never consumed by any
    assertion and has been removed.
    """
    helper_text = SCRIPT_SANDBOX_HELPER.read_text()
    # The guard variable must be referenced, and the script must contain a
    # successful early exit.
    assert "LEFT4ME_SCRIPT_SANDBOX_DRY_RUN" in helper_text
    assert "exit 0" in helper_text