deploy/files: annotate reference units with per-directive hardening comments
Update the educational reference copies of left4me-server@.service and left4me-web.service to match the new hardening composition from the ckn-bw reactor (HARDENING_COMMON + HARDENING_SERVER / HARDENING_WEB). Per-directive comments explain each defense's purpose and the threat it addresses, so a cold reader of this repo can understand the threat model from the unit file alone. Top-of-file note in each reference points at the ckn-bw reactor as the live source; reference is hand-synced. gunicorn ExecStart in the web reference uses placeholder '--workers 4 --threads 4' values; live emission interpolates from metadata. This is the documented divergence between the reference and the deployed unit. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
7c64910c90
commit
8e678b6765
2 changed files with 152 additions and 59 deletions
|
|
@ -1,10 +1,21 @@
|
|||
# left4me gameserver — system unit, one instance per gameserver.
|
||||
#
|
||||
# This is the REFERENCE COPY of the deployed unit. The live source is
|
||||
# the systemd/units reactor at ~/Projekte/ckn-bw/bundles/left4me/metadata.py
|
||||
# (look for 'left4me-server@.service'). Hardening directives live in
|
||||
# the HARDENING_SERVER constant near the top of the same file.
|
||||
# This file is hand-synced; edit both together.
|
||||
#
|
||||
# Threat model: docs/superpowers/specs/2026-05-15-hardening-threat-model.md
|
||||
# Defenses survey: docs/superpowers/specs/2026-05-15-hardening-defenses-survey.md
|
||||
# Test plan + results: docs/superpowers/specs/2026-05-15-hardening-test-plan.md
|
||||
|
||||
[Unit]
|
||||
Description=left4me server instance %i
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
# Bound the restart loop. Without these, a persistent ExecStartPre or
|
||||
# ExecStart failure spins indefinitely. Note: these are [Unit]-section
|
||||
# directives (systemd 230+), not [Service].
|
||||
# ExecStart failure spins indefinitely.
|
||||
StartLimitBurst=5
|
||||
StartLimitIntervalSec=60s
|
||||
|
||||
|
|
@ -14,49 +25,25 @@ User=left4me
|
|||
Group=left4me
|
||||
EnvironmentFile=/etc/left4me/host.env
|
||||
EnvironmentFile=/var/lib/left4me/instances/%i/instance.env
|
||||
# `-` prefix: chdir failure is non-fatal. systemd applies WorkingDirectory
|
||||
# before every Exec line — including ExecStartPre — but the merged dir only
|
||||
# exists once ExecStartPre's overlay mount succeeds. With `-`, ExecStartPre
|
||||
# runs in the unit's home (cwd doesn't matter for the mount helper); the
|
||||
# ExecStart re-applies WorkingDirectory after the mount and finds the dir.
|
||||
# `-` prefix: chdir failure is non-fatal. The merged dir only exists
|
||||
# once ExecStartPre's overlay mount succeeds.
|
||||
WorkingDirectory=-/var/lib/left4me/runtime/%i/merged/left4dead2
|
||||
# Single source of truth for the kernel-overlayfs mount lifecycle: the web
|
||||
# app's start_instance only stages cfg files and asks systemd to enable+
|
||||
# start this unit; the actual `mount -t overlay` lives here so reboot
|
||||
# auto-start works the same as a UI-driven start. ExecStopPost mirrors it
|
||||
# so the unmount lives in the same place — no Python-side _mounter needed
|
||||
# in stop/delete/reset paths. Both helper verbs are idempotent.
|
||||
#
|
||||
# `+` prefix runs the helper as PID 1 (root, no sandbox). Required because
|
||||
# the unit has NoNewPrivileges=true, which blocks sudo's setuid escalation
|
||||
# — and the helper itself needs root for the mount/umount syscalls.
|
||||
#
|
||||
# `nsenter --mount=/proc/1/ns/mnt --` runs the helper Python interpreter
|
||||
# in PID 1's mount namespace. Without this, the `+` prefix removes the
|
||||
# sandbox/credentials but does NOT detach from the unit's per-service
|
||||
# mount namespace (created by PrivateTmp/Protect*) — so the helper
|
||||
# process itself would hold a reference to that namespace, keeping the
|
||||
# slave-mount tree alive after the cgroup empties, and umount in PID 1
|
||||
# would return EBUSY for as long as the helper ran. Putting nsenter at
|
||||
# the unit-level (as opposed to inside the helper, where only the
|
||||
# umount syscall escaped) is what actually frees the namespace. Once
|
||||
# the helper is in PID 1's namespace, ExecStopPost's umount succeeds
|
||||
# on the first try with no retry/race window. ExecStopPost (not
|
||||
# ExecStop) so unmount runs after the cgroup is cleared; ExecStop runs
|
||||
# while srcds is still alive and would EBUSY.
|
||||
# `+` prefix runs the helper with full privileges (root, all caps, host
|
||||
# namespaces) — required because the unit has NoNewPrivileges=true
|
||||
# AND PrivateUsers=true; both block sudo's setuid path. nsenter into
|
||||
# PID 1's mount namespace ensures the umount in ExecStopPost succeeds
|
||||
# without EBUSY from the unit's own slave-mount tree.
|
||||
ExecStartPre=+/usr/bin/nsenter --mount=/proc/1/ns/mnt -- /usr/local/libexec/left4me/left4me-overlay mount %i
|
||||
# Run from the merged overlay, NOT installation/. srcds_run is a shell
|
||||
# script that `cd`s to its own dirname before exec'ing srcds_linux, so the
|
||||
# binary's path determines where the engine reads gameinfo.txt and addons
|
||||
# from — WorkingDirectory has no effect. Invoking installation/srcds_run
|
||||
# would resolve everything against the lower layer and never see overlay-
|
||||
# provided plugins (Metamod/SourceMod) or cfgs (zonemod, confogl).
|
||||
# Run from the merged overlay, NOT installation/. srcds_run cds to its
|
||||
# own dirname before exec'ing srcds_linux; the binary's path determines
|
||||
# gameinfo + addons lookup.
|
||||
ExecStart=/var/lib/left4me/runtime/%i/merged/srcds_run -game left4dead2 +hostport ${L4D2_PORT} $L4D2_ARGS
|
||||
ExecStopPost=+/usr/bin/nsenter --mount=/proc/1/ns/mnt -- /usr/local/libexec/left4me/left4me-overlay umount %i
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
|
||||
# Resource control baseline — see docs/superpowers/specs/2026-05-09-l4d2-server-host-perf-baseline-design.md
|
||||
# === Resource control baseline ===
|
||||
# See docs/superpowers/specs/2026-05-09-l4d2-server-host-perf-baseline-design.md
|
||||
Slice=l4d2-game.slice
|
||||
Nice=-5
|
||||
IOSchedulingClass=best-effort
|
||||
|
|
@ -70,16 +57,72 @@ KillSignal=SIGINT
|
|||
TimeoutStopSec=15s
|
||||
LogRateLimitIntervalSec=0
|
||||
|
||||
# Hardening (unchanged from previous baseline).
|
||||
NoNewPrivileges=true
|
||||
# === Identity / privilege drop ===
|
||||
# NOTE: systemd only recognises comments that start a line; text after a
|
||||
# value is parsed as part of the value, so every note sits on its own line.
|
||||
# Block privilege gain through setuid/file-capability exec (defense: D3).
|
||||
NoNewPrivileges=true
|
||||
# Deny setting the set-user-ID / set-group-ID bits on files and directories.
|
||||
RestrictSUIDSGID=true
|
||||
# Empty assignment: drop all caps — no privilege to escalate.
|
||||
CapabilityBoundingSet=
|
||||
# Empty assignment: no ambient capabilities are passed to the service.
|
||||
AmbientCapabilities=
|
||||
|
||||
# === Filesystem virtualization ===
|
||||
# Mask /var/lib, /etc, /opt, etc. with empty tmpfs; bind back only
|
||||
# what srcds needs. The DB (/var/lib/left4me/left4me.db) and web.env
|
||||
# (/etc/left4me/web.env) are intentionally not bound — they don't
|
||||
# exist in this unit's filesystem view (defenses: D1.a, D1.b).
|
||||
TemporaryFileSystem=/var/lib /etc /opt /home /root /srv /mnt /media
|
||||
BindReadOnlyPaths=/var/lib/left4me/installation
|
||||
BindReadOnlyPaths=/var/lib/left4me/overlays
|
||||
BindReadOnlyPaths=/etc/left4me/host.env
|
||||
BindReadOnlyPaths=/etc/ssl
|
||||
BindReadOnlyPaths=/etc/ca-certificates
|
||||
BindReadOnlyPaths=/etc/resolv.conf
|
||||
BindReadOnlyPaths=/etc/nsswitch.conf
|
||||
BindReadOnlyPaths=/etc/alternatives
|
||||
BindPaths=/var/lib/left4me/runtime/%i
|
||||
# belt-and-braces with TemporaryFileSystem
|
||||
ProtectSystem=strict
|
||||
ProtectHome=true
|
||||
|
||||
# === Process namespacing ===
|
||||
# own user namespace; cross-uid ptrace blocked (D2)
|
||||
PrivateUsers=true
|
||||
# own PID namespace; hides peer-srcds + gunicorn (D2.b, D5)
|
||||
PrivatePIDs=true
|
||||
PrivateTmp=true
|
||||
PrivateDevices=true
|
||||
ProtectHome=true
|
||||
ProtectSystem=strict
|
||||
ReadOnlyPaths=/var/lib/left4me/installation /var/lib/left4me/overlays
|
||||
ReadWritePaths=/var/lib/left4me/runtime/%i
|
||||
RestrictSUIDSGID=true
|
||||
LockPersonality=true
|
||||
PrivateIPC=true
|
||||
# block unshare()/clone(CLONE_NEW*)
|
||||
RestrictNamespaces=true
|
||||
|
||||
# === /proc and /sys ===
|
||||
# foreign-uid /proc hidden (paired with PrivatePIDs for full hide)
|
||||
ProtectProc=invisible
|
||||
# /proc shows only PID dirs, no kallsyms/cpuinfo
|
||||
ProcSubset=pid
|
||||
# /proc/sys, /sys read-only
|
||||
ProtectKernelTunables=true
|
||||
# no module load/unload
|
||||
ProtectKernelModules=true
|
||||
# no /dev/kmsg or syslog()
|
||||
ProtectKernelLogs=true
|
||||
# no settimeofday()
|
||||
ProtectClock=true
|
||||
# /sys/fs/cgroup read-only
|
||||
ProtectControlGroups=true
|
||||
# no sethostname()
|
||||
ProtectHostname=true
|
||||
# no personality() switches
|
||||
LockPersonality=true
|
||||
|
||||
# === Syscall filter ===
|
||||
# srcds_linux is i386 (Source 2007 engine). 'native x86' allows both
|
||||
# x86_64 (from srcds_run + the dynamic linker) and i386 (from srcds_linux).
|
||||
# Bare 'native' traps srcds_run in a respawn loop.
|
||||
SystemCallArchitectures=native x86
|
||||
SystemCallFilter=@system-service
|
||||
SystemCallFilter=~@debug @mount @raw-io @reboot @swap @cpu-emulation @obsolete @privileged
|
||||
# ~@debug is the load-bearing block for D2.a: drops ptrace(), process_vm_readv/writev().
|
||||
# ~@privileged blocks anything requiring CAP_*, redundant with empty bounding set.
|
||||
# MemoryDenyWriteExecute=true is NOT set — Source engine i386 .so files
|
||||
# have text relocations that need mprotect(W+X) during dynamic-linker pass.
|
||||
|
||||
# === Network ===
|
||||
# AF_UNIX needed for journald
|
||||
RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX
|
||||
# Lock srcds bindable sockets to the game port range.
|
||||
# NOTE(review): SocketBindAllow= only exempts matches from SocketBindDeny=
|
||||
# rules; with no deny rule present every bind() is still permitted. Confirm
|
||||
# whether the live unit pairs these with SocketBindDeny=any.
|
||||
SocketBindAllow=udp:27000-27999
|
||||
SocketBindAllow=tcp:27000-27999
|
||||
|
||||
# === Misc hygiene ===
|
||||
# no real-time scheduling
|
||||
RestrictRealtime=true
|
||||
# clean up SysV IPC on unit stop
|
||||
RemoveIPC=true
|
||||
# private kernel keyring
|
||||
KeyringMode=private
|
||||
UMask=0027
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
|
|
|||
|
|
@ -1,3 +1,25 @@
|
|||
# left4me web application — system unit.
|
||||
#
|
||||
# This is the REFERENCE COPY of the deployed unit. The live source is
|
||||
# the systemd/units reactor at ~/Projekte/ckn-bw/bundles/left4me/metadata.py
|
||||
# (look for 'left4me-web.service'). Hardening directives live in
|
||||
# the HARDENING_WEB constant near the top of the same file.
|
||||
# This file is hand-synced; edit both together.
|
||||
#
|
||||
# Several directives that the gameserver uses are intentionally absent
|
||||
# from this unit:
|
||||
# NoNewPrivileges — blocks sudo's setuid escalation
|
||||
# PrivateUsers — breaks sudo's host-root mapping
|
||||
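# RestrictSUIDSGID — denies setting SUID/SGID bits on files (NOTE(review): this does not by itself stop exec of the existing setuid sudo binary; confirm it must stay off)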
|
||||
# CapabilityBoundingSet= — empty value would deny sudo's caps
|
||||
# ~@privileged in SystemCallFilter — blocks sudo's setuid syscall
|
||||
# The web app invokes privileged helpers (left4me-systemctl,
|
||||
# left4me-overlay, left4me-script-sandbox) via sudo, so these
|
||||
# directives can't be applied here. A future refactor replacing sudo
|
||||
# with systemctl-managed transient units would unlock them.
|
||||
#
|
||||
# Threat model + defenses + tests: see docs/superpowers/specs/2026-05-15-hardening-*
|
||||
|
||||
[Unit]
|
||||
Description=left4me web application
|
||||
After=network-online.target
|
||||
|
|
@ -7,25 +29,53 @@ Wants=network-online.target
|
|||
Type=simple
|
||||
User=left4me
|
||||
Group=left4me
|
||||
WorkingDirectory=/opt/left4me
|
||||
Environment=HOME=/var/lib/left4me
|
||||
Environment=PATH=/opt/left4me/.venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
WorkingDirectory=/opt/left4me/src
|
||||
Environment=HOME=/var/lib/left4me PATH=/opt/left4me/.venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
EnvironmentFile=/etc/left4me/host.env
|
||||
EnvironmentFile=/etc/left4me/web.env
|
||||
ExecStart=/opt/left4me/.venv/bin/gunicorn --workers 1 --threads 32 --bind 0.0.0.0:8000 'l4d2web.app:create_app()'
|
||||
# Placeholder values for --workers / --threads. Live emission interpolates
|
||||
# from metadata.get('left4me/gunicorn_workers') and gunicorn_threads.
|
||||
ExecStart=/opt/left4me/.venv/bin/gunicorn --workers 4 --threads 4 --bind 127.0.0.1:8000 'l4d2web.app:create_app()'
|
||||
Restart=on-failure
|
||||
RestartSec=3
|
||||
# NoNewPrivileges intentionally not set: the worker invokes sudo to run
|
||||
# the left4me-systemctl, left4me-journalctl, and left4me-overlay
|
||||
# privileged helpers, all setuid via sudo.
|
||||
# ProtectSystem=full + ReadWritePaths implicitly give this unit a private
|
||||
# mount namespace, but mount visibility no longer depends on it: overlay
|
||||
# mounts are performed by the left4me-overlay helper, which nsenters into
|
||||
# PID 1's mount namespace, so the resulting mount lives in the host
|
||||
# namespace where the per-instance gameserver units can see it.
|
||||
ProtectSystem=full
|
||||
|
||||
# Web writes broadly under /var/lib/left4me (DB, instance configs,
|
||||
# overlays, runtime). Kept inline because it's web-specific
|
||||
# (server@ uses BindPaths to bind only its instance dir).
|
||||
ReadWritePaths=/var/lib/left4me
|
||||
|
||||
# === Filesystem ===
|
||||
# tightened from prior 'full'; via HARDENING_COMMON
|
||||
ProtectSystem=strict
|
||||
ProtectHome=true
|
||||
PrivateTmp=true
|
||||
|
||||
# === /proc + kernel ===
|
||||
# foreign-uid /proc hidden (defense: D4)
|
||||
ProtectProc=invisible
|
||||
ProcSubset=pid
|
||||
ProtectKernelTunables=true
|
||||
ProtectKernelModules=true
|
||||
ProtectKernelLogs=true
|
||||
ProtectClock=true
|
||||
ProtectControlGroups=true
|
||||
ProtectHostname=true
|
||||
LockPersonality=true
|
||||
|
||||
# === Syscall filter (sudo-compatible — note absence of ~@privileged) ===
|
||||
SystemCallArchitectures=native
|
||||
SystemCallFilter=@system-service
|
||||
SystemCallFilter=~@debug @mount @raw-io @reboot @swap @cpu-emulation @obsolete
|
||||
# ~@debug blocks ptrace + process_vm_readv/writev (D4).
|
||||
# ~@privileged intentionally omitted — sudo needs setuid().
|
||||
|
||||
# === Network ===
|
||||
RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX
|
||||
|
||||
# === Misc hygiene ===
|
||||
RestrictNamespaces=true
|
||||
RestrictRealtime=true
|
||||
RemoveIPC=true
|
||||
KeyringMode=private
|
||||
UMask=0027
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
|
|
|||
Loading…
Reference in a new issue