Compare commits

..

2 commits

Author SHA1 Message Date
mwiegand
48381089d3
refactor(left4me-overlay): move uid translation to script-sandbox build
left4me-script-sandbox now pre-creates an idmapped bind staging path
(--map-users=<left4me_uid>:<sandbox_uid>:1) and points the sandbox's
BindPaths at that staging instead of the raw overlay dir. Writes from
inside the sandbox (uid l4d2-sandbox) land on disk as left4me, so all
overlay content is uniformly left4me-owned end-to-end.

left4me-overlay loses ~165 lines of idmap-on-mount logic: the per-
lowerdir stat + idmap-bind setup, the bind-umount loop in teardown,
the uid lookup helpers, the _is_mountpoint /proc/self/mountinfo parser,
and the LEFT4ME_TEST_* env-var stubs. It's back to a simple "validate
lowerdirs, mount overlay" shape; gameserver mount path no longer needs
to know about producer-side ownership decisions.

Verified on kernel 6.12 that the kernel idmap propagates through
systemd-run's plain re-bind of the staging path. Tests dropped 4
idmap-on-mount specs and one deploy-artifact regression check; added
test_script_sandbox_uses_idmap_staging to pin the new staging path
+ map flags + trap cleanup.

The post-build world-read chmod kludge in the sandbox is also dropped:
the web app reads overlay files via its primary uid (left4me).

Existing overlays on the test server are sandbox-owned from prior runs
and need a one-shot `chown -R left4me:left4me /var/lib/left4me/overlays`
during deploy. New overlays produced by the refactored sandbox are
left4me-owned from creation.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-15 01:20:39 +02:00
mwiegand
bc25d423aa
plan(left4me): move idmap from gameserver mount to script-sandbox build
Architectural cleanup: the uid translation is a build-time concern
(the sandbox produces sandbox-uid files); having the gameserver path
unwind that producer-side decision on every mount means the mount
helper carries idmap lifecycle code it shouldn't need. Moving the
idmap into the script-sandbox bind makes files land left4me-owned on
disk, drops ~140 lines from left4me-overlay, and makes all overlay
content (workshop + script-built) consistent on-disk.

Verified on left4.me kernel 6.12.86 that the kernel idmap propagates
through plain re-bind, so systemd-run's BindPaths can wrap a
pre-created idmapped staging path.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-15 01:15:46 +02:00
5 changed files with 315 additions and 377 deletions

View file

@ -29,7 +29,6 @@ shell-quoted) and exit 0 instead of execv. Used by tests.
""" """
import os import os
import pwd
import re import re
import shlex import shlex
import shutil import shutil
@ -55,74 +54,6 @@ def die(msg: str) -> None:
sys.exit(1) sys.exit(1)
def _is_mountpoint(
path: str | Path,
mountinfo_path: str = "/proc/self/mountinfo",
) -> bool:
"""Reliable mount-point check that handles same-fs bind mounts.
`os.path.ismount()` compares `st_dev` of the path against its parent;
bind mounts on the same underlying filesystem share `st_dev` with their
parent, so `os.path.ismount()` returns False for them. The idmap binds
we install on `runtime/<n>/idmap/<basename>` are exactly that case.
Read /proc/self/mountinfo (field 5 is the mount point) for a check
that works regardless of mount type. The override is for tests only.
"""
abs_path = os.fspath(Path(path).resolve())
try:
with open(mountinfo_path, "r", encoding="utf-8") as f:
for line in f:
fields = line.split()
if len(fields) >= 5 and fields[4] == abs_path:
return True
except OSError:
pass
return False
def _lookup_uid(username: str) -> tuple[int, int]:
    """Resolve *username* to its ``(uid, gid)`` pair from the passwd database.

    If the account does not exist, `die` is called with a message naming the
    missing user.
    """
    try:
        account = pwd.getpwnam(username)
    except KeyError:
        message = (
            f"required system user {username!r} does not exist; "
            "this is a deploy misconfiguration"
        )
        die(message)
    return account.pw_uid, account.pw_gid
def _get_user_ids() -> tuple[int, int, int, int]:
    """Return ``(sandbox_uid, sandbox_gid, left4me_uid, left4me_gid)``.

    Normally the ids come from looking up the ``l4d2-sandbox`` and
    ``left4me`` accounts. When ``LEFT4ME_OVERLAY_PRINT_ONLY`` is ``"1"`` and
    all four ``LEFT4ME_TEST_*`` variables are set, their integer values are
    returned instead, so tests can supply synthetic ids. The stubs are only
    consulted in PRINT_ONLY mode; outside it they are ignored. If any of the
    four is missing, the real lookup is used.
    """
    env = os.environ
    if env.get("LEFT4ME_OVERLAY_PRINT_ONLY") == "1":
        stub_names = (
            "LEFT4ME_TEST_SANDBOX_UID",
            "LEFT4ME_TEST_SANDBOX_GID",
            "LEFT4ME_TEST_LEFT4ME_UID",
            "LEFT4ME_TEST_LEFT4ME_GID",
        )
        stubs = [env.get(stub_name) for stub_name in stub_names]
        if all(value is not None for value in stubs):
            # Convert in declaration order: sandbox uid/gid, then left4me uid/gid.
            stub_sandbox_uid, stub_sandbox_gid, stub_left4me_uid, stub_left4me_gid = (
                int(value) for value in stubs  # type: ignore[arg-type]
            )
            return (
                stub_sandbox_uid,
                stub_sandbox_gid,
                stub_left4me_uid,
                stub_left4me_gid,
            )

    sandbox_ids = _lookup_uid("l4d2-sandbox")
    left4me_ids = _lookup_uid("left4me")
    return sandbox_ids[0], sandbox_ids[1], left4me_ids[0], left4me_ids[1]
def root() -> Path: def root() -> Path:
return Path(os.environ.get("LEFT4ME_ROOT") or DEFAULT_ROOT) return Path(os.environ.get("LEFT4ME_ROOT") or DEFAULT_ROOT)
@ -242,79 +173,7 @@ def cmd_mount(name: str) -> None:
assert_no_fuse_xattrs(upper) assert_no_fuse_xattrs(upper)
# Resolve user ids now (fails fast on deploy misconfiguration). options = f"lowerdir={':'.join(canonical_lowerdirs)},upperdir={upper},workdir={work}"
sandbox_uid, sandbox_gid, left4me_uid, left4me_gid = _get_user_ids()
# Build the final lowerdir list, substituting idmap bind-mount paths for
# any lowerdir owned by l4d2-sandbox. An idmap bind mount makes the kernel
# see the l4d2-sandbox-owned tree as if it were owned by left4me, so that
# overlayfs copy-up produces left4me-owned upperdir entries.
idmap_dir = runtime_name_dir / "idmap"
final_lowerdirs: list[str] = []
bind_argvs: list[list[str]] = []
seen_idmap_targets: dict[Path, str] = {}
for lowerdir in canonical_lowerdirs:
try:
st = os.stat(lowerdir)
except OSError as exc:
die(f"failed to stat lowerdir {shlex.quote(lowerdir)}: {exc}")
if st.st_uid == sandbox_uid:
# This lowerdir needs idmap remapping.
# Include the parent dirname to avoid basename collisions between
# lowerdirs from different allowlist roots (e.g. overlays/foo and
# workshop_cache/foo would otherwise map to the same idmap target).
p = Path(lowerdir)
lowerdir_basename = f"{p.parent.name}_{p.name}"
idmap_target = idmap_dir / lowerdir_basename
# Belt-and-braces: detect if two different lowerdirs would collide
# on the same idmap target after the parent+name derivation.
if idmap_target in seen_idmap_targets:
die(
f"idmap target collision: lowerdirs {shlex.quote(seen_idmap_targets[idmap_target])}"
f" and {shlex.quote(lowerdir)} both map to {idmap_target}"
)
seen_idmap_targets[idmap_target] = lowerdir
if os.environ.get("LEFT4ME_OVERLAY_PRINT_ONLY") != "1":
idmap_dir.mkdir(mode=0o700, exist_ok=True)
idmap_target.mkdir(mode=0o700, exist_ok=True)
if not _is_mountpoint(idmap_target) or \
os.environ.get("LEFT4ME_OVERLAY_PRINT_ONLY") == "1":
# --map-users / --map-groups argument format:
# <on-disk-uid>:<in-mount-uid>:<count>
# The util-linux man page calls these <inner>:<outer>, which is
# misleading. Empirically (verified on left4.me, kernel 6.12,
# ext4) the FIRST number is the on-disk uid and the SECOND is
# the uid exposed inside the mount. Don't swap them.
bind_argv = [
MOUNT_BIN,
"--bind",
f"--map-users={sandbox_uid}:{left4me_uid}:1",
f"--map-groups={sandbox_gid}:{left4me_gid}:1",
lowerdir,
str(idmap_target),
]
bind_argvs.append(bind_argv)
final_lowerdirs.append(str(idmap_target))
else:
final_lowerdirs.append(lowerdir)
print_only = os.environ.get("LEFT4ME_OVERLAY_PRINT_ONLY") == "1"
if print_only:
# Emit each bind-mount argv first, then fall through to the overlay argv.
for bind_argv in bind_argvs:
_print_argv(bind_argv)
else:
# Actually exec each bind mount before the overlay mount.
for bind_argv in bind_argvs:
subprocess.run(bind_argv, check=True)
options = f"lowerdir={':'.join(final_lowerdirs)},upperdir={upper},workdir={work}"
argv = [ argv = [
MOUNT_BIN, MOUNT_BIN,
"-t", "overlay", "-t", "overlay",
@ -338,20 +197,8 @@ def cmd_umount(name: str) -> None:
str(merged_path.resolve(strict=True) if merged_path.exists() else merged_path), str(merged_path.resolve(strict=True) if merged_path.exists() else merged_path),
] ]
# Collect idmap bind-umount argvs: one per direct subdir of runtime/<name>/idmap/.
idmap_dir = runtime_name_dir / "idmap"
bind_umount_argvs: list[list[str]] = []
if idmap_dir.is_dir():
for entry in sorted(idmap_dir.iterdir()):
if entry.is_dir():
bind_umount_argvs.append([UMOUNT_BIN, str(entry)])
# PRINT_ONLY: emit the overlay umount argv, then each bind-umount argv, then exit.
# Order matches real execution (overlay first, then idmap binds underneath).
if os.environ.get("LEFT4ME_OVERLAY_PRINT_ONLY") == "1": if os.environ.get("LEFT4ME_OVERLAY_PRINT_ONLY") == "1":
_print_argv(overlay_umount_argv) _print_argv(overlay_umount_argv)
for bind_umount_argv in bind_umount_argvs:
_print_argv(bind_umount_argv)
sys.exit(0) sys.exit(0)
if merged_path.exists(): if merged_path.exists():
@ -382,16 +229,6 @@ def cmd_umount(name: str) -> None:
if work_inner.exists(): if work_inner.exists():
shutil.rmtree(work_inner) shutil.rmtree(work_inner)
# Unwind idmap bind mounts, then remove the idmap directory. Each bind
# is only umounted if it is still a mountpoint (idempotent across partial
# teardowns). _is_mountpoint reads /proc/self/mountinfo because
# os.path.ismount misses same-fs bind mounts.
for bind_umount_argv in bind_umount_argvs:
target = Path(bind_umount_argv[-1])
if _is_mountpoint(target):
subprocess.run(bind_umount_argv, check=True)
shutil.rmtree(idmap_dir, ignore_errors=True)
def main(argv: list[str]) -> None: def main(argv: list[str]) -> None:
if len(argv) != 3 or argv[1] not in ("mount", "umount"): if len(argv) != 3 or argv[1] not in ("mount", "umount"):

View file

@ -34,13 +34,36 @@ if [[ "${LEFT4ME_SCRIPT_SANDBOX_DRY_RUN:-}" == "1" ]]; then
exit 0 exit 0
fi fi
# Make sure the sandbox UID owns the overlay dir so the script can write there. # Pre-create an idmapped bind of the overlay dir, then point the sandbox's
# Idempotent: a no-op when the dir is already l4d2-sandbox-owned (re-run case), # BindPaths at that staging path. The bind translates the sandbox's writing
# and corrects the ownership the first time the dir was created by the web app # uid (l4d2-sandbox) back to left4me on disk, so all overlay content
# under the left4me UID. World-readable so the gameserver process (left4me) # (script-built and workshop) is uniformly left4me-owned. Map direction:
# can read the overlay contents via the kernel-overlayfs lowerdir at runtime. # `--map-users=<disk_uid>:<mount_uid>:1` with disk=left4me, mount=sandbox —
chown -R l4d2-sandbox:l4d2-sandbox "$OVERLAY_DIR" # a process inside the bind with uid sandbox sees its uid as itself, and
chmod 0755 "$OVERLAY_DIR" # writes get translated to disk-uid left4me. Verified on kernel 6.12 that
# idmap propagates through systemd-run's plain re-bind of the staging path.
LEFT4ME_UID=$(id -u left4me)
LEFT4ME_GID=$(id -g left4me)
SANDBOX_UID=$(id -u l4d2-sandbox)
SANDBOX_GID=$(id -g l4d2-sandbox)
STAGING=/var/lib/left4me/tmp/sandbox-idmap-${OVERLAY_ID}
# trap fires even on errors / signals so the staging bind doesn't outlive
# this invocation. Idempotent if the staging is already gone.
cleanup_staging() {
umount "$STAGING" 2>/dev/null || true
rmdir "$STAGING" 2>/dev/null || true
}
trap cleanup_staging EXIT
# A leftover staging mount from a SIGKILLed prior run can be reset by
# umounting first, then re-binding fresh on the same path.
umount "$STAGING" 2>/dev/null || true
mkdir -p "$STAGING"
mount --bind \
--map-users="${LEFT4ME_UID}:${SANDBOX_UID}:1" \
--map-groups="${LEFT4ME_GID}:${SANDBOX_GID}:1" \
"$OVERLAY_DIR" "$STAGING"
SCRIPT_RC=0 SCRIPT_RC=0
systemd-run --quiet --collect --wait --pipe \ systemd-run --quiet --collect --wait --pipe \
@ -64,19 +87,11 @@ systemd-run --quiet --collect --wait --pipe \
-p IPAddressDeny="127.0.0.0/8 ::1/128 169.254.0.0/16 fe80::/10 224.0.0.0/4 ff00::/8 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 100.64.0.0/10 fc00::/7" \ -p IPAddressDeny="127.0.0.0/8 ::1/128 169.254.0.0/16 fe80::/10 224.0.0.0/4 ff00::/8 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 100.64.0.0/10 fc00::/7" \
-p TemporaryFileSystem="/etc /var/lib" \ -p TemporaryFileSystem="/etc /var/lib" \
-p BindReadOnlyPaths="/etc/left4me/sandbox-resolv.conf:/etc/resolv.conf /etc/ssl /etc/ca-certificates /etc/nsswitch.conf /etc/alternatives ${SCRIPT}:/script.sh" \ -p BindReadOnlyPaths="/etc/left4me/sandbox-resolv.conf:/etc/resolv.conf /etc/ssl /etc/ca-certificates /etc/nsswitch.conf /etc/alternatives ${SCRIPT}:/script.sh" \
-p BindPaths="${OVERLAY_DIR}:/overlay" \ -p BindPaths="${STAGING}:/overlay" \
-p WorkingDirectory=/overlay \ -p WorkingDirectory=/overlay \
-p Environment="HOME=/tmp PATH=/usr/bin:/usr/sbin OVERLAY=/overlay" \ -p Environment="HOME=/tmp PATH=/usr/bin:/usr/sbin OVERLAY=/overlay" \
-p MemoryMax=4G -p MemorySwapMax=0 -p TasksMax=512 \ -p MemoryMax=4G -p MemorySwapMax=0 -p TasksMax=512 \
-p CPUQuota=200% -p RuntimeMaxSec=3600 \ -p CPUQuota=200% -p RuntimeMaxSec=3600 \
-- /bin/bash /script.sh || SCRIPT_RC=$? -- /bin/bash /script.sh || SCRIPT_RC=$?
# Normalize perms so the web service (left4me uid) can read overlay files
# directly via Python open() — needed by the file tree's download endpoint.
# UMask=0022 above takes care of *new* writes; this catches anything the
# script created with a tighter mode (e.g. cedapug_maps writes its
# .cedapug/manifest.tsv as 0600 by default).
find "$OVERLAY_DIR" -type f ! -perm -o+r -exec chmod o+r {} + 2>/dev/null || true
find "$OVERLAY_DIR" -type d ! -perm -o+rx -exec chmod o+rx {} + 2>/dev/null || true
exit $SCRIPT_RC exit $SCRIPT_RC

View file

@ -394,26 +394,27 @@ def test_overlay_helper_is_python_with_strict_validation():
assert '"unmount"' not in text assert '"unmount"' not in text
def test_overlay_helper_idmaps_sandbox_owned_lowerdirs(): def test_script_sandbox_uses_idmap_staging():
"""Script-built overlay lowerdirs are owned by l4d2-sandbox. Without an """The sandbox runs as l4d2-sandbox but writes need to land on disk as
idmap bind mount, kernel-overlayfs copy-up preserves that ownership and left4me, so all overlay content (workshop + script-built) is uniformly
the gameserver (uid left4me) can't write to copied-up directories like left4me-owned. The helper pre-creates an idmapped bind on a staging
addons/sourcemod/logs/. The helper must inject an idmap bind for each path and points the sandbox's BindPaths at the staging, not at the raw
sandbox-owned lowerdir before the overlay mount and tear it down after. overlay dir. trap cleans up the staging bind on exit.
""" """
text = OVERLAY_HELPER.read_text() text = SCRIPT_SANDBOX_HELPER.read_text()
# The bind-mount argv uses --map-users / --map-groups (numeric uids). # Idmap mount setup uses --map-users / --map-groups.
assert "--map-users=" in text assert "--map-users=" in text
assert "--map-groups=" in text assert "--map-groups=" in text
# Idmapped paths live under runtime/<name>/idmap/ and are substituted # Staging path lives under /var/lib/left4me/tmp/sandbox-idmap-<id>.
# into the lowerdir= string. assert "/var/lib/left4me/tmp/sandbox-idmap-" in text
assert 'runtime_name_dir / "idmap"' in text # BindPaths into the sandbox points at the staging path, not the
# Test-mode uid stubs are namespaced LEFT4ME_TEST_* and gated on # raw overlay dir.
# PRINT_ONLY=1 so a misconfigured systemd unit can't inject uids. assert 'BindPaths="${STAGING}:/overlay"' in text
assert "LEFT4ME_TEST_SANDBOX_UID" in text # trap registers cleanup so the staging bind doesn't outlive the helper.
assert "LEFT4ME_TEST_LEFT4ME_UID" in text assert "trap " in text and "cleanup_staging" in text
# Collision guard: two lowerdirs deriving the same idmap target die loudly. # The previous chown-to-l4d2-sandbox approach is gone; overlay dirs
assert "seen_idmap_targets" in text # stay left4me-owned end-to-end.
assert "chown -R l4d2-sandbox" not in text
def test_deploy_script_installs_overlay_helper_with_executable_mode(): def test_deploy_script_installs_overlay_helper_with_executable_mode():
@ -659,7 +660,7 @@ def test_script_sandbox_helper_invokes_systemd_run_with_hardening():
assert "/etc/nsswitch.conf" in text assert "/etc/nsswitch.conf" in text
assert "/etc/alternatives" in text assert "/etc/alternatives" in text
assert "${SCRIPT}:/script.sh" in text assert "${SCRIPT}:/script.sh" in text
assert 'BindPaths="${OVERLAY_DIR}:/overlay"' in text assert 'BindPaths="${STAGING}:/overlay"' in text
# IP egress filter: allow public, deny localhost / RFC1918 / link-local / # IP egress filter: allow public, deny localhost / RFC1918 / link-local /
# multicast / CGNAT / ULA. systemd's "more specific rule wins" semantics # multicast / CGNAT / ULA. systemd's "more specific rule wins" semantics

View file

@ -0,0 +1,264 @@
# Build-time idmap: move the uid translation from the gameserver mount into the script sandbox
## Context
The current idmap implementation translates uids at **gameserver mount
time**: `left4me-overlay` stats each lowerdir, creates a per-lowerdir
idmapped bind under `runtime/<n>/idmap/<basename>` for the sandbox-
owned ones, then uses those bind paths in the overlay's `lowerdir=`.
On stop, the binds get torn down. Works correctly today, but spreads
the idmap concern across two helpers and adds mount lifecycle code on
every gameserver start.
Cleaner alternative: do the idmap translation at **script-sandbox
build time**, so files land on disk as `left4me`-owned. The on-disk
state then matches workshop-built overlays (also left4me-owned), and
the gameserver mount path becomes uniform — no per-lowerdir stat,
no idmap binds, no extra cleanup.
This plan switches the architecture to the build-time approach and
reverts the gameserver-mount idmap code.
## Verified mechanism
Tested end-to-end on `left4.me` (Trixie, kernel 6.12.86, ext4) on
2026-05-15:
1. `/source/` dir owned by `left4me` on disk.
2. `mount --bind --map-users=980:981:1 --map-groups=980:981:1
/source /idmapped` — inside `/idmapped`, files appear as uid 981
(sandbox view).
3. `mount --bind /idmapped /rebound` — a plain second bind. The idmap
**propagates** to `/rebound` (rebound view also shows uid 981).
This is what `BindPaths=` in the sandbox unit does.
4. `sudo -u l4d2-sandbox touch /rebound/x.txt` — write **succeeds**.
The file lands on disk owned by `left4me` (uid 980).
Map direction is the inverse of the gameserver-side map:
`--map-users=<disk_uid>:<mount_uid>:1` where disk is `left4me` and
mount-side is `l4d2-sandbox`. Inside the bind, the sandbox uid sees
its own uid as itself; writes from that uid get translated back to
the disk-side (left4me) for storage.
## Approach
### Script-sandbox helper (`deploy/files/usr/local/libexec/left4me/left4me-script-sandbox`)
Pre-create an idmapped bind staging path, point the sandbox's
BindPaths at it, clean up on exit. Concretely:
1. **Remove** the existing `chown -R l4d2-sandbox:l4d2-sandbox
"$OVERLAY_DIR"` and `chmod 0755` lines. The overlay dir stays
`left4me`-owned (web app's creation default).
2. **Add** a setup block before `systemd-run`:
```bash
STAGING=/var/lib/left4me/tmp/sandbox-idmap-${OVERLAY_ID}
trap 'umount "$STAGING" 2>/dev/null || true; rmdir "$STAGING" 2>/dev/null || true' EXIT
mkdir -p "$STAGING"
mount --bind \
--map-users=$(id -u left4me):$(id -u l4d2-sandbox):1 \
--map-groups=$(id -g left4me):$(id -g l4d2-sandbox):1 \
"$OVERLAY_DIR" "$STAGING"
```
3. **Change** the systemd-run line:
- `BindPaths="${OVERLAY_DIR}:/overlay"` → `BindPaths="${STAGING}:/overlay"`
4. **Remove** the post-build `find ... chmod o+r` block. Files end up
left4me-owned, web app reads them via its primary uid. The
world-read kludge was only needed because of the old sandbox-
owned files; with this change it's obsolete.
`trap` ensures the staging bind is umounted even on errors / signals.
Idempotent: if the helper is re-run, `umount + rmdir` handle existing
state, and `mkdir -p` + `mount --bind` over an existing mountpoint
adds another bind that the next exit cleans up. The kernel 6.12 bind
nesting on the same path works fine (verified during the recent
gameserver-side idmap fix).
### Gameserver-mount helper (`deploy/files/usr/local/libexec/left4me/left4me-overlay`)
Revert the idmap logic added in commit `2f6a9cf` (+ fix in `9053186`,
+ mountpoint-detection fix in `dd918ac`). Specifically:
1. **Remove** the per-lowerdir stat + idmap-decision loop in `cmd_mount`.
`lowerdir=` becomes the simple colon-join of resolved lowerdirs
(the pre-2f6a9cf shape).
2. **Remove** the bind-umount loop in `cmd_umount` and the
`shutil.rmtree(idmap_dir, ...)` line.
3. **Remove** the `_is_mountpoint`, `_lookup_uid`, and `_get_user_ids`
helpers — no longer used. (Keep `os.path.ismount` for the merged
overlay check; that one's reliable.)
4. **Remove** the `LEFT4ME_TEST_*_UID/GID` test-only env-var stubs.
5. **Remove** the idmap PRINT_ONLY emission.
The helper shrinks back to the pre-idmap size (~242 lines from current 381).
### Tests
In `l4d2host/tests/test_overlay_helper.py`:
1. **Remove** `test_mount_idmaps_sandbox_owned_lowerdir`.
2. **Remove** `test_mount_skips_idmap_for_left4me_owned_lowerdir`.
3. **Remove** `test_umount_unwinds_idmap_binds`.
4. **Remove** `test_is_mountpoint_detects_same_fs_bind_mount` and the
`_load_helper_module` helper.
5. **Remove** `_setup_instance_with_uid` and the `FAKE_*_UID/GID`
constants.
6. **Remove** the `LEFT4ME_TEST_*` env-var injection in `_run`.
In `deploy/tests/test_deploy_artifacts.py`:
1. **Remove** `test_overlay_helper_idmaps_sandbox_owned_lowerdirs`
(the regression test for the soon-removed feature).
2. **Add** a new test `test_script_sandbox_uses_idmap_staging` that
asserts the sandbox helper contains:
- `--map-users=` and `--map-groups=` strings (the bind setup),
- `/var/lib/left4me/tmp/sandbox-idmap-` (the staging path prefix),
- `BindPaths="${STAGING}:/overlay"` (or close equivalent — point
the bind at the idmapped staging path, not at OVERLAY_DIR).
- A `trap` for cleanup.
3. **Remove** the existing `chown -R l4d2-sandbox` assertion in the
sandbox-helper test (if any).
### Migration
Existing overlays under `/var/lib/left4me/overlays/<id>/` are a mix:
- Workshop-built: already `left4me`-owned (no migration needed).
- Script-built (e.g. server 2's overlays 4 and 9): currently
`l4d2-sandbox`-owned from the prior helper version. **Need chown to
`left4me:left4me`.**
One-shot migration command on the test server (run before deploying
the new helpers, OR after — both work because the new script-sandbox
also expects left4me-owned dirs):
```bash
sudo chown -R left4me:left4me /var/lib/left4me/overlays/
```
That's safe — overlays/* are all overlay content, no other tenants.
The workshop ones are already left4me; the chown is a no-op for them.
The script-built ones get flipped to the new ownership model.
Running gameservers using the old idmap-bind setup will keep working
on the old overlays/<id> files (which they bind via the now-orphan
idmap bind that's already in place). The next stop/start cycle picks
up the new helper, which:
- Doesn't create any new idmap binds (gameserver-side helper has
none),
- Cleans up the legacy idmap binds it finds (the existing umount loop
in the current helper handles this on the way out).
After the first stop/start cycle, no more idmap binds exist anywhere
in the system. Steady state.
### ckn-bw bundle
No changes needed. The `install_left4me_scripts` action picks up the
new helper contents from `/opt/left4me/src/deploy/files/usr/local/...`
on the next `git_deploy` apply. ckn-bw itself is content-agnostic
about the helper internals.
## Files to modify
- `deploy/files/usr/local/libexec/left4me/left4me-script-sandbox` — add
idmap bind setup + trap cleanup; remove old chown; switch BindPaths.
- `deploy/files/usr/local/libexec/left4me/left4me-overlay` — revert the
~140 lines of idmap-handling code; remove uid lookup, mountinfo
helper, test-stub env vars; drop the idmap PRINT_ONLY emission.
- `l4d2host/tests/test_overlay_helper.py` — drop idmap tests and
helpers.
- `deploy/tests/test_deploy_artifacts.py` — flip the asserted
invariant (helper has idmap → sandbox has idmap).
## Verification
End-to-end on `left4.me`:
1. Push left4me commit, `bw apply ovh.left4me`.
2. `sudo chown -R left4me:left4me /var/lib/left4me/overlays/` (one-shot
migration).
3. `sudo systemctl restart left4me-server@2`.
4. `sudo findmnt --task 1 -o TARGET | grep runtime/2` — expect *only*
`runtime/2/merged`, no `idmap/*` subdirs.
5. `sudo ls -ln /var/lib/left4me/overlays/9/` and a couple of other
script overlays — expect `left4me:left4me`.
6. Trigger an overlay rebuild from the web UI on a script overlay.
Confirm the build succeeds and the resulting files are
left4me-owned on disk.
7. `sudo -u left4me touch
/var/lib/left4me/runtime/2/merged/left4dead2/addons/sourcemod/logs/test.log`
— expect write to succeed (verifies SM logging path still works).
8. RCON `sm_cvar nb_update_frequency 0.0333` — no permission-denied
line in `journalctl -u left4me-server@2`.
Local tests:
```
pytest l4d2host/tests/test_overlay_helper.py -q
pytest deploy/tests/test_deploy_artifacts.py -q
```
Both should pass with reduced test count (removed idmap-on-mount
tests, added one sandbox-helper assertion).
## Risks
- **Kernel version dependency**: idmap propagation through plain
re-bind was verified on 6.12.86. Older kernels may behave
differently. ovh.left4me is on Trixie's 6.12, so we're fine; future
hosts on older kernels would need verification. Document the kernel
floor (≥ 6.6 for overlayfs+idmap, but ≥ 6.x for the propagation —
we have no exact lower bound documented).
- **Stale idmap binds during migration**: server 2 currently has two
active gameserver-side idmap binds (`runtime/2/idmap/overlays_4`
and `overlays_9`). The first stop after deploy uses the existing
helper code (with `_is_mountpoint` fix) to umount them. Verified
in the recent fix cycle. New starts won't create new binds.
- **Sandbox migration of in-flight builds**: if a script-overlay
build is running during the deploy + chown migration, the chown
could happen mid-write. Mitigation: don't run the chown while a
build is active; check via `systemctl list-units
'left4me-script-*'` first.
- **The trap-based cleanup in bash**: if the helper is hit with
SIGKILL, the trap doesn't fire and the staging bind leaks. Same
exposure as today's leaks (gameserver-side stale binds on similar
scenarios). Acceptable; the next sandbox run for the same overlay
id `umount`s the leftover bind first via the trap setup pattern
(`umount; rmdir; mkdir -p; mount --bind` is idempotent).
## Why this is worth doing despite the working current solution
Today's idmap-on-mount works and is correct. The reasons to refactor:
- **Architectural locality**: the uid translation is a build-time
concern (the sandbox creates files); having it as a mount-time
concern means the gameserver path needs to know about a producer-
side decision.
- **Code reduction**: helper shrinks by ~140 lines; tests by ~150.
Removed code is removed bug surface.
- **On-disk consistency**: all overlay content becomes `left4me`-
owned. Easier to reason about (no two-tier ownership), easier to
manually inspect (no per-overlay-type ownership).
- **Mount lifecycle simplification**: no per-instance idmap dir
creation, no per-start uid lookups, no per-stop bind teardown, no
stacked-bind regression hazard from the same-fs `os.path.ismount`
trap (we already fixed that once).
- **Web app read path**: drops the world-read chmod kludge in the
sandbox helper. File-tree download endpoint reads via primary uid.
The cost (refactor + migration) is paid once; the benefit is
permanent.
## Out of scope
- Splitting the web-app uid from the gameserver uid (future change
noted in earlier plans).
- Rewriting shell helpers in Python.
- `left4me-apply-cake` cleanup (still drifting along in the install
glob).
- Re-examining whether `l4d2-sandbox` should exist as a separate uid
at all (this plan keeps it, but the cost-benefit might shift
later).

View file

@ -41,11 +41,6 @@ def _run(args: list[str], root: Path, extra_env: dict[str, str] | None = None) -
**os.environ, **os.environ,
"LEFT4ME_ROOT": str(root), "LEFT4ME_ROOT": str(root),
"LEFT4ME_OVERLAY_PRINT_ONLY": "1", "LEFT4ME_OVERLAY_PRINT_ONLY": "1",
# Inject synthetic user ids so tests work without real system users.
"LEFT4ME_TEST_SANDBOX_UID": str(FAKE_SANDBOX_UID),
"LEFT4ME_TEST_SANDBOX_GID": str(FAKE_SANDBOX_GID),
"LEFT4ME_TEST_LEFT4ME_UID": str(FAKE_LEFT4ME_UID),
"LEFT4ME_TEST_LEFT4ME_GID": str(FAKE_LEFT4ME_GID),
} }
if extra_env: if extra_env:
env.update(extra_env) env.update(extra_env)
@ -161,136 +156,6 @@ def test_rejects_empty_lowerdir_entry(tmp_path: Path) -> None:
assert "empty entry" in result.stderr assert "empty entry" in result.stderr
FAKE_SANDBOX_UID = 7001
FAKE_SANDBOX_GID = 7001
FAKE_LEFT4ME_UID = 7002
FAKE_LEFT4ME_GID = 7002
def _setup_instance_with_uid(
    root: Path,
    name: str = "alpha",
    lowerdir_uid: int = FAKE_LEFT4ME_UID,
    lowerdir_gid: int = FAKE_LEFT4ME_GID,
) -> Path:
    """Build an instance tree under *root* and chown its overlay lowerdir.

    Creates ``overlays/workshop`` and ``installation`` as the two lowerdirs,
    writes ``instances/<name>/instance.env`` listing them, and creates the
    ``upper``/``work``/``merged`` dirs under ``runtime/<name>``. The chown of
    the overlay dir is best-effort: a PermissionError is ignored.

    Returns the overlay lowerdir path.
    """
    workshop_dir = root / "overlays" / "workshop"
    installation_dir = root / "installation"

    workshop_dir.mkdir(parents=True, exist_ok=True)
    try:
        os.chown(workshop_dir, lowerdir_uid, lowerdir_gid)
    except PermissionError:
        # Not running as root: ownership stays as created, which is fine for
        # the "skips idmap" test.
        pass
    installation_dir.mkdir(parents=True, exist_ok=True)

    joined_lowerdirs = ":".join([str(workshop_dir), str(installation_dir)])
    instance_dir = root / "instances" / name
    instance_dir.mkdir(parents=True, exist_ok=True)
    (instance_dir / "instance.env").write_text(
        f"L4D2_LOWERDIRS={joined_lowerdirs}\n"
    )

    runtime_dir = root / "runtime" / name
    for subdir in ("upper", "work", "merged"):
        (runtime_dir / subdir).mkdir(parents=True, exist_ok=True)
    return workshop_dir
def test_mount_idmaps_sandbox_owned_lowerdir(tmp_path: Path) -> None:
    """A sandbox-owned lowerdir must be routed through an idmap bind mount.

    Expected PRINT_ONLY output is two argv lines: first the
    ``mount --bind --map-users/--map-groups`` argv, then the overlay mount
    argv whose ``lowerdir=`` names the idmap path rather than the raw overlay
    path. Skipped when the runner cannot chown.
    """
    overlay_dir = _setup_instance_with_uid(
        tmp_path, lowerdir_uid=FAKE_SANDBOX_UID, lowerdir_gid=FAKE_SANDBOX_GID
    )
    try:
        os.chown(overlay_dir, FAKE_SANDBOX_UID, FAKE_SANDBOX_GID)
    except PermissionError:
        pytest.skip("chown requires root — skip on unprivileged runner")

    result = _run(["mount", "alpha"], tmp_path)
    assert result.returncode == 0, result.stderr

    argv_lines = [row for row in result.stdout.splitlines() if row.strip()]
    assert len(argv_lines) == 2, f"expected 2 argv lines, got: {result.stdout!r}"
    bind_line, overlay_line = argv_lines

    # First line: the idmapped bind mount of the raw overlay dir.
    bind_argv = shlex.split(bind_line)
    expected_target = str(
        tmp_path / "runtime" / "alpha" / "idmap" / "overlays_workshop"
    )
    assert bind_argv[0] == "/bin/mount"
    assert "--bind" in bind_argv
    assert f"--map-users={FAKE_SANDBOX_UID}:{FAKE_LEFT4ME_UID}:1" in bind_argv
    assert f"--map-groups={FAKE_SANDBOX_GID}:{FAKE_LEFT4ME_GID}:1" in bind_argv
    assert bind_argv[-2] == str(overlay_dir)
    assert bind_argv[-1] == expected_target

    # Second line: the overlay mount, using the idmap path as lowerdir.
    overlay_argv = shlex.split(overlay_line)
    assert overlay_argv[0] == "/bin/mount"
    assert overlay_argv[1:3] == ["-t", "overlay"]
    options = overlay_argv[5]
    assert f"lowerdir={expected_target}:" in options, \
        f"lowerdir should start with idmap path; got: {options!r}"
    assert str(overlay_dir) not in options, \
        f"raw overlay path should not appear in lowerdir; got: {options!r}"
def test_mount_skips_idmap_for_left4me_owned_lowerdir(tmp_path: Path) -> None:
    """No idmap bind mount is emitted when the lowerdir is left4me-owned.

    Exactly one argv line (the overlay mount) is expected, and its options
    must reference the raw overlay dir rather than anything under
    ``runtime/alpha/idmap``.
    """
    overlay_dir = _setup_instance_with_uid(
        tmp_path, lowerdir_uid=FAKE_LEFT4ME_UID, lowerdir_gid=FAKE_LEFT4ME_GID
    )
    # Chown to the left4me uid on a best-effort basis only.
    try:
        os.chown(overlay_dir, FAKE_LEFT4ME_UID, FAKE_LEFT4ME_GID)
    except PermissionError:
        # An unprivileged runner leaves st_uid at 0 or our own uid. Neither
        # equals FAKE_SANDBOX_UID, so the helper is still expected to skip
        # the idmap bind.
        pass

    result = _run(["mount", "alpha"], tmp_path)
    assert result.returncode == 0, result.stderr

    argv_lines = [line for line in result.stdout.splitlines() if line.strip()]
    assert len(argv_lines) == 1, \
        f"expected 1 argv line (no bind mount), got: {result.stdout!r}"

    (overlay_line,) = argv_lines
    overlay_parts = shlex.split(overlay_line)
    assert overlay_parts[0] == "/bin/mount"
    assert "--bind" not in overlay_parts

    options = overlay_parts[5]
    idmap_subdir = str(tmp_path / "runtime" / "alpha" / "idmap")
    assert idmap_subdir not in options, f"idmap path should not appear; got: {options!r}"
    assert str(overlay_dir) in options
def test_umount_unwinds_idmap_binds(tmp_path: Path) -> None:
    """umount prints the overlay umount first, then a bind umount.

    An idmap subdir is created up front to imitate the leftovers of an
    earlier mount; the second argv line must be a ``/bin/umount`` targeting
    that subdir.
    """
    _setup_instance(tmp_path)

    # Simulate state left behind by a previous mount.
    runtime_dir = tmp_path / "runtime" / "alpha"
    idmap_dir = runtime_dir / "idmap"
    idmap_dir.mkdir(parents=True)
    idmap_sub = idmap_dir / "workshop"
    idmap_sub.mkdir()

    result = _run(["umount", "alpha"], tmp_path)
    assert result.returncode == 0, result.stderr

    argv_lines = [line for line in result.stdout.splitlines() if line.strip()]
    assert len(argv_lines) >= 2, \
        f"expected at least 2 argv lines, got: {result.stdout!r}"

    # Line 1 tears down the overlay itself.
    expected_overlay_umount = ["/bin/umount", str(runtime_dir / "merged")]
    assert shlex.split(argv_lines[0]) == expected_overlay_umount

    # Line 2 unwinds the bind on the pre-seeded idmap subdir.
    bind_umount_parts = shlex.split(argv_lines[1])
    assert bind_umount_parts[0] == "/bin/umount"
    assert bind_umount_parts[-1] == str(idmap_sub)
@pytest.mark.skipif(sys.platform != "linux", reason="user.* xattrs are Linux-only") @pytest.mark.skipif(sys.platform != "linux", reason="user.* xattrs are Linux-only")
def test_rejects_upperdir_with_fuseoverlayfs_xattr(tmp_path: Path) -> None: def test_rejects_upperdir_with_fuseoverlayfs_xattr(tmp_path: Path) -> None:
_setup_instance(tmp_path) _setup_instance(tmp_path)
@ -303,47 +168,3 @@ def test_rejects_upperdir_with_fuseoverlayfs_xattr(tmp_path: Path) -> None:
result = _run(["mount", "alpha"], tmp_path) result = _run(["mount", "alpha"], tmp_path)
assert result.returncode != 0 assert result.returncode != 0
assert "fuse-overlayfs xattr" in result.stderr assert "fuse-overlayfs xattr" in result.stderr
def _load_helper_module():
    """Load the helper script as a module so tests can reach its internals.

    The script has no ``.py`` suffix, so an explicit ``SourceFileLoader`` is
    used instead of relying on importlib's extension-based detection. The
    module is executed and returned as ``left4me_overlay``.
    """
    from importlib.machinery import SourceFileLoader
    from importlib.util import module_from_spec, spec_from_loader

    module_name = "left4me_overlay"
    source_loader = SourceFileLoader(module_name, str(HELPER_SOURCE))
    module_spec = spec_from_loader(module_name, source_loader)
    assert module_spec is not None

    helper_module = module_from_spec(module_spec)
    source_loader.exec_module(helper_module)
    return helper_module
def test_is_mountpoint_detects_same_fs_bind_mount(tmp_path: Path) -> None:
    """_is_mountpoint parses mountinfo, so same-fs bind mounts are detected.

    Regression guard: ``os.path.ismount()`` compares ``st_dev`` with the
    parent directory and therefore reports False for a bind mount on the
    same filesystem. The idmap binds on ``runtime/<n>/idmap/<basename>`` are
    that case; an ismount-based check skipped the umount on stop and bound
    again on start, piling up mount-table entries across stop/start cycles.

    The helper is pointed at a fake mountinfo file here, so no real mount is
    needed.
    """
    helper = _load_helper_module()

    target = tmp_path / "some-bind"
    target.mkdir()
    abs_target = str(target.resolve())

    # Column 5 of a mountinfo line is the mountpoint. The lines are kept
    # minimal so the parse is exercised without relying on the other fields.
    entries = (
        f"42 1 0:30 / {abs_target} rw,relatime - tmpfs tmpfs rw\n",
        "43 1 0:31 / /some/other/path rw,relatime - tmpfs tmpfs rw\n",
    )
    mountinfo = tmp_path / "fake-mountinfo"
    mountinfo.write_text("".join(entries))
    mountinfo_path = str(mountinfo)

    # Listed mountpoint -> True; unlisted path -> False.
    assert helper._is_mountpoint(target, mountinfo_path) is True
    assert helper._is_mountpoint(tmp_path / "not-a-mount", mountinfo_path) is False
    # A missing mountinfo file is reported as "not a mountpoint".
    assert helper._is_mountpoint(target, str(tmp_path / "no-such-file")) is False