left4me/deploy/deploy-test-server.sh
mwiegand cf865d4915
fix(deploy): one-shot cleanup of orphan overlay dirs after globals removal
Migration 0005_script_overlays drops the legacy l4d2center_maps /
cedapug_maps overlay rows but leaves their /var/lib/left4me/overlays/{id}
directories on disk. When the web app subsequently creates a new overlay
and AUTOINCREMENT issues an id matching one of those orphans,
create_overlay_directory(exist_ok=False) crashes with FileExistsError —
which surfaced as a 500 on POST /overlays the first time a script
overlay was created on a deployed test box.

Adds a sentinel-gated sweep in deploy-test-server.sh that lists overlay
ids in the DB, removes any directory under overlays/ whose id has no
matching row, and drops the now-unused global_overlay_cache. Mirrors the
.kernel-overlay-migrated sentinel pattern so reruns are no-ops.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 16:16:33 +02:00

249 lines
9.7 KiB
Bash
Executable file

#!/bin/sh
set -eu
# Print the one-line synopsis to stderr and terminate with status 2.
usage() {
  printf '%s\n' "Usage: $0 <ssh-user@host>" 1>&2
  exit 2
}
# Exactly one positional argument (the ssh destination) is accepted.
[ "$#" -eq 1 ] || usage
# ssh destination (user@host) that every ssh/scp call below targets.
target=$1

# Absolute path of the directory holding this script, and of its parent
# (the repository root that gets archived). CDPATH is cleared for the cd so
# the lookup cannot be redirected to another directory.
script_dir=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
repo_root=$(CDPATH= cd -- "$script_dir/.." && pwd)

# Local scratch directory for the release tarball and, optionally, the admin
# credential files staged for upload.
tmp_dir=$(mktemp -d)
archive="$tmp_dir/left4me.tar.gz"

# Remove the local scratch directory. Installed as the EXIT trap below.
cleanup() {
  rm -rf "$tmp_dir"
}

# Cleanup is bound to EXIT only. The signal traps just call `exit` with the
# conventional 128+signo status: a handler that merely ran cleanup would
# return and let the script carry on with $tmp_dir already deleted. Calling
# `exit` makes the shell terminate and fire the EXIT trap.
trap cleanup EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM
# Pack the repository root into the gzip'd tarball at $archive, leaving out
# VCS/tooling directories, the virtualenv, Python caches and build metadata,
# any local l4d2web.db* files, and '._*' entries.
# NOTE(review): COPYFILE_DISABLE=1 together with the '._*' exclude looks aimed
# at keeping macOS AppleDouble files out of the archive - confirm.
COPYFILE_DISABLE=1 tar -czf "$archive" \
--exclude .git \
--exclude .claude \
--exclude .venv \
--exclude __pycache__ \
--exclude .pytest_cache \
--exclude '*.egg-info' \
--exclude 'l4d2web.db*' \
--exclude '._*' \
-C "$repo_root" .
# Create a scratch directory on the target and ship the tarball into it.
remote_tmp=$(ssh "$target" 'mktemp -d')
scp "$archive" "$target:$remote_tmp/left4me.tar.gz"

# Optional admin bootstrap: only when BOTH variables are set in the caller's
# environment are the credentials written to private local files and copied
# next to the tarball. The remote script looks for them by file name.
admin_username_file=
admin_password_file=
if [ -n "${LEFT4ME_ADMIN_USERNAME+x}" ] && [ -n "${LEFT4ME_ADMIN_PASSWORD+x}" ]; then
  # Restrict permissions before any credential file is created.
  umask 077
  admin_username_file="$tmp_dir/admin_username"
  admin_password_file="$tmp_dir/admin_password"
  printf '%s' "$LEFT4ME_ADMIN_USERNAME" > "$admin_username_file"
  printf '%s' "$LEFT4ME_ADMIN_PASSWORD" > "$admin_password_file"
  scp "$admin_username_file" "$target:$remote_tmp/admin_username"
  scp "$admin_password_file" "$target:$remote_tmp/admin_password"
fi
# Everything up to the REMOTE terminator is fed verbatim (quoted heredoc, no
# local expansion) to `sh -s` on the target; the remote scratch directory is
# passed as its first positional argument.
ssh "$target" sh -s -- "$remote_tmp" <<'REMOTE'
set -eu

# Scratch directory created by the local side; holds the uploaded tarball.
remote_tmp=$1
archive="$remote_tmp/left4me.tar.gz"
repo_tmp="$remote_tmp/repo"

# Privileged commands are prefixed with $sudo_cmd: empty when already running
# as uid 0, `sudo` for any other user.
case "$(id -u)" in
  0) sudo_cmd= ;;
  *) sudo_cmd=sudo ;;
esac
# Run the given command line as the left4me service user.
run_as_left4me() {
  sudo -u left4me "$@"
}

# Run the given command line as left4me with every variable defined in
# /etc/left4me/host.env and /etc/left4me/web.env exported into its
# environment (set -a marks sourced assignments for export).
run_left4me_with_env() {
  run_as_left4me sh -c 'set -a; . /etc/left4me/host.env; . /etc/left4me/web.env; set +a; exec "$@"' sh "$@"
}

# Remove the remote scratch directory. Installed as the EXIT trap below.
cleanup_remote() {
  rm -rf "$remote_tmp"
}

# Cleanup is bound to EXIT only. The signal traps just call `exit` with the
# conventional 128+signo status: a handler that merely ran cleanup_remote
# would return and let the deploy continue with $remote_tmp already deleted.
# Calling `exit` makes the shell terminate and fire the EXIT trap.
trap cleanup_remote EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM
# Service account that owns the application; its home is /var/lib/left4me.
id left4me >/dev/null 2>&1 ||
  $sudo_cmd useradd --system --home-dir /var/lib/left4me --create-home --shell /usr/sbin/nologin left4me

# Sandbox uid for script-overlay builds. No home, no login shell — the bwrap
# invocation uses --uid/--gid to drop to it.
id l4d2-sandbox >/dev/null 2>&1 ||
  $sudo_cmd useradd --system --no-create-home --shell /usr/sbin/nologin l4d2-sandbox
# Install OS dependencies with whichever supported package manager exists.
# The package set is the same for both, except that the apt branch also
# installs python3-venv.
shared_pkgs='python3-pip curl ca-certificates tar gzip util-linux sudo bubblewrap'
if command -v apt-get >/dev/null 2>&1; then
  $sudo_cmd apt-get update
  # $shared_pkgs is intentionally unquoted: one argument per package name.
  $sudo_cmd apt-get install -y python3 python3-venv $shared_pkgs
elif command -v dnf >/dev/null 2>&1; then
  $sudo_cmd dnf install -y python3 $shared_pkgs
else
  printf 'Unsupported package manager: expected apt-get or dnf\n' >&2
  exit 1
fi
# Create the configuration, code, systemd/libexec and state directories.
state_root=/var/lib/left4me
$sudo_cmd mkdir -p \
  /etc/left4me \
  /opt/left4me \
  /usr/local/lib/systemd/system \
  /usr/local/libexec/left4me \
  "$state_root/installation" \
  "$state_root/overlays" \
  "$state_root/instances" \
  "$state_root/runtime" \
  "$state_root/workshop_cache" \
  "$state_root/tmp"

# The state root and each of its subdirectories belong to the service user
# (non-recursive: existing contents are not touched).
$sudo_cmd chown left4me:left4me \
  "$state_root" \
  "$state_root/installation" \
  "$state_root/overlays" \
  "$state_root/instances" \
  "$state_root/runtime" \
  "$state_root/workshop_cache" \
  "$state_root/tmp"

# /var/lib/left4me is left4me's home dir (mode 0700 from useradd --create-home).
# Allow other uids (notably l4d2-sandbox, used by script overlay builds) to
# traverse — but not list — so the bwrap bind-mount can resolve the overlay
# path under the dropped privilege.
$sudo_cmd chmod 0711 "$state_root"

# The code tree is owned by the service user as well, recursively.
$sudo_cmd chown -R left4me:left4me /opt/left4me
# Replace the contents of /opt/left4me with the uploaded tree while keeping
# an existing virtualenv. The order of the steps matters.
# 1. Unpack the tarball into the scratch directory.
mkdir -p "$repo_tmp"
tar -xzf "$archive" -C "$repo_tmp"
# 2. Park the current virtualenv (if any) in the scratch directory so the
#    wipe below does not delete it.
if [ -d /opt/left4me/.venv ]; then
$sudo_cmd mv /opt/left4me/.venv "$remote_tmp/venv"
fi
# 3. Delete every top-level entry of /opt/left4me (the directory itself
#    stays) and copy in the fresh tree, dotfiles included.
$sudo_cmd find /opt/left4me -mindepth 1 -maxdepth 1 -exec rm -rf {} +
$sudo_cmd cp -R "$repo_tmp"/. /opt/left4me/
# 4. Move the parked virtualenv back into place.
if [ -d "$remote_tmp/venv" ]; then
$sudo_cmd mv "$remote_tmp/venv" /opt/left4me/.venv
fi
# 5. Hand the whole tree to the service user.
$sudo_cmd chown -R left4me:left4me /opt/left4me
# Copy the systemd units shipped in the repo into the local unit directory.
unit_src=/opt/left4me/deploy/files/usr/local/lib/systemd/system
unit_dst=/usr/local/lib/systemd/system
for unit in left4me-web.service left4me-server@.service; do
  $sudo_cmd cp "$unit_src/$unit" "$unit_dst/$unit"
done

# Copy the libexec helper scripts, then mark all of them executable.
helper_src=/opt/left4me/deploy/files/usr/local/libexec/left4me
helper_dst=/usr/local/libexec/left4me
for helper in left4me-systemctl left4me-journalctl left4me-overlay left4me-script-sandbox; do
  $sudo_cmd cp "$helper_src/$helper" "$helper_dst/$helper"
done
$sudo_cmd chmod 0755 \
  "$helper_dst/left4me-systemctl" \
  "$helper_dst/left4me-journalctl" \
  "$helper_dst/left4me-overlay" \
  "$helper_dst/left4me-script-sandbox"
# Install the sudoers fragment for the left4me helpers.
# The shipped file is syntax-checked BEFORE it is placed in /etc/sudoers.d:
# a fragment with a parse error that is already installed can make sudo
# refuse to run, which would break both the rest of this deploy and later
# access to the host. Under `set -e` a failed check aborts the script while
# the previously installed fragment is still untouched.
sudoers_src=/opt/left4me/deploy/files/etc/sudoers.d/left4me
$sudo_cmd visudo -cf "$sudoers_src"
# install(1) sets owner and mode as part of creating the file, so the
# fragment never exists in /etc/sudoers.d with looser permissions than 0440.
$sudo_cmd install -m 0440 -o root -g root "$sudoers_src" /etc/sudoers.d/left4me
# host.env is always refreshed from the template shipped in the repo.
$sudo_cmd cp /opt/left4me/deploy/templates/etc/left4me/host.env /etc/left4me/host.env
$sudo_cmd chmod 0644 /etc/left4me/host.env

# web.env is generated only when it does not exist yet, so an existing file
# (including its SECRET_KEY) is never overwritten by a redeploy.
if [ ! -f /etc/left4me/web.env ]; then
  tmp_web_env="$remote_tmp/web.env"
  secret_key=$(python3 -c 'import secrets; print(secrets.token_hex(32))')
  printf '%s\n' \
    'DATABASE_URL=sqlite:////var/lib/left4me/left4me.db' \
    "SECRET_KEY=$secret_key" \
    'JOB_WORKER_THREADS=4' \
    'SESSION_COOKIE_SECURE=false' \
    > "$tmp_web_env"
  # Installed as root:left4me with mode 0640.
  $sudo_cmd install -m 0640 -o root -g left4me "$tmp_web_env" /etc/left4me/web.env
fi
# Create the virtualenv as the service user when no executable interpreter
# is present at /opt/left4me/.venv/bin/python.
if [ ! -x /opt/left4me/.venv/bin/python ]; then
run_as_left4me python3 -m venv /opt/left4me/.venv
fi
# Upgrade pip, then install both project packages in editable mode.
run_as_left4me /opt/left4me/.venv/bin/python -m pip install --upgrade pip
run_as_left4me /opt/left4me/.venv/bin/pip install -e /opt/left4me/l4d2host -e /opt/left4me/l4d2web
# Apply database migrations as left4me, from the l4d2web directory, with
# host.env and web.env exported and JOB_WORKER_ENABLED=false / PYTHONPATH set
# for the alembic process.
# NOTE(review): the inner shell has no `set -e`, and `cd ... && set -a; ...`
# only chains the cd to `set -a` - if the cd fails, the remaining commands
# still run from the original working directory.
run_as_left4me sh -c "cd /opt/left4me/l4d2web && set -a; . /etc/left4me/host.env; . /etc/left4me/web.env; set +a; env \
JOB_WORKER_ENABLED=false \
PYTHONPATH=/opt/left4me \
/opt/left4me/.venv/bin/alembic -c /opt/left4me/l4d2web/alembic.ini upgrade head"
# Optional admin bootstrap: runs only when both credential files were
# uploaded into the scratch directory.
if [ -f "$remote_tmp/admin_username" ] && [ -f "$remote_tmp/admin_password" ]; then
LEFT4ME_ADMIN_USERNAME=$(cat "$remote_tmp/admin_username")
LEFT4ME_ADMIN_PASSWORD=$(cat "$remote_tmp/admin_password")
# Run the app's create-user command as left4me, capturing stdout and stderr
# together so the failure text can be inspected below.
# NOTE(review): the password is handed over as a NAME=value argument to
# `env`, so it is part of the sudo/sh/env command lines while they run and
# is presumably visible in the process list - consider a channel that does
# not go through argv.
if ! create_user_output=$(run_left4me_with_env env \
JOB_WORKER_ENABLED=false \
LEFT4ME_ADMIN_PASSWORD="$LEFT4ME_ADMIN_PASSWORD" \
/opt/left4me/.venv/bin/flask --app l4d2web.app:create_app create-user "$LEFT4ME_ADMIN_USERNAME" --admin 2>&1); then
# A failure whose output contains 'user already exists' is tolerated (the
# output is just echoed); any other failure is printed to stderr and aborts
# the deploy.
case "$create_user_output" in
*'user already exists'*) printf '%s\n' "$create_user_output" ;;
*) printf '%s\n' "$create_user_output" >&2; exit 1 ;;
esac
else
printf '%s\n' "$create_user_output"
fi
fi
# One-shot migration: fuse-overlayfs running as the left4me user used
# user.fuseoverlayfs.* xattrs for whiteouts and opaque-dir markers; kernel
# overlayfs ignores those entirely, so a pre-existing upper/ from the fuse
# era would resurrect "deleted" files. Wipe upper/ and work/ for every
# instance once, gated by a sentinel file so reruns are no-ops.
overlay_sentinel=/var/lib/left4me/.kernel-overlay-migrated
if [ ! -e "$overlay_sentinel" ]; then
# Stop all game-server instances and the web app first; failures (for
# example units that do not exist yet) are deliberately ignored.
$sudo_cmd sh -c "systemctl stop 'left4me-server@*.service' 2>/dev/null || true"
$sudo_cmd systemctl stop left4me-web.service 2>/dev/null || true
# Lazily unmount every fuse-overlayfs mount, and every kernel overlay mount
# whose target lies under /var/lib/left4me/runtime/; errors are ignored.
$sudo_cmd sh -c "findmnt -t fuse.fuse-overlayfs -o TARGET --noheadings 2>/dev/null | xargs -r -n1 umount -l 2>/dev/null || true"
$sudo_cmd sh -c "findmnt -t overlay -o TARGET --noheadings 2>/dev/null | grep '/var/lib/left4me/runtime/' | xargs -r -n1 umount -l 2>/dev/null || true"
# For each runtime directory, recreate upper/ and work/ empty and owned by
# left4me. The [ -d ] guard skips the literal pattern when nothing matches.
$sudo_cmd sh -c 'for d in /var/lib/left4me/runtime/*/; do [ -d "$d" ] || continue; rm -rf "$d/upper" "$d/work"; mkdir -p "$d/upper" "$d/work"; chown left4me:left4me "$d/upper" "$d/work"; done'
# Record completion so later deploys skip this block.
$sudo_cmd touch "$overlay_sentinel"
$sudo_cmd chown left4me:left4me "$overlay_sentinel"
fi
# One-shot migration: 0005_script_overlays drops the legacy
# l4d2center_maps / cedapug_maps overlay rows but doesn't touch their
# directories under /var/lib/left4me/overlays/{id}. Without cleanup, when
# AUTOINCREMENT (or its absence after the 0002 batch_alter_table recreate)
# re-issues an id matching one of those orphan dirs, the web app's
# create_overlay_directory(exist_ok=False) fails with FileExistsError.
# Sweep any overlay dir whose id has no matching DB row, plus the
# now-unused global_overlay_cache.
#
# Safety: the sweep deletes every directory it cannot match against the id
# list, so an id list that could not be read must never be treated as "no
# overlays exist". The inner shell therefore runs under `set -eu` and aborts
# when the query fails; the outer `set -e` then stops the deploy before the
# sentinel is written, so the sweep is retried on the next run.
overlay_orphan_sentinel=/var/lib/left4me/.script-overlays-orphans-cleaned
if [ ! -e "$overlay_orphan_sentinel" ]; then
  $sudo_cmd rm -rf /var/lib/left4me/global_overlay_cache
  $sudo_cmd sh -c '
    set -eu
    cd /var/lib/left4me/overlays || exit 0
    # Open the database read-only through a file: URI so that a missing
    # database makes the query fail instead of creating an empty, root-owned
    # file at that path.
    ids_in_db=$(/opt/left4me/.venv/bin/python -c "import sqlite3; c = sqlite3.connect(\"file:/var/lib/left4me/left4me.db?mode=ro\", uri=True); print(\" \".join(str(r[0]) for r in c.execute(\"SELECT id FROM overlays\")))") || {
      echo "could not read overlay ids from the database; aborting orphan sweep" >&2
      exit 1
    }
    for d in */; do
      # With no subdirectories the pattern stays literal; skip it.
      [ -d "$d" ] || continue
      id=${d%/}
      # Space-padded match so that id 1 does not match 10, 21, and so on.
      case " $ids_in_db " in
        *" $id "*) ;;
        *) echo "removing orphan overlay dir: $id"; rm -rf -- "$id" ;;
      esac
    done
  '
  # Record completion so later deploys skip this block.
  $sudo_cmd touch "$overlay_orphan_sentinel"
  $sudo_cmd chown left4me:left4me "$overlay_orphan_sentinel"
fi
# Pick up the freshly copied unit files, make sure the web service is enabled
# and started, then restart it so the new code is what is running.
$sudo_cmd systemctl daemon-reload
$sudo_cmd systemctl enable --now left4me-web.service
$sudo_cmd systemctl restart left4me-web.service

# Probe the local health endpoint up to ten times, one second apart. The
# first successful response ends the deploy with status 0.
attempt=0
while [ "$attempt" -lt 10 ]; do
  attempt=$((attempt + 1))
  if curl -fsS http://127.0.0.1:8000/health; then
    exit 0
  fi
  sleep 1
done

# Never became healthy: dump service status and recent journal entries to
# stderr for diagnosis (best effort), then fail.
$sudo_cmd systemctl status left4me-web.service --no-pager >&2 || true
$sudo_cmd journalctl -u left4me-web.service -n 80 --no-pager >&2 || true
exit 1
REMOTE