left4me/l4d2web/services/overlay_builders.py
mwiegand 16adc5c1fe
overlay_builders: download missing/stale workshop items inline
Replace the old skip-uncached-with-warning logic in WorkshopBuilder.build
with an inline download phase that calls _download_with_retry for each item
whose cache file is absent or stale (mtime/size mismatch). Stamps
last_downloaded_at / last_error after each download, and skips items with
no file_url. Update test fixture to utime cache files so mtime matches
time_updated, delete the now-superseded skip-warning test, and add six
new builder-level behavior tests covering the new download path.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-11 22:56:09 +02:00

420 lines
14 KiB
Python

"""Overlay builder registry.
Each `Overlay.type` maps to a builder. `build_overlay(overlay_id)` jobs (and
the synchronous `initialize_server` hook) dispatch through `BUILDERS`. Adding
a new overlay type means writing a new builder and registering it here — no
changes to the worker, the mount layer, or the blueprint editor.
"""
from __future__ import annotations
import os
import subprocess
import tempfile
import time
from datetime import UTC, datetime
from pathlib import Path
from typing import Callable, Protocol
import requests
from sqlalchemy import select
from l4d2host.paths import get_left4me_root
from l4d2web.db import session_scope
from l4d2web.models import Overlay, OverlayWorkshopItem, WorkshopItem
from l4d2web.services.host_commands import run_command
from l4d2web.services.steam_workshop import WorkshopMetadata, download_to_cache
from l4d2web.services.workshop_paths import cache_path, workshop_cache_root
# Callback shapes shared by every builder in this module: a zero-argument
# "should we stop?" probe and a sink that receives one log line as a string.
CancelCheck = Callable[[], bool]
LogSink = Callable[[str], None]

# Privileged helper that `run_sandboxed_script` invokes through `sudo -n`.
SCRIPT_SANDBOX_HELPER = "/usr/local/libexec/left4me/left4me-script-sandbox"

# Size ceiling checked by `ScriptBuilder._enforce_disk_budget` (20 GiB).
DISK_BUDGET_BYTES = 20 * 1024 * 1024 * 1024

# Retry policy for `_download_with_retry`: one backoff delay sits between each
# pair of consecutive attempts, hence exactly ATTEMPTS - 1 delays.
DOWNLOAD_RETRY_ATTEMPTS = 3
DOWNLOAD_RETRY_BACKOFF_SECONDS = (1.0, 2.0)
assert DOWNLOAD_RETRY_ATTEMPTS - 1 == len(DOWNLOAD_RETRY_BACKOFF_SECONDS)
def _sleep_with_cancel(
    seconds: float,
    should_cancel: CancelCheck,
    *,
    poll_interval: float = 0.25,
) -> bool:
    """Wait for up to `seconds`, waking early when cancellation is requested.

    `should_cancel` is consulted before every nap, and each nap lasts at most
    `poll_interval` seconds (shorter when less time is left).

    Returns True as soon as `should_cancel()` reports True, or False once the
    whole duration has elapsed without a cancellation.
    """
    stop_at = time.monotonic() + seconds
    while not should_cancel():
        time_left = stop_at - time.monotonic()
        if time_left <= 0:
            return False
        time.sleep(min(poll_interval, time_left))
    return True
def _download_with_retry(
    meta: WorkshopMetadata,
    cache_root: Path,
    *,
    on_stderr: LogSink,
    should_cancel: CancelCheck,
) -> None:
    """Call `download_to_cache` up to `DOWNLOAD_RETRY_ATTEMPTS` times.

    A `requests.RequestException` or `OSError` from an attempt is reported on
    `on_stderr` and followed by a cancel-aware backoff sleep; the failure of
    the final attempt is re-raised unchanged.

    Raises:
        InterruptedError: propagated immediately when the download itself
            raises it, or raised (chained to the last failure) when
            `should_cancel` fires during a backoff sleep.
    """
    failure: BaseException | None = None
    for attempt in range(1, DOWNLOAD_RETRY_ATTEMPTS + 1):
        try:
            download_to_cache(meta, cache_root, should_cancel=should_cancel)
        except InterruptedError:
            # InterruptedError is an OSError subclass; this clause keeps a
            # cancellation from being mistaken for a retryable failure.
            raise
        except (requests.RequestException, OSError) as exc:
            if attempt == DOWNLOAD_RETRY_ATTEMPTS:
                raise
            # `exc` is unbound once this clause ends, so keep a reference for
            # the exception chaining below.
            failure = exc
            on_stderr(
                f"workshop {meta.steam_id} attempt {attempt}/"
                f"{DOWNLOAD_RETRY_ATTEMPTS} failed: {exc}"
            )
        else:
            return
        backoff = DOWNLOAD_RETRY_BACKOFF_SECONDS[attempt - 1]
        if _sleep_with_cancel(backoff, should_cancel):
            raise InterruptedError("download cancelled during backoff") from failure
def _sandbox_script_dir() -> Path:
    """Return the directory that holds script tmpfiles awaiting bind-mount.

    /tmp is unusable here: the web service unit runs with PrivateTmp=yes, so
    its /tmp is a per-instance namespace that PID 1 (which performs the
    BindReadOnlyPaths step during sandbox setup) cannot resolve. A directory
    under /var/lib is unaffected by PrivateTmp and visible to PID 1, so the
    bind-mount succeeds.
    """
    left4me_root = get_left4me_root()
    return left4me_root / "sandbox-scripts"
class BuildError(RuntimeError):
    """A build failed for a reason owned by the builder itself.

    Example: the disk budget was exceeded. Kept separate from the
    subprocess-level HostCommandError / CommandCancelledError.
    """
class OverlayBuilder(Protocol):
    """Structural interface that every entry in `BUILDERS` satisfies."""

    def build(
        self,
        overlay: Overlay,
        *,
        on_stdout: LogSink,
        on_stderr: LogSink,
        should_cancel: CancelCheck,
    ) -> None:
        """Build `overlay`, writing progress to the two log sinks and
        consulting `should_cancel` to stop early."""
        ...
def _overlay_root(overlay: Overlay) -> Path:
    """Return the overlay's directory, `<left4me root>/overlays/<overlay.path>`."""
    overlays_dir = get_left4me_root() / "overlays"
    return overlays_dir / overlay.path
def overlay_path_for_id(overlay_id: int) -> Path:
    """Return `<left4me root>/overlays/<overlay_id>` for a numeric overlay id."""
    overlays_dir = get_left4me_root() / "overlays"
    return overlays_dir / str(overlay_id)
class WorkshopBuilder:
    """Diff-apply symlinks under `left4dead2/addons/` against the overlay's
    current `WorkshopItem` associations.

    Missing or stale cache files are downloaded before symlinks are applied.
    Items with no file_url are skipped with a warning. Only symlinks that
    point into the workshop cache are managed; every other entry in the
    addons directory is left untouched.
    """

    @staticmethod
    def _needs_download(
        target: Path,
        last_downloaded_at,
        time_updated,
        file_size,
    ) -> bool:
        """Return True when the cache file at `target` is absent or stale.

        An item counts as fresh only if it was downloaded before
        (`last_downloaded_at` is not None) and the file's whole-second mtime
        and its size equal `int(time_updated)` and `int(file_size)`.
        """
        if last_downloaded_at is None:
            return True
        try:
            # One stat call serves both comparisons and avoids crashing when
            # the file disappears between an existence check and the stat.
            st = target.stat()
        except (FileNotFoundError, NotADirectoryError):
            return True
        return (
            int(st.st_mtime) != int(time_updated)
            or int(st.st_size) != int(file_size)
        )

    def build(
        self,
        overlay: Overlay,
        *,
        on_stdout: LogSink,
        on_stderr: LogSink,
        should_cancel: CancelCheck,
    ) -> None:
        """Download what the overlay needs, then reconcile its addon symlinks.

        Progress and the final summary go to `on_stdout`; warnings and
        cancellation notices go to `on_stderr`. When `should_cancel()` turns
        True between steps the method logs a notice and returns early.

        Raises:
            InterruptedError: propagated from `_download_with_retry`.
            Exception: any other download failure is written to the item's
                `last_error` and then re-raised.
        """
        addons_dir = _overlay_root(overlay) / "left4dead2" / "addons"
        addons_dir.mkdir(parents=True, exist_ok=True)

        # Snapshot every field the decision logic + downloader will need, so
        # no ORM object is touched after its session has ended.
        with session_scope() as db:
            rows = db.scalars(
                select(WorkshopItem)
                .join(
                    OverlayWorkshopItem,
                    OverlayWorkshopItem.workshop_item_id == WorkshopItem.id,
                )
                .where(OverlayWorkshopItem.overlay_id == overlay.id)
            ).all()
            items_data = [
                (
                    it.id,
                    it.steam_id,
                    it.title,
                    it.filename,
                    it.file_url,
                    it.file_size,
                    it.time_updated,
                    it.preview_url,
                    it.last_downloaded_at,
                    it.last_error,
                )
                for it in rows
            ]

        cache_root = workshop_cache_root()
        cache_root.mkdir(parents=True, exist_ok=True)
        # Link targets are compared in fully-resolved form below, so the root
        # they are tested against must be resolved too. Otherwise a cache
        # path that is relative or passes through a symlink never matches.
        resolved_cache_root = cache_root.resolve()

        downloaded = 0
        cached = 0
        skipped: set[str] = set()

        # Download phase.
        for (
            item_id, steam_id, title, filename, file_url, file_size,
            time_updated, preview_url, last_downloaded_at, last_error,
        ) in items_data:
            if should_cancel():
                on_stderr("workshop build cancelled during download phase")
                return
            if not file_url:
                on_stderr(
                    f"workshop item {steam_id} skipped: no file_url "
                    f"(steam result: {last_error or 'unknown'})"
                )
                skipped.add(steam_id)
                continue
            target = cache_path(steam_id)
            if not self._needs_download(
                target, last_downloaded_at, time_updated, file_size
            ):
                cached += 1
                continue
            meta = WorkshopMetadata(
                steam_id=steam_id,
                title=title,
                filename=filename,
                file_url=file_url,
                file_size=file_size,
                time_updated=time_updated,
                preview_url=preview_url,
                consumer_app_id=550,
                result=1,
            )
            on_stdout(f"workshop {steam_id} downloading")
            try:
                _download_with_retry(
                    meta, cache_root,
                    on_stderr=on_stderr, should_cancel=should_cancel,
                )
            except InterruptedError:
                # Cancellation is not a download failure; do not stamp it.
                raise
            except Exception as exc:
                # Record the failure on the item before letting it propagate.
                with session_scope() as db:
                    wi = db.scalar(
                        select(WorkshopItem).where(WorkshopItem.id == item_id)
                    )
                    if wi is not None:
                        wi.last_error = f"download failed: {exc}"
                raise
            with session_scope() as db:
                wi = db.scalar(
                    select(WorkshopItem).where(WorkshopItem.id == item_id)
                )
                if wi is not None:
                    wi.last_downloaded_at = datetime.now(UTC)
                    wi.last_error = ""
            downloaded += 1

        # Desired set for the symlink phase: only items that have a cache
        # file now belong in it. Items skipped above stay out.
        desired: dict[str, Path] = {}
        for (
            _item_id, steam_id, _title, _filename, _file_url, _file_size,
            _time_updated, _preview_url, _last_downloaded_at, _last_error,
        ) in items_data:
            if steam_id in skipped:
                continue
            target = cache_path(steam_id)
            if not target.exists():
                continue  # shouldn't happen post-download; safety net
            desired[f"{steam_id}.vpk"] = target.resolve()

        if should_cancel():
            on_stderr("workshop build cancelled before applying symlinks")
            return

        # existing: symlink-name -> resolved link target (only symlinks
        # pointing at our cache)
        existing: dict[str, Path] = {}
        with os.scandir(addons_dir) as entries:
            for entry in entries:
                if not entry.is_symlink():
                    continue
                try:
                    # A relative link target is relative to the directory
                    # holding the link, not to the process CWD. Joining an
                    # absolute target onto addons_dir leaves it unchanged.
                    link_target = addons_dir / os.readlink(entry.path)
                    resolved = link_target.resolve(strict=False)
                except OSError:
                    continue
                if _is_under(resolved, resolved_cache_root):
                    existing[entry.name] = resolved

        created = 0
        removed = 0
        unchanged = 0
        kept: set[str] = set()

        # Removal pass: drop links that are no longer wanted, and links whose
        # target changed (the creation pass below re-creates those).
        for name, current_target in existing.items():
            if should_cancel():
                on_stderr("workshop build cancelled mid-removal")
                return
            desired_target = desired.get(name)
            if desired_target == current_target:
                unchanged += 1
                kept.add(name)
                continue
            os.unlink(addons_dir / name)
            if desired_target is None:
                removed += 1

        # Creation pass: everything desired that was not already correct.
        for name, target in desired.items():
            if should_cancel():
                on_stderr("workshop build cancelled mid-creation")
                return
            if name in kept:
                continue
            os.symlink(target, addons_dir / name)
            created += 1

        on_stdout(
            f"workshop overlay {overlay.name!r}: "
            f"downloaded={downloaded} cached={cached} skipped={len(skipped)} "
            f"created={created} removed={removed} unchanged={unchanged}"
        )
def run_sandboxed_script(
    overlay_id: int,
    script_text: str,
    *,
    on_stdout: LogSink,
    on_stderr: LogSink,
    should_cancel: CancelCheck,
) -> None:
    """Write `script_text` to a tmpfile and exec it inside the privileged
    sandbox helper. Used by ScriptBuilder.build and by the wipe route.

    The tmpfile is created in `_sandbox_script_dir()` and is removed again
    on every exit path, including a failed write or chmod. Exceptions raised
    by `run_command` propagate to the caller.
    """
    script_dir = _sandbox_script_dir()
    script_dir.mkdir(parents=True, exist_ok=True)
    handle = tempfile.NamedTemporaryFile(
        "w", suffix=".sh", delete=False, dir=str(script_dir)
    )
    script_path = handle.name
    try:
        # delete=False makes cleanup our job from the moment the file exists,
        # so the write and chmod sit inside the try as well.
        with handle:
            handle.write(script_text or "")
        # NamedTemporaryFile creates 0600 owned by the web user; the sandbox
        # runs as l4d2-sandbox and needs to read it (bind-mounted at
        # /script.sh inside the sandbox). Script content is not a secret —
        # it's plain bash stored in the DB and editable by the user — so 0644
        # is appropriate.
        os.chmod(script_path, 0o644)
        cmd = [
            "sudo",
            "-n",
            SCRIPT_SANDBOX_HELPER,
            str(overlay_id),
            script_path,
        ]
        run_command(
            cmd,
            on_stdout=on_stdout,
            on_stderr=on_stderr,
            should_cancel=should_cancel,
        )
    finally:
        try:
            os.unlink(script_path)
        except FileNotFoundError:
            pass
class ScriptBuilder:
    """Execute a user-authored bash script against the overlay directory
    inside a bubblewrap + systemd-run sandbox. Inside the sandbox the overlay
    dir is mounted RW at `/overlay` next to a curated RO view of the host;
    everything else is isolated. Once the script exits, a 20 GB cap on
    `du -sb /overlay` is enforced."""

    def build(
        self,
        overlay: Overlay,
        *,
        on_stdout: LogSink,
        on_stderr: LogSink,
        should_cancel: CancelCheck,
    ) -> None:
        """Run the overlay's script in the sandbox, then check the disk cap."""
        # The helper validates its bind-mount target, so the directory has to
        # exist before the sandbox is started.
        target_dir = overlay_path_for_id(overlay.id)
        target_dir.mkdir(parents=True, exist_ok=True)
        script_body = overlay.script or ""
        run_sandboxed_script(
            overlay.id,
            script_body,
            on_stdout=on_stdout,
            on_stderr=on_stderr,
            should_cancel=should_cancel,
        )
        self._enforce_disk_budget(overlay.id, on_stderr)

    def _enforce_disk_budget(self, overlay_id: int, on_stderr: LogSink) -> None:
        """Raise `BuildError` when the overlay dir exceeds `DISK_BUDGET_BYTES`.

        The size is the first field of `du -sb` output for the directory.
        """
        overlay_dir = overlay_path_for_id(overlay_id)
        du_stdout = subprocess.check_output(["du", "-sb", str(overlay_dir)])
        used_bytes = int(du_stdout.split()[0])
        if used_bytes <= DISK_BUDGET_BYTES:
            return
        on_stderr(
            f"overlay exceeded 20 GB disk cap: {used_bytes} bytes > "
            f"{DISK_BUDGET_BYTES} bytes"
        )
        raise BuildError("disk-cap-exceeded")
def _is_under(path: Path, root: Path) -> bool:
    """Return True when `path` equals `root` or lies somewhere beneath it.

    The comparison is purely lexical; neither path is resolved or checked
    against the filesystem.
    """
    return path.is_relative_to(root)
class FilesBuilder:
    """Builder for `files` overlays that performs no real build work.

    For this overlay type the directory content IS the overlay: every save /
    upload / move / delete is immediately authoritative. The builder exists
    only so the overlay-build subsystem can dispatch uniformly across all
    overlay types; it merely makes sure the overlay directory exists.
    """

    def build(
        self,
        overlay: Overlay,
        *,
        on_stdout: LogSink,
        on_stderr: LogSink,
        should_cancel: CancelCheck,
    ) -> None:
        """Create the overlay directory if needed and log one line."""
        overlay_dir = overlay_path_for_id(overlay.id)
        overlay_dir.mkdir(parents=True, exist_ok=True)
        on_stdout(f"files overlay {overlay.name!r}: directory ensured (no-op build)")
# Registry keyed by `Overlay.type`; one shared builder instance per type.
BUILDERS: dict[str, OverlayBuilder] = dict(
    workshop=WorkshopBuilder(),
    script=ScriptBuilder(),
    files=FilesBuilder(),
)