overlay_builders: download missing/stale workshop items inline

Replace the old skip-uncached-with-warning logic in WorkshopBuilder.build
with an inline download phase that calls _download_with_retry for each item
whose cache file is absent or stale (mtime/size mismatch). Stamps
last_downloaded_at / last_error after each download, and skips items with
no file_url. Update test fixture to utime cache files so mtime matches
time_updated, delete the now-superseded skip-warning test, and add six
new builder-level behavior tests covering the new download path.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
mwiegand 2026-05-11 22:56:09 +02:00
parent 6fc7f87943
commit 16adc5c1fe
No known key found for this signature in database
2 changed files with 296 additions and 63 deletions

View file

@ -11,6 +11,7 @@ import os
import subprocess import subprocess
import tempfile import tempfile
import time import time
from datetime import UTC, datetime
from pathlib import Path from pathlib import Path
from typing import Callable, Protocol from typing import Callable, Protocol
@ -126,9 +127,9 @@ def overlay_path_for_id(overlay_id: int) -> Path:
class WorkshopBuilder: class WorkshopBuilder:
"""Diff-apply symlinks under `left4dead2/addons/` against the overlay's """Diff-apply symlinks under `left4dead2/addons/` against the overlay's
current `WorkshopItem` associations. Cached items get an absolute symlink current `WorkshopItem` associations. Downloads missing or stale items
into `workshop_cache/{steam_id}.vpk`. Items missing from cache are before applying symlinks. Items with no file_url are skipped with a
skipped with a warning rather than turned into broken symlinks.""" warning."""
def build( def build(
self, self,
@ -141,8 +142,9 @@ class WorkshopBuilder:
addons_dir = _overlay_root(overlay) / "left4dead2" / "addons" addons_dir = _overlay_root(overlay) / "left4dead2" / "addons"
addons_dir.mkdir(parents=True, exist_ok=True) addons_dir.mkdir(parents=True, exist_ok=True)
# Snapshot every field the decision logic + downloader will need.
with session_scope() as db: with session_scope() as db:
items = db.scalars( rows = db.scalars(
select(WorkshopItem) select(WorkshopItem)
.join( .join(
OverlayWorkshopItem, OverlayWorkshopItem,
@ -150,37 +152,115 @@ class WorkshopBuilder:
) )
.where(OverlayWorkshopItem.overlay_id == overlay.id) .where(OverlayWorkshopItem.overlay_id == overlay.id)
).all() ).all()
# Detach items so we can use them outside the session.
items_data = [ items_data = [
(it.steam_id, it.last_downloaded_at) for it in items (
it.id,
it.steam_id,
it.title,
it.filename,
it.file_url,
it.file_size,
it.time_updated,
it.preview_url,
it.last_downloaded_at,
it.last_error,
)
for it in rows
] ]
cache_root = workshop_cache_root() cache_root = workshop_cache_root()
# desired: symlink-name -> absolute target path (only for cached items) cache_root.mkdir(parents=True, exist_ok=True)
desired: dict[str, Path] = {}
downloaded = 0
cached = 0
skipped: list[str] = [] skipped: list[str] = []
for steam_id, last_downloaded_at in items_data:
target = cache_path(steam_id) # Download phase.
if last_downloaded_at is None or not target.exists(): for (
item_id, steam_id, title, filename, file_url, file_size,
time_updated, preview_url, last_downloaded_at, last_error,
) in items_data:
if should_cancel():
on_stderr("workshop build cancelled during download phase")
return
if not file_url:
on_stderr(
f"workshop item {steam_id} skipped: no file_url "
f"(steam result: {last_error or 'unknown'})"
)
skipped.append(steam_id) skipped.append(steam_id)
continue continue
target = cache_path(steam_id)
needs_download = (
last_downloaded_at is None
or not target.exists()
or int(target.stat().st_mtime) != int(time_updated)
or int(target.stat().st_size) != int(file_size)
)
if not needs_download:
cached += 1
continue
meta = WorkshopMetadata(
steam_id=steam_id,
title=title,
filename=filename,
file_url=file_url,
file_size=file_size,
time_updated=time_updated,
preview_url=preview_url,
consumer_app_id=550,
result=1,
)
on_stdout(f"workshop {steam_id} downloading")
try:
_download_with_retry(
meta, cache_root,
on_stderr=on_stderr, should_cancel=should_cancel,
)
except InterruptedError:
raise
except Exception as exc:
with session_scope() as db:
wi = db.scalar(select(WorkshopItem).where(WorkshopItem.id == item_id))
if wi is not None:
wi.last_error = f"download failed: {exc}"
raise
with session_scope() as db:
wi = db.scalar(select(WorkshopItem).where(WorkshopItem.id == item_id))
if wi is not None:
wi.last_downloaded_at = datetime.now(UTC)
wi.last_error = ""
downloaded += 1
# Re-snapshot for symlink phase: only items that have a cache file now
# belong in the desired set. Items skipped above stay out.
desired: dict[str, Path] = {}
for (
_item_id, steam_id, _title, _filename, _file_url, _file_size,
_time_updated, _preview_url, _last_downloaded_at, _last_error,
) in items_data:
if steam_id in skipped:
continue
target = cache_path(steam_id)
if not target.exists():
continue # shouldn't happen post-download; safety net
desired[f"{steam_id}.vpk"] = target.resolve() desired[f"{steam_id}.vpk"] = target.resolve()
if should_cancel(): if should_cancel():
on_stderr("workshop build cancelled before applying symlinks") on_stderr("workshop build cancelled before applying symlinks")
return return
# existing: symlink-name -> link target (only for symlinks pointing at our cache) # existing: symlink-name -> link target (only symlinks pointing at our cache)
existing: dict[str, Path] = {} existing: dict[str, Path] = {}
for entry in os.scandir(addons_dir): for entry in os.scandir(addons_dir):
if not entry.is_symlink(): if not entry.is_symlink():
continue continue
try: try:
target = Path(os.readlink(entry.path)) link_target = Path(os.readlink(entry.path))
except OSError: except OSError:
continue continue
try: try:
resolved = target.resolve(strict=False) resolved = link_target.resolve(strict=False)
except OSError: except OSError:
continue continue
if not _is_under(resolved, cache_root): if not _is_under(resolved, cache_root):
@ -191,7 +271,6 @@ class WorkshopBuilder:
removed = 0 removed = 0
unchanged = 0 unchanged = 0
# Remove obsolete or stale symlinks first.
for name, current_target in existing.items(): for name, current_target in existing.items():
if should_cancel(): if should_cancel():
on_stderr("workshop build cancelled mid-removal") on_stderr("workshop build cancelled mid-removal")
@ -202,49 +281,28 @@ class WorkshopBuilder:
removed += 1 removed += 1
elif current_target != desired_target: elif current_target != desired_target:
os.unlink(addons_dir / name) os.unlink(addons_dir / name)
# will be recreated below
else: else:
unchanged += 1 unchanged += 1
# Recompute existing post-removal so the create loop knows what's left.
post_removal_existing = { post_removal_existing = {
name for name in existing if name in desired and existing[name] == desired[name] name for name in existing
if name in desired and existing[name] == desired[name]
} }
# Create new symlinks.
for name, target in desired.items(): for name, target in desired.items():
if should_cancel(): if should_cancel():
on_stderr("workshop build cancelled mid-creation") on_stderr("workshop build cancelled mid-creation")
return return
if name in post_removal_existing: if name in post_removal_existing:
continue continue
link_path = addons_dir / name os.symlink(target, addons_dir / name)
# Defensive: if a non-symlink file collides with our name, leave it.
if link_path.exists() and not link_path.is_symlink():
on_stderr(
f"refusing to overwrite non-symlink at {link_path}; manual intervention required"
)
continue
if link_path.is_symlink():
# An obsolete symlink not in `existing` (target outside cache).
# We don't manage these — leave alone.
on_stderr(
f"refusing to overwrite foreign symlink at {link_path}"
)
continue
os.symlink(str(target), str(link_path))
created += 1 created += 1
on_stdout( on_stdout(
f"workshop overlay {overlay.name!r}: created={created} " f"workshop overlay {overlay.name!r}: "
f"removed={removed} unchanged={unchanged} " f"downloaded={downloaded} cached={cached} skipped={len(skipped)} "
f"skipped(uncached)={len(skipped)}" f"created={created} removed={removed} unchanged={unchanged}"
) )
for steam_id in skipped:
on_stderr(
f"workshop item {steam_id} skipped: not yet downloaded "
f"(refresh required before this overlay can mount it)"
)
def run_sandboxed_script( def run_sandboxed_script(

View file

@ -37,6 +37,7 @@ def _add_workshop_item(steam_id: str, *, downloaded: bool, cache_root: Path, con
if downloaded: if downloaded:
cache_root.mkdir(parents=True, exist_ok=True) cache_root.mkdir(parents=True, exist_ok=True)
(cache_root / f"{steam_id}.vpk").write_bytes(content) (cache_root / f"{steam_id}.vpk").write_bytes(content)
os.utime(cache_root / f"{steam_id}.vpk", (1700000000, 1700000000))
with session_scope() as s: with session_scope() as s:
wi = WorkshopItem( wi = WorkshopItem(
steam_id=steam_id, steam_id=steam_id,
@ -135,27 +136,6 @@ def test_workshop_builder_creates_absolute_symlinks(env: Path) -> None:
assert link_b.resolve() == (cache_root / "1002.vpk").resolve() assert link_b.resolve() == (cache_root / "1002.vpk").resolve()
def test_workshop_builder_skips_uncached_items_with_warning(env: Path) -> None:
    """An associated item without a cache file gets a log line, not a dangling symlink."""
    _, overlay_id = _create_user_and_overlay("ws", "workshop")
    cache_root = env / "workshop_cache"

    # First item has a cache file on disk, second one does not.
    item_ids = [
        _add_workshop_item(steam_id, downloaded=present, cache_root=cache_root)
        for steam_id, present in (("1001", True), ("9999", False))
    ]
    for item_id in item_ids:
        _associate(overlay_id, item_id)

    out, err, on_stdout, on_stderr = _capture_logs()
    with session_scope() as s:
        overlay = s.query(Overlay).filter_by(id=overlay_id).one()
        builder = overlay_builders.BUILDERS["workshop"]
        builder.build(
            overlay,
            on_stdout=on_stdout,
            on_stderr=on_stderr,
            should_cancel=lambda: False,
        )

    # NOTE(review): overlay directory "7" is hard-coded; presumably the id
    # handed out by _create_user_and_overlay -- confirm against the helper.
    addons = env / "overlays" / "7" / "left4dead2" / "addons"
    assert (addons / "1001.vpk").is_symlink()
    assert not (addons / "9999.vpk").exists(), "must NOT create dangling symlink"

    def _reports_skip(line: str) -> bool:
        lowered = line.lower()
        return "9999" in line and ("skip" in lowered or "uncached" in lowered)

    combined = err + out
    assert any(_reports_skip(line) for line in combined), combined
def test_workshop_builder_rerun_is_idempotent(env: Path) -> None: def test_workshop_builder_rerun_is_idempotent(env: Path) -> None:
_, overlay_id = _create_user_and_overlay("ws", "workshop") _, overlay_id = _create_user_and_overlay("ws", "workshop")
@ -518,3 +498,198 @@ def test_download_with_retry_bails_when_cancelled_during_backoff(env, tmp_path,
meta, tmp_path / "cache", meta, tmp_path / "cache",
on_stderr=on_stderr, should_cancel=lambda: False, on_stderr=on_stderr, should_cancel=lambda: False,
) )
def _make_meta_from_db_row(steam_id: str, *, file_size: int, time_updated: int):
    """Return a `WorkshopMetadata` for `steam_id` with placeholder text fields.

    Title, filename and download URL are derived from the id; `file_size`
    and `time_updated` are passed through unchanged.
    """
    from l4d2web.services import steam_workshop as _sw

    fields = {
        "steam_id": steam_id,
        "title": f"item-{steam_id}",
        "filename": f"orig-{steam_id}.vpk",
        "file_url": f"https://example.com/{steam_id}.vpk",
        "file_size": file_size,
        "time_updated": time_updated,
        "preview_url": "",
        "consumer_app_id": 550,
        "result": 1,
    }
    return _sw.WorkshopMetadata(**fields)
def test_workshop_build_downloads_uncached_and_stamps_timestamp(env, tmp_path, monkeypatch):
    """An item with no cache file is downloaded, stamped in the DB and symlinked.

    Patches `overlay_builders.download_to_cache` with a fake that writes the
    cache file, then checks that the builder invoked it exactly once, that
    `last_downloaded_at` / `last_error` were updated on the row, and that the
    addons symlink exists.
    """
    # One ordinary function-scope import instead of an inline
    # `__import__("sqlalchemy")` plus a second aliased import further down.
    from sqlalchemy import select

    monkeypatch.setenv("LEFT4ME_ROOT", str(tmp_path))
    cache_root = tmp_path / "workshop_cache"
    _, overlay_id = _create_user_and_overlay("ws", "workshop")
    item_id = _add_workshop_item("2001", downloaded=False, cache_root=cache_root)
    _associate(overlay_id, item_id)

    download_calls = []

    def fake_download(meta, cache_root_arg, *, should_cancel=None):
        # Stand-in for the real fetch: create the cache file and give it the
        # mtime recorded in the metadata so the builder's staleness check
        # (mtime vs. time_updated) is satisfied.
        download_calls.append(meta.steam_id)
        cache_root_arg.mkdir(parents=True, exist_ok=True)
        vpk = cache_root_arg / f"{meta.steam_id}.vpk"
        vpk.write_bytes(b"data")
        os.utime(vpk, (meta.time_updated, meta.time_updated))

    monkeypatch.setattr(overlay_builders, "download_to_cache", fake_download)

    out, err, on_stdout, on_stderr = _capture_logs()
    with session_scope() as s:
        overlay = s.scalar(select(Overlay).where(Overlay.id == overlay_id))
        # Detach so the instance stays usable after the session closes.
        s.expunge(overlay)

    overlay_builders.WorkshopBuilder().build(
        overlay, on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=lambda: False,
    )

    assert download_calls == ["2001"]
    with session_scope() as s:
        wi = s.scalar(select(WorkshopItem).where(WorkshopItem.id == item_id))
        assert wi.last_downloaded_at is not None
        assert wi.last_error == ""

    # NOTE(review): overlay directory "7" is hard-coded; presumably the id
    # handed out by _create_user_and_overlay -- confirm against the helper.
    addons = tmp_path / "overlays" / "7" / "left4dead2" / "addons"
    assert (addons / "2001.vpk").is_symlink()
def test_workshop_build_skips_already_cached(env, tmp_path, monkeypatch):
    """A cache file whose mtime and size match the DB row is not re-downloaded."""
    from sqlalchemy import select, update

    monkeypatch.setenv("LEFT4ME_ROOT", str(tmp_path))
    cache_root = tmp_path / "workshop_cache"
    _, overlay_id = _create_user_and_overlay("ws", "workshop")
    item_id = _add_workshop_item("2002", downloaded=True, cache_root=cache_root)

    # Bring the cache file's (mtime, size) and the DB row into exact agreement.
    stamp = 1700000000
    vpk = cache_root / "2002.vpk"
    os.utime(vpk, (stamp, stamp))
    with session_scope() as s:
        sync_row = (
            update(WorkshopItem)
            .where(WorkshopItem.id == item_id)
            .values(file_size=os.path.getsize(vpk), time_updated=stamp)
        )
        s.execute(sync_row)
    _associate(overlay_id, item_id)

    called = []

    def record_call(*args, **kwargs):
        called.append(1)

    monkeypatch.setattr(overlay_builders, "download_to_cache", record_call)

    out, err, on_stdout, on_stderr = _capture_logs()
    with session_scope() as s:
        overlay = s.scalar(select(Overlay).where(Overlay.id == overlay_id))
        s.expunge(overlay)

    builder = overlay_builders.WorkshopBuilder()
    builder.build(
        overlay,
        on_stdout=on_stdout,
        on_stderr=on_stderr,
        should_cancel=lambda: False,
    )

    assert called == [], "should not call downloader for an already-cached item"
    addons = tmp_path / "overlays" / "7" / "left4dead2" / "addons"
    assert (addons / "2002.vpk").is_symlink()
def test_workshop_build_redownloads_stale_cache(env, tmp_path, monkeypatch):
    """A cache file that disagrees with the row's size/time_updated is fetched again."""
    from sqlalchemy import select, update

    monkeypatch.setenv("LEFT4ME_ROOT", str(tmp_path))
    cache_root = tmp_path / "workshop_cache"
    _, overlay_id = _create_user_and_overlay("ws", "workshop")
    item_id = _add_workshop_item("2003", downloaded=True, cache_root=cache_root)

    # Move the DB row away from what is on disk so the cache counts as stale.
    with session_scope() as s:
        make_stale = (
            update(WorkshopItem)
            .where(WorkshopItem.id == item_id)
            .values(file_size=99, time_updated=1800000000)
        )
        s.execute(make_stale)
    _associate(overlay_id, item_id)

    download_calls = []

    def fake_download(meta, cache_root_arg, *, should_cancel=None):
        download_calls.append(meta.steam_id)
        vpk = cache_root_arg / f"{meta.steam_id}.vpk"
        vpk.write_bytes(b"99bytes____99bytes____99bytes____99bytes____99bytes____99bytes____99bytes____99bytes____99bytes____")
        os.utime(vpk, (meta.time_updated, meta.time_updated))

    monkeypatch.setattr(overlay_builders, "download_to_cache", fake_download)

    out, err, on_stdout, on_stderr = _capture_logs()
    with session_scope() as s:
        overlay = s.scalar(select(Overlay).where(Overlay.id == overlay_id))
        s.expunge(overlay)

    builder = overlay_builders.WorkshopBuilder()
    builder.build(
        overlay,
        on_stdout=on_stdout,
        on_stderr=on_stderr,
        should_cancel=lambda: False,
    )

    assert download_calls == ["2003"]
def test_workshop_build_skips_items_with_no_file_url(env, tmp_path, monkeypatch):
    """A row with an empty file_url is reported as skipped and never downloaded or linked."""
    from sqlalchemy import select

    monkeypatch.setenv("LEFT4ME_ROOT", str(tmp_path))
    _, overlay_id = _create_user_and_overlay("ws", "workshop")

    with session_scope() as s:
        urlless = WorkshopItem(
            steam_id="2004",
            title="gone",
            filename="",
            file_url="",
            file_size=0,
            time_updated=0,
            preview_url="",
            last_downloaded_at=None,
            last_error="steam result 9",
        )
        s.add(urlless)
        s.flush()  # populate the primary key before the session closes
        item_id = urlless.id
    _associate(overlay_id, item_id)

    def forbidden_download(*args, **kwargs):
        raise AssertionError("must not be called")

    monkeypatch.setattr(overlay_builders, "download_to_cache", forbidden_download)

    out, err, on_stdout, on_stderr = _capture_logs()
    with session_scope() as s:
        overlay = s.scalar(select(Overlay).where(Overlay.id == overlay_id))
        s.expunge(overlay)

    builder = overlay_builders.WorkshopBuilder()
    builder.build(
        overlay,
        on_stdout=on_stdout,
        on_stderr=on_stderr,
        should_cancel=lambda: False,
    )

    skip_reported = any("2004" in line and "skipped" in line for line in err)
    assert skip_reported
    addons = tmp_path / "overlays" / "7" / "left4dead2" / "addons"
    assert not (addons / "2004.vpk").exists()
def test_workshop_build_fails_when_all_retries_exhausted(env, tmp_path, monkeypatch):
    """A downloader that always fails makes build raise and records last_error on the row."""
    import requests
    from sqlalchemy import select

    monkeypatch.setenv("LEFT4ME_ROOT", str(tmp_path))
    _, overlay_id = _create_user_and_overlay("ws", "workshop")
    item_id = _add_workshop_item(
        "2005", downloaded=False, cache_root=tmp_path / "workshop_cache"
    )
    _associate(overlay_id, item_id)

    def no_sleep(*args, **kwargs):
        return False

    def always_fail(*args, **kwargs):
        raise requests.ConnectionError("net")

    # Replace the backoff sleep so the test does not actually wait.
    monkeypatch.setattr(overlay_builders, "_sleep_with_cancel", no_sleep)
    monkeypatch.setattr(overlay_builders, "download_to_cache", always_fail)

    out, err, on_stdout, on_stderr = _capture_logs()
    with session_scope() as s:
        overlay = s.scalar(select(Overlay).where(Overlay.id == overlay_id))
        s.expunge(overlay)

    builder = overlay_builders.WorkshopBuilder()
    with pytest.raises(requests.ConnectionError):
        builder.build(
            overlay,
            on_stdout=on_stdout,
            on_stderr=on_stderr,
            should_cancel=lambda: False,
        )

    with session_scope() as s:
        row = s.scalar(select(WorkshopItem).where(WorkshopItem.id == item_id))
        assert "download failed" in row.last_error
def test_workshop_build_cancels_cleanly_during_download_phase(env, tmp_path, monkeypatch):
    """An InterruptedError raised by the downloader propagates out of build."""
    from sqlalchemy import select

    monkeypatch.setenv("LEFT4ME_ROOT", str(tmp_path))
    _, overlay_id = _create_user_and_overlay("ws", "workshop")
    item_id = _add_workshop_item(
        "2006", downloaded=False, cache_root=tmp_path / "workshop_cache"
    )
    _associate(overlay_id, item_id)

    cancel_requested = False

    def fake_download(meta, cache_root, *, should_cancel=None):
        # Flip the flag that should_cancel reports, then abort the download.
        nonlocal cancel_requested
        cancel_requested = True
        raise InterruptedError("cancelled")

    monkeypatch.setattr(overlay_builders, "download_to_cache", fake_download)

    out, err, on_stdout, on_stderr = _capture_logs()
    with session_scope() as s:
        overlay = s.scalar(select(Overlay).where(Overlay.id == overlay_id))
        s.expunge(overlay)

    builder = overlay_builders.WorkshopBuilder()
    with pytest.raises(InterruptedError):
        builder.build(
            overlay,
            on_stdout=on_stdout,
            on_stderr=on_stderr,
            should_cancel=lambda: cancel_requested,
        )