overlay_builders: download missing/stale workshop items inline

Replace the old skip-uncached-with-warning logic in WorkshopBuilder.build
with an inline download phase that calls _download_with_retry for each item
whose cache file is absent or stale (mtime/size mismatch). Stamps
last_downloaded_at / last_error after each download, and skips items with
no file_url. Update test fixture to utime cache files so mtime matches
time_updated, delete the now-superseded skip-warning test, and add six
new builder-level behavior tests covering the new download path.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
mwiegand 2026-05-11 22:56:09 +02:00
parent 6fc7f87943
commit 16adc5c1fe
No known key found for this signature in database
2 changed files with 296 additions and 63 deletions

View file

@ -11,6 +11,7 @@ import os
import subprocess
import tempfile
import time
from datetime import UTC, datetime
from pathlib import Path
from typing import Callable, Protocol
@ -126,9 +127,9 @@ def overlay_path_for_id(overlay_id: int) -> Path:
class WorkshopBuilder:
"""Diff-apply symlinks under `left4dead2/addons/` against the overlay's
current `WorkshopItem` associations. Cached items get an absolute symlink
into `workshop_cache/{steam_id}.vpk`. Items missing from cache are
skipped with a warning rather than turned into broken symlinks."""
current `WorkshopItem` associations. Downloads missing or stale items
before applying symlinks. Items with no file_url are skipped with a
warning."""
def build(
self,
@ -141,8 +142,9 @@ class WorkshopBuilder:
addons_dir = _overlay_root(overlay) / "left4dead2" / "addons"
addons_dir.mkdir(parents=True, exist_ok=True)
# Snapshot every field the decision logic + downloader will need.
with session_scope() as db:
items = db.scalars(
rows = db.scalars(
select(WorkshopItem)
.join(
OverlayWorkshopItem,
@ -150,37 +152,115 @@ class WorkshopBuilder:
)
.where(OverlayWorkshopItem.overlay_id == overlay.id)
).all()
# Detach items so we can use them outside the session.
items_data = [
(it.steam_id, it.last_downloaded_at) for it in items
(
it.id,
it.steam_id,
it.title,
it.filename,
it.file_url,
it.file_size,
it.time_updated,
it.preview_url,
it.last_downloaded_at,
it.last_error,
)
for it in rows
]
cache_root = workshop_cache_root()
# desired: symlink-name -> absolute target path (only for cached items)
desired: dict[str, Path] = {}
cache_root.mkdir(parents=True, exist_ok=True)
downloaded = 0
cached = 0
skipped: list[str] = []
for steam_id, last_downloaded_at in items_data:
target = cache_path(steam_id)
if last_downloaded_at is None or not target.exists():
# Download phase.
for (
item_id, steam_id, title, filename, file_url, file_size,
time_updated, preview_url, last_downloaded_at, last_error,
) in items_data:
if should_cancel():
on_stderr("workshop build cancelled during download phase")
return
if not file_url:
on_stderr(
f"workshop item {steam_id} skipped: no file_url "
f"(steam result: {last_error or 'unknown'})"
)
skipped.append(steam_id)
continue
target = cache_path(steam_id)
needs_download = (
last_downloaded_at is None
or not target.exists()
or int(target.stat().st_mtime) != int(time_updated)
or int(target.stat().st_size) != int(file_size)
)
if not needs_download:
cached += 1
continue
meta = WorkshopMetadata(
steam_id=steam_id,
title=title,
filename=filename,
file_url=file_url,
file_size=file_size,
time_updated=time_updated,
preview_url=preview_url,
consumer_app_id=550,
result=1,
)
on_stdout(f"workshop {steam_id} downloading")
try:
_download_with_retry(
meta, cache_root,
on_stderr=on_stderr, should_cancel=should_cancel,
)
except InterruptedError:
raise
except Exception as exc:
with session_scope() as db:
wi = db.scalar(select(WorkshopItem).where(WorkshopItem.id == item_id))
if wi is not None:
wi.last_error = f"download failed: {exc}"
raise
with session_scope() as db:
wi = db.scalar(select(WorkshopItem).where(WorkshopItem.id == item_id))
if wi is not None:
wi.last_downloaded_at = datetime.now(UTC)
wi.last_error = ""
downloaded += 1
# Re-snapshot for symlink phase: only items that have a cache file now
# belong in the desired set. Items skipped above stay out.
desired: dict[str, Path] = {}
for (
_item_id, steam_id, _title, _filename, _file_url, _file_size,
_time_updated, _preview_url, _last_downloaded_at, _last_error,
) in items_data:
if steam_id in skipped:
continue
target = cache_path(steam_id)
if not target.exists():
continue # shouldn't happen post-download; safety net
desired[f"{steam_id}.vpk"] = target.resolve()
if should_cancel():
on_stderr("workshop build cancelled before applying symlinks")
return
# existing: symlink-name -> link target (only for symlinks pointing at our cache)
# existing: symlink-name -> link target (only symlinks pointing at our cache)
existing: dict[str, Path] = {}
for entry in os.scandir(addons_dir):
if not entry.is_symlink():
continue
try:
target = Path(os.readlink(entry.path))
link_target = Path(os.readlink(entry.path))
except OSError:
continue
try:
resolved = target.resolve(strict=False)
resolved = link_target.resolve(strict=False)
except OSError:
continue
if not _is_under(resolved, cache_root):
@ -191,7 +271,6 @@ class WorkshopBuilder:
removed = 0
unchanged = 0
# Remove obsolete or stale symlinks first.
for name, current_target in existing.items():
if should_cancel():
on_stderr("workshop build cancelled mid-removal")
@ -202,48 +281,27 @@ class WorkshopBuilder:
removed += 1
elif current_target != desired_target:
os.unlink(addons_dir / name)
# will be recreated below
else:
unchanged += 1
# Recompute existing post-removal so the create loop knows what's left.
post_removal_existing = {
name for name in existing if name in desired and existing[name] == desired[name]
name for name in existing
if name in desired and existing[name] == desired[name]
}
# Create new symlinks.
for name, target in desired.items():
if should_cancel():
on_stderr("workshop build cancelled mid-creation")
return
if name in post_removal_existing:
continue
link_path = addons_dir / name
# Defensive: if a non-symlink file collides with our name, leave it.
if link_path.exists() and not link_path.is_symlink():
on_stderr(
f"refusing to overwrite non-symlink at {link_path}; manual intervention required"
)
continue
if link_path.is_symlink():
# An obsolete symlink not in `existing` (target outside cache).
# We don't manage these — leave alone.
on_stderr(
f"refusing to overwrite foreign symlink at {link_path}"
)
continue
os.symlink(str(target), str(link_path))
os.symlink(target, addons_dir / name)
created += 1
on_stdout(
f"workshop overlay {overlay.name!r}: created={created} "
f"removed={removed} unchanged={unchanged} "
f"skipped(uncached)={len(skipped)}"
)
for steam_id in skipped:
on_stderr(
f"workshop item {steam_id} skipped: not yet downloaded "
f"(refresh required before this overlay can mount it)"
f"workshop overlay {overlay.name!r}: "
f"downloaded={downloaded} cached={cached} skipped={len(skipped)} "
f"created={created} removed={removed} unchanged={unchanged}"
)

View file

@ -37,6 +37,7 @@ def _add_workshop_item(steam_id: str, *, downloaded: bool, cache_root: Path, con
if downloaded:
cache_root.mkdir(parents=True, exist_ok=True)
(cache_root / f"{steam_id}.vpk").write_bytes(content)
os.utime(cache_root / f"{steam_id}.vpk", (1700000000, 1700000000))
with session_scope() as s:
wi = WorkshopItem(
steam_id=steam_id,
@ -135,27 +136,6 @@ def test_workshop_builder_creates_absolute_symlinks(env: Path) -> None:
assert link_b.resolve() == (cache_root / "1002.vpk").resolve()
def test_workshop_builder_skips_uncached_items_with_warning(env: Path) -> None:
_, overlay_id = _create_user_and_overlay("ws", "workshop")
cache_root = env / "workshop_cache"
cached = _add_workshop_item("1001", downloaded=True, cache_root=cache_root)
uncached = _add_workshop_item("9999", downloaded=False, cache_root=cache_root)
_associate(overlay_id, cached)
_associate(overlay_id, uncached)
out, err, on_stdout, on_stderr = _capture_logs()
with session_scope() as s:
overlay = s.query(Overlay).filter_by(id=overlay_id).one()
overlay_builders.BUILDERS["workshop"].build(
overlay, on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=lambda: False
)
addons = env / "overlays" / "7" / "left4dead2" / "addons"
assert (addons / "1001.vpk").is_symlink()
assert not (addons / "9999.vpk").exists(), "must NOT create dangling symlink"
assert any("9999" in line and ("skip" in line.lower() or "uncached" in line.lower()) for line in err + out), err + out
def test_workshop_builder_rerun_is_idempotent(env: Path) -> None:
_, overlay_id = _create_user_and_overlay("ws", "workshop")
@ -518,3 +498,198 @@ def test_download_with_retry_bails_when_cancelled_during_backoff(env, tmp_path,
meta, tmp_path / "cache",
on_stderr=on_stderr, should_cancel=lambda: False,
)
def _make_meta_from_db_row(steam_id: str, *, file_size: int, time_updated: int):
from l4d2web.services import steam_workshop
return steam_workshop.WorkshopMetadata(
steam_id=steam_id, title=f"item-{steam_id}", filename=f"orig-{steam_id}.vpk",
file_url=f"https://example.com/{steam_id}.vpk", file_size=file_size,
time_updated=time_updated, preview_url="", consumer_app_id=550, result=1,
)
def test_workshop_build_downloads_uncached_and_stamps_timestamp(env, tmp_path, monkeypatch):
monkeypatch.setenv("LEFT4ME_ROOT", str(tmp_path))
cache_root = tmp_path / "workshop_cache"
user_id, overlay_id = _create_user_and_overlay("ws", "workshop")
item_id = _add_workshop_item("2001", downloaded=False, cache_root=cache_root)
_associate(overlay_id, item_id)
download_calls = []
def fake_download(meta, cache_root_arg, *, should_cancel=None):
download_calls.append(meta.steam_id)
cache_root_arg.mkdir(parents=True, exist_ok=True)
(cache_root_arg / f"{meta.steam_id}.vpk").write_bytes(b"data")
os.utime(cache_root_arg / f"{meta.steam_id}.vpk", (meta.time_updated, meta.time_updated))
monkeypatch.setattr(overlay_builders, "download_to_cache", fake_download)
out, err, on_stdout, on_stderr = _capture_logs()
with session_scope() as s:
overlay = s.scalar(__import__("sqlalchemy").select(Overlay).where(Overlay.id == overlay_id))
s.expunge(overlay)
overlay_builders.WorkshopBuilder().build(
overlay, on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=lambda: False,
)
assert download_calls == ["2001"]
with session_scope() as s:
from sqlalchemy import select as _select
wi = s.scalar(_select(WorkshopItem).where(WorkshopItem.id == item_id))
assert wi.last_downloaded_at is not None
assert wi.last_error == ""
addons = tmp_path / "overlays" / "7" / "left4dead2" / "addons"
assert (addons / "2001.vpk").is_symlink()
def test_workshop_build_skips_already_cached(env, tmp_path, monkeypatch):
monkeypatch.setenv("LEFT4ME_ROOT", str(tmp_path))
cache_root = tmp_path / "workshop_cache"
user_id, overlay_id = _create_user_and_overlay("ws", "workshop")
item_id = _add_workshop_item("2002", downloaded=True, cache_root=cache_root)
# Make the cache file's (mtime, size) match the DB row exactly.
file_path = cache_root / "2002.vpk"
os.utime(file_path, (1700000000, 1700000000))
with session_scope() as s:
from sqlalchemy import select as _sel, update as _upd
s.execute(_upd(WorkshopItem).where(WorkshopItem.id == item_id).values(
file_size=os.path.getsize(file_path), time_updated=1700000000,
))
_associate(overlay_id, item_id)
called = []
monkeypatch.setattr(
overlay_builders, "download_to_cache",
lambda *a, **kw: called.append(1),
)
out, err, on_stdout, on_stderr = _capture_logs()
with session_scope() as s:
from sqlalchemy import select as _sel
overlay = s.scalar(_sel(Overlay).where(Overlay.id == overlay_id))
s.expunge(overlay)
overlay_builders.WorkshopBuilder().build(
overlay, on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=lambda: False,
)
assert called == [], "should not call downloader for an already-cached item"
addons = tmp_path / "overlays" / "7" / "left4dead2" / "addons"
assert (addons / "2002.vpk").is_symlink()
def test_workshop_build_redownloads_stale_cache(env, tmp_path, monkeypatch):
monkeypatch.setenv("LEFT4ME_ROOT", str(tmp_path))
cache_root = tmp_path / "workshop_cache"
user_id, overlay_id = _create_user_and_overlay("ws", "workshop")
item_id = _add_workshop_item("2003", downloaded=True, cache_root=cache_root)
with session_scope() as s:
from sqlalchemy import update as _upd
s.execute(_upd(WorkshopItem).where(WorkshopItem.id == item_id).values(
file_size=99, time_updated=1800000000,
))
_associate(overlay_id, item_id)
download_calls = []
def fake_download(meta, cache_root_arg, *, should_cancel=None):
download_calls.append(meta.steam_id)
(cache_root_arg / f"{meta.steam_id}.vpk").write_bytes(b"99bytes____99bytes____99bytes____99bytes____99bytes____99bytes____99bytes____99bytes____99bytes____")
os.utime(cache_root_arg / f"{meta.steam_id}.vpk", (meta.time_updated, meta.time_updated))
monkeypatch.setattr(overlay_builders, "download_to_cache", fake_download)
out, err, on_stdout, on_stderr = _capture_logs()
with session_scope() as s:
from sqlalchemy import select as _sel
overlay = s.scalar(_sel(Overlay).where(Overlay.id == overlay_id))
s.expunge(overlay)
overlay_builders.WorkshopBuilder().build(
overlay, on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=lambda: False,
)
assert download_calls == ["2003"]
def test_workshop_build_skips_items_with_no_file_url(env, tmp_path, monkeypatch):
monkeypatch.setenv("LEFT4ME_ROOT", str(tmp_path))
user_id, overlay_id = _create_user_and_overlay("ws", "workshop")
with session_scope() as s:
wi = WorkshopItem(
steam_id="2004", title="gone", filename="",
file_url="", file_size=0, time_updated=0, preview_url="",
last_downloaded_at=None, last_error="steam result 9",
)
s.add(wi)
s.flush()
item_id = wi.id
_associate(overlay_id, item_id)
monkeypatch.setattr(
overlay_builders, "download_to_cache",
lambda *a, **kw: (_ for _ in ()).throw(AssertionError("must not be called")),
)
out, err, on_stdout, on_stderr = _capture_logs()
with session_scope() as s:
from sqlalchemy import select as _sel
overlay = s.scalar(_sel(Overlay).where(Overlay.id == overlay_id))
s.expunge(overlay)
overlay_builders.WorkshopBuilder().build(
overlay, on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=lambda: False,
)
assert any("2004" in line and "skipped" in line for line in err)
addons = tmp_path / "overlays" / "7" / "left4dead2" / "addons"
assert not (addons / "2004.vpk").exists()
def test_workshop_build_fails_when_all_retries_exhausted(env, tmp_path, monkeypatch):
import requests
monkeypatch.setenv("LEFT4ME_ROOT", str(tmp_path))
user_id, overlay_id = _create_user_and_overlay("ws", "workshop")
item_id = _add_workshop_item("2005", downloaded=False, cache_root=tmp_path / "workshop_cache")
_associate(overlay_id, item_id)
monkeypatch.setattr(overlay_builders, "_sleep_with_cancel", lambda *a, **kw: False)
monkeypatch.setattr(
overlay_builders, "download_to_cache",
lambda *a, **kw: (_ for _ in ()).throw(requests.ConnectionError("net")),
)
out, err, on_stdout, on_stderr = _capture_logs()
with session_scope() as s:
from sqlalchemy import select as _sel
overlay = s.scalar(_sel(Overlay).where(Overlay.id == overlay_id))
s.expunge(overlay)
with pytest.raises(requests.ConnectionError):
overlay_builders.WorkshopBuilder().build(
overlay, on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=lambda: False,
)
with session_scope() as s:
from sqlalchemy import select as _sel
wi = s.scalar(_sel(WorkshopItem).where(WorkshopItem.id == item_id))
assert "download failed" in wi.last_error
def test_workshop_build_cancels_cleanly_during_download_phase(env, tmp_path, monkeypatch):
monkeypatch.setenv("LEFT4ME_ROOT", str(tmp_path))
user_id, overlay_id = _create_user_and_overlay("ws", "workshop")
item_id = _add_workshop_item("2006", downloaded=False, cache_root=tmp_path / "workshop_cache")
_associate(overlay_id, item_id)
cancel_flag = {"v": False}
def fake_download(meta, cache_root, *, should_cancel=None):
cancel_flag["v"] = True
raise InterruptedError("cancelled")
monkeypatch.setattr(overlay_builders, "download_to_cache", fake_download)
out, err, on_stdout, on_stderr = _capture_logs()
with session_scope() as s:
from sqlalchemy import select as _sel
overlay = s.scalar(_sel(Overlay).where(Overlay.id == overlay_id))
s.expunge(overlay)
with pytest.raises(InterruptedError):
overlay_builders.WorkshopBuilder().build(
overlay, on_stdout=on_stdout, on_stderr=on_stderr,
should_cancel=lambda: cancel_flag["v"],
)