feat(l4d2-web): add job pages and cancellation

This commit is contained in:
mwiegand 2026-05-06 15:05:13 +02:00
parent 91d042cf33
commit a347829608
No known key found for this signature in database
19 changed files with 635 additions and 83 deletions

View file

@ -0,0 +1,64 @@
# L4D2 Job Pages and Cancellation Follow-Up
## Goal
Make queued and running lifecycle jobs easier to inspect and stop from the web UI.
## Scope
- Add job list navigation for server pages and admin pages.
- Add a job detail page with persisted command logs streamed through the existing SSE endpoint.
- Add cancellation for queued jobs first.
- Add best-effort cancellation for running jobs by terminating the subprocess owned by `l4d2host.process.run_command()`.
## Slice 1: Job Browsing and Queued Cancel
### Behavior
- `/servers/<server_id>` shows recent jobs for that server and links to the full job history.
- `/servers/<server_id>/jobs` shows all jobs for that server, newest first.
- `/jobs/<job_id>` shows job metadata and live/replayed logs.
- `/admin/jobs` reuses the same job table markup and links every job to its detail page.
- `POST /jobs/<job_id>/cancel` cancels queued jobs only.
- Owners can view/cancel their own jobs.
- Admins can view/cancel any job.
### Implementation Notes
- Use one reusable Jinja partial for job tables.
- Show cancel buttons only for `queued` jobs in this slice.
- Cancelling a queued job sets `state="cancelled"`, `finished_at`, `updated_at`, and `exit_code=1`.
- Append a `stderr` job-log line explaining that the job was cancelled before execution.
- Do not revert `Server.desired_state`; cancellation prevents execution but is not rollback.
### Verification
- `pytest l4d2web/tests/test_pages.py -q`
- `pytest l4d2web/tests/test_job_logs.py -q`
- `pytest l4d2web/tests -q`
## Slice 2: Running Job Cancellation
### Behavior
- Running jobs expose the same cancel action.
- Cancelling a running job marks it `cancelling` while the subprocess is being terminated.
- Once the subprocess exits because of cancellation, the job finishes as `cancelled`.
- Cancellation is best-effort and is not rollback; partial runtime state may remain.
- Server actual state is refreshed after a cancelled server job when possible.
### Implementation Notes
- Add cancellation primitives in `l4d2host.process`.
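- Launch subprocesses in their own session (and therefore their own process group) when a cancel token, i.e. a `should_cancel` callback, is supplied, so the whole process group can be signalled at once.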
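- On cancellation, send `SIGTERM` to the subprocess's process group, wait briefly (`cancel_terminate_timeout`, default 2 seconds), then force kill with `SIGKILL` if the subprocess has not exited.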
- Thread the cancel token through `l4d2host` lifecycle APIs, `l4d2web.services.l4d2_facade`, and `l4d2web.services.job_worker`.
- Keep v1 single-process assumptions; cancellation requests are DB-backed, while process handles stay process-local.
### Verification
- `pytest l4d2host/tests/test_process.py -q`
- `pytest l4d2host/tests -q`
- `pytest l4d2web/tests/test_job_worker.py -q`
- `pytest l4d2web/tests/test_job_logs.py -q`
- `pytest l4d2web/tests -q`

View file

@ -16,6 +16,7 @@ class FuseOverlayFSMounter(OverlayMounter):
on_stdout: Callable[[str], None] | None = None, on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None, on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False, passthrough: bool = False,
should_cancel: Callable[[], bool] | None = None,
) -> None: ) -> None:
run_command( run_command(
[ [
@ -27,6 +28,7 @@ class FuseOverlayFSMounter(OverlayMounter):
on_stdout=on_stdout, on_stdout=on_stdout,
on_stderr=on_stderr, on_stderr=on_stderr,
passthrough=passthrough, passthrough=passthrough,
should_cancel=should_cancel,
) )
def unmount( def unmount(
@ -36,10 +38,12 @@ class FuseOverlayFSMounter(OverlayMounter):
on_stdout: Callable[[str], None] | None = None, on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None, on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False, passthrough: bool = False,
should_cancel: Callable[[], bool] | None = None,
) -> None: ) -> None:
run_command( run_command(
["fusermount3", "-u", str(merged)], ["fusermount3", "-u", str(merged)],
on_stdout=on_stdout, on_stdout=on_stdout,
on_stderr=on_stderr, on_stderr=on_stderr,
passthrough=passthrough, passthrough=passthrough,
should_cancel=should_cancel,
) )

View file

@ -18,6 +18,7 @@ def initialize_instance(
on_stdout: Callable[[str], None] | None = None, on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None, on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False, passthrough: bool = False,
should_cancel: Callable[[], bool] | None = None,
) -> None: ) -> None:
spec = load_spec(spec_path) spec = load_spec(spec_path)
@ -45,7 +46,7 @@ def initialize_instance(
if root.resolve() == DEFAULT_ROOT: if root.resolve() == DEFAULT_ROOT:
ensure_template_unit() ensure_template_unit()
daemon_reload(on_stdout=on_stdout, on_stderr=on_stderr, passthrough=passthrough) daemon_reload(on_stdout=on_stdout, on_stderr=on_stderr, passthrough=passthrough, should_cancel=should_cancel)
def _load_instance_env(path: Path) -> dict[str, str]: def _load_instance_env(path: Path) -> dict[str, str]:
@ -65,6 +66,7 @@ def start_instance(
on_stdout: Callable[[str], None] | None = None, on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None, on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False, passthrough: bool = False,
should_cancel: Callable[[], bool] | None = None,
) -> None: ) -> None:
instance_dir = root / "instances" / name instance_dir = root / "instances" / name
runtime_dir = root / "runtime" / name runtime_dir = root / "runtime" / name
@ -85,6 +87,7 @@ def start_instance(
on_stdout=on_stdout, on_stdout=on_stdout,
on_stderr=on_stderr, on_stderr=on_stderr,
passthrough=passthrough, passthrough=passthrough,
should_cancel=should_cancel,
) )
target_cfg = runtime_dir / "merged" / "left4dead2" / "cfg" / "server.cfg" target_cfg = runtime_dir / "merged" / "left4dead2" / "cfg" / "server.cfg"
@ -96,6 +99,7 @@ def start_instance(
on_stdout=on_stdout, on_stdout=on_stdout,
on_stderr=on_stderr, on_stderr=on_stderr,
passthrough=passthrough, passthrough=passthrough,
should_cancel=should_cancel,
) )
@ -106,18 +110,21 @@ def stop_instance(
on_stdout: Callable[[str], None] | None = None, on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None, on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False, passthrough: bool = False,
should_cancel: Callable[[], bool] | None = None,
) -> None: ) -> None:
run_command( run_command(
["systemctl", "--user", "stop", f"l4d2@{name}.service"], ["systemctl", "--user", "stop", f"l4d2@{name}.service"],
on_stdout=on_stdout, on_stdout=on_stdout,
on_stderr=on_stderr, on_stderr=on_stderr,
passthrough=passthrough, passthrough=passthrough,
should_cancel=should_cancel,
) )
run_command( run_command(
["fusermount3", "-u", str(root / "runtime" / name / "merged")], ["fusermount3", "-u", str(root / "runtime" / name / "merged")],
on_stdout=on_stdout, on_stdout=on_stdout,
on_stderr=on_stderr, on_stderr=on_stderr,
passthrough=passthrough, passthrough=passthrough,
should_cancel=should_cancel,
) )
@ -128,6 +135,7 @@ def delete_instance(
on_stdout: Callable[[str], None] | None = None, on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None, on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False, passthrough: bool = False,
should_cancel: Callable[[], bool] | None = None,
) -> None: ) -> None:
instance_dir = root / "instances" / name instance_dir = root / "instances" / name
runtime_dir = root / "runtime" / name runtime_dir = root / "runtime" / name
@ -140,6 +148,7 @@ def delete_instance(
on_stdout=on_stdout, on_stdout=on_stdout,
on_stderr=on_stderr, on_stderr=on_stderr,
passthrough=passthrough, passthrough=passthrough,
should_cancel=should_cancel,
) )
merged = runtime_dir / "merged" merged = runtime_dir / "merged"
@ -149,6 +158,7 @@ def delete_instance(
on_stdout=on_stdout, on_stdout=on_stdout,
on_stderr=on_stderr, on_stderr=on_stderr,
passthrough=passthrough, passthrough=passthrough,
should_cancel=should_cancel,
) )
if instance_dir.exists(): if instance_dir.exists():

View file

@ -1,7 +1,10 @@
from dataclasses import dataclass from dataclasses import dataclass
import os
import signal
import subprocess import subprocess
import sys import sys
import threading import threading
import time
from typing import Callable, Sequence from typing import Callable, Sequence
@ -12,12 +15,19 @@ class CommandResult:
stderr: str stderr: str
class CommandCancelledError(subprocess.CalledProcessError):
    """Raised by ``run_command`` when the subprocess ended due to a cancellation request.

    This is a subclass of ``subprocess.CalledProcessError``, so handlers that
    need to treat cancellation differently from an ordinary non-zero exit
    must catch this type before the base class.
    """
def run_command( def run_command(
cmd: Sequence[str], cmd: Sequence[str],
*, *,
on_stdout: Callable[[str], None] | None = None, on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None, on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False, passthrough: bool = False,
should_cancel: Callable[[], bool] | None = None,
cancel_poll_seconds: float = 0.2,
cancel_terminate_timeout: float = 2.0,
) -> CommandResult: ) -> CommandResult:
stdout_lines: list[str] = [] stdout_lines: list[str] = []
stderr_lines: list[str] = [] stderr_lines: list[str] = []
@ -28,8 +38,36 @@ def run_command(
stderr=subprocess.PIPE, stderr=subprocess.PIPE,
text=True, text=True,
bufsize=1, bufsize=1,
start_new_session=should_cancel is not None,
) )
def emit_stderr_message(line: str) -> None:
stderr_lines.append(line)
if on_stderr is not None:
on_stderr(line)
if passthrough:
print(line, file=sys.stderr)
def terminate_process() -> None:
emit_stderr_message("cancellation requested; terminating subprocess")
if should_cancel is not None:
try:
os.killpg(proc.pid, signal.SIGTERM)
except ProcessLookupError:
pass
else:
proc.terminate()
def kill_process() -> None:
emit_stderr_message("subprocess did not exit after cancellation; killing subprocess")
if should_cancel is not None:
try:
os.killpg(proc.pid, signal.SIGKILL)
except ProcessLookupError:
pass
else:
proc.kill()
def pump( def pump(
stream, stream,
sink: list[str], sink: list[str],
@ -60,7 +98,21 @@ def run_command(
stdout_thread.start() stdout_thread.start()
stderr_thread.start() stderr_thread.start()
returncode = proc.wait() cancelled = False
while True:
returncode = proc.poll()
if returncode is not None:
break
if should_cancel is not None and should_cancel():
cancelled = True
terminate_process()
try:
returncode = proc.wait(timeout=cancel_terminate_timeout)
except subprocess.TimeoutExpired:
kill_process()
returncode = proc.wait()
break
time.sleep(cancel_poll_seconds)
stdout_thread.join() stdout_thread.join()
stderr_thread.join() stderr_thread.join()
@ -69,6 +121,13 @@ def run_command(
stdout="\n".join(stdout_lines), stdout="\n".join(stdout_lines),
stderr="\n".join(stderr_lines), stderr="\n".join(stderr_lines),
) )
if cancelled:
raise CommandCancelledError(
returncode=returncode,
cmd=list(cmd),
output=result.stdout,
stderr=result.stderr,
)
if returncode != 0: if returncode != 0:
raise subprocess.CalledProcessError( raise subprocess.CalledProcessError(
returncode=returncode, returncode=returncode,

View file

@ -15,6 +15,7 @@ class SteamInstaller:
on_stdout: Callable[[str], None] | None = None, on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None, on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False, passthrough: bool = False,
should_cancel: Callable[[], bool] | None = None,
) -> None: ) -> None:
for platform in ("windows", "linux"): for platform in ("windows", "linux"):
run_command( run_command(
@ -34,4 +35,5 @@ class SteamInstaller:
on_stdout=on_stdout, on_stdout=on_stdout,
on_stderr=on_stderr, on_stderr=on_stderr,
passthrough=passthrough, passthrough=passthrough,
should_cancel=should_cancel,
) )

View file

@ -20,10 +20,12 @@ def daemon_reload(
on_stdout: Callable[[str], None] | None = None, on_stdout: Callable[[str], None] | None = None,
on_stderr: Callable[[str], None] | None = None, on_stderr: Callable[[str], None] | None = None,
passthrough: bool = False, passthrough: bool = False,
should_cancel: Callable[[], bool] | None = None,
) -> None: ) -> None:
run_command( run_command(
["systemctl", "--user", "daemon-reload"], ["systemctl", "--user", "daemon-reload"],
on_stdout=on_stdout, on_stdout=on_stdout,
on_stderr=on_stderr, on_stderr=on_stderr,
passthrough=passthrough, passthrough=passthrough,
should_cancel=should_cancel,
) )

View file

@ -3,7 +3,7 @@ import subprocess
import pytest import pytest
from l4d2host.process import run_command from l4d2host.process import CommandCancelledError, run_command
def test_callbacks_receive_lines() -> None: def test_callbacks_receive_lines() -> None:
@ -23,6 +23,27 @@ def test_nonzero_exit_raises() -> None:
run_command(["python3", "-c", "import sys; sys.exit(7)"]) run_command(["python3", "-c", "import sys; sys.exit(7)"])
def test_cancelled_command_raises_cancelled_error() -> None:
    """Cancelling after the first stdout line raises ``CommandCancelledError``."""
    cancel_requested = False
    seen: list[str] = []

    def record_and_request_cancel(line: str) -> None:
        # The first line of child output flips the flag polled by run_command.
        nonlocal cancel_requested
        seen.append(line)
        cancel_requested = True

    child_script = "import time; print('ready', flush=True); time.sleep(30)"

    with pytest.raises(CommandCancelledError):
        run_command(
            ["python3", "-c", child_script],
            on_stdout=record_and_request_cancel,
            should_cancel=lambda: cancel_requested,
            cancel_poll_seconds=0.01,
            cancel_terminate_timeout=0.2,
        )

    assert seen == ["ready"]
def test_run_command_avoids_runtime_unsafe_nested_annotations() -> None: def test_run_command_avoids_runtime_unsafe_nested_annotations() -> None:
source = inspect.getsource(run_command) source = inspect.getsource(run_command)
assert "subprocess.Popen[str].stdout" not in source assert "subprocess.Popen[str].stdout" not in source

View file

@ -1,11 +1,13 @@
from datetime import UTC, datetime
import time import time
from flask import Blueprint, Response, current_app, request from flask import Blueprint, Response, current_app, redirect, render_template, request
from sqlalchemy import select from sqlalchemy import select
from l4d2web.auth import current_user, require_login from l4d2web.auth import current_user, is_safe_next, require_login
from l4d2web.db import session_scope from l4d2web.db import session_scope
from l4d2web.models import Job, JobLog from l4d2web.models import Job, JobLog, Server, User
from l4d2web.services.job_worker import append_job_log
bp = Blueprint("job", __name__) bp = Blueprint("job", __name__)
@ -19,6 +21,67 @@ def format_sse_event(seq: int, event: str, data: str) -> str:
return "\n".join(lines) + "\n\n" return "\n".join(lines) + "\n\n"
def can_access_job(job: Job, user: User) -> bool:
    """Return whether *user* may access *job*: admins always, otherwise only the job's owner."""
    admin_flag = user.admin
    if admin_flag:
        return admin_flag
    return job.user_id == user.id
@bp.get("/jobs/<int:job_id>")
@require_login
def job_detail(job_id: int) -> str | Response:
    """Render the detail page for a single job.

    Responds with 404 when no job has the given id and with 403 when the
    logged-in user fails ``can_access_job``.
    """
    user = current_user()
    assert user is not None

    # Outer join on Server: a job row without a matching server still loads.
    lookup = (
        select(Job, User, Server)
        .join(User, User.id == Job.user_id)
        .outerjoin(Server, Server.id == Job.server_id)
        .where(Job.id == job_id)
    )

    with session_scope() as db:
        match = db.execute(lookup).first()
        if match is None:
            return Response(status=404)

        job, owner, server = match
        if can_access_job(job, user):
            return render_template("job_detail.html", job=job, owner=owner, server=server)
        return Response(status=403)
@bp.post("/jobs/<int:job_id>/cancel")
@require_login
def cancel_job(job_id: int) -> Response:
    """Handle a cancellation request for a job.

    A ``queued`` job is finalised as ``cancelled`` right away; a ``running``
    job is only marked ``cancelling``. A request for a job that is already
    ``cancelling`` just redirects, and any other state yields 409. Responds
    with 404 for an unknown job and 403 when the user fails
    ``can_access_job``. On success the client is redirected to the posted
    ``next`` URL, or to the job's detail page.
    """
    user = current_user()
    assert user is not None

    # Only follow a caller-supplied redirect target that passes is_safe_next.
    requested_next = request.form.get("next")
    redirect_target = requested_next if is_safe_next(requested_next) else f"/jobs/{job_id}"

    with session_scope() as db:
        job = db.scalar(select(Job).where(Job.id == job_id))
        if job is None:
            return Response(status=404)
        if not can_access_job(job, user):
            return Response(status=403)

        current_state = job.state
        if current_state == "cancelling":
            # Cancellation was already requested; nothing more to record.
            return redirect(redirect_target)
        if current_state not in ("queued", "running"):
            return Response("job cannot be cancelled", status=409)

        stamp = datetime.now(UTC)
        if current_state == "queued":
            job.state = "cancelled"
            job.exit_code = 1
            job.finished_at = stamp
            log_message = "job cancelled before execution"
        else:
            job.state = "cancelling"
            log_message = "job cancellation requested; attempting to terminate running process"
        job.updated_at = stamp
        append_job_log(db, job.id, "stderr", log_message)

    return redirect(redirect_target)
@bp.get("/jobs/<int:job_id>/stream") @bp.get("/jobs/<int:job_id>/stream")
@require_login @require_login
def stream_job(job_id: int) -> Response: def stream_job(job_id: int) -> Response:
@ -30,9 +93,11 @@ def stream_job(job_id: int) -> Response:
poll_seconds = float(current_app.config.get("JOB_WORKER_POLL_SECONDS", 1)) poll_seconds = float(current_app.config.get("JOB_WORKER_POLL_SECONDS", 1))
with session_scope() as db: with session_scope() as db:
job = db.scalar(select(Job).where(Job.id == job_id, Job.user_id == user.id)) job = db.scalar(select(Job).where(Job.id == job_id))
if job is None: if job is None:
return Response(status=404) return Response(status=404)
if not can_access_job(job, user):
return Response(status=403)
def generate(): def generate():
next_seq = last_seq next_seq = last_seq

View file

@ -81,30 +81,44 @@ def server_detail(server_id: int):
if server is None: if server is None:
return Response(status=404) return Response(status=404)
blueprint = db.scalar(select(BlueprintModel).where(BlueprintModel.id == server.blueprint_id)) blueprint = db.scalar(select(BlueprintModel).where(BlueprintModel.id == server.blueprint_id))
overlay_rows = db.execute( recent_job_rows = db.execute(
select(Overlay.name) select(Job, User, Server)
.join(BlueprintOverlay, BlueprintOverlay.overlay_id == Overlay.id) .join(User, User.id == Job.user_id)
.where(BlueprintOverlay.blueprint_id == server.blueprint_id) .outerjoin(Server, Server.id == Job.server_id)
.order_by(BlueprintOverlay.position)
).all()
latest_job = db.scalar(
select(Job)
.where(Job.server_id == server.id) .where(Job.server_id == server.id)
.order_by(Job.created_at.desc()) .order_by(Job.created_at.desc())
.limit(1) .limit(5)
) ).all()
return render_template( return render_template(
"server_detail.html", "server_detail.html",
server=server, server=server,
blueprint=blueprint, blueprint=blueprint,
overlay_names=[row[0] for row in overlay_rows], recent_job_rows=recent_job_rows,
arguments=json.loads(blueprint.arguments) if blueprint is not None else [],
config_lines=json.loads(blueprint.config) if blueprint is not None else [],
latest_job=latest_job,
) )
@bp.get("/servers/<int:server_id>/jobs")
@require_login
def server_jobs_page(server_id: int):
    """Render every job of one of the current user's servers, newest first.

    Responds with 404 when the server does not exist or is not owned by the
    logged-in user.
    """
    user = current_user()
    assert user is not None

    with session_scope() as db:
        owned_server = db.scalar(
            select(Server).where(Server.id == server_id, Server.user_id == user.id)
        )
        if owned_server is None:
            return Response(status=404)

        history_query = (
            select(Job, User, Server)
            .join(User, User.id == Job.user_id)
            .outerjoin(Server, Server.id == Job.server_id)
            .where(Job.server_id == owned_server.id)
            .order_by(Job.created_at.desc())
        )
        job_rows = db.execute(history_query).all()
        return render_template("server_jobs.html", server=owned_server, rows=job_rows)
@bp.get("/overlays") @bp.get("/overlays")
@require_login @require_login
def overlays() -> str: def overlays() -> str:

View file

@ -4,6 +4,7 @@ import subprocess
import threading import threading
import time import time
from l4d2host.process import CommandCancelledError
from sqlalchemy import func, select from sqlalchemy import func, select
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
@ -12,6 +13,7 @@ from l4d2web.models import Job, JobLog, Server
TERMINAL_JOB_STATES = {"succeeded", "failed", "cancelled"} TERMINAL_JOB_STATES = {"succeeded", "failed", "cancelled"}
ACTIVE_JOB_STATES = {"running", "cancelling"}
SERVER_OPERATIONS = {"initialize", "start", "stop", "delete"} SERVER_OPERATIONS = {"initialize", "start", "stop", "delete"}
_claim_lock = threading.Lock() _claim_lock = threading.Lock()
@ -38,7 +40,7 @@ def can_start(job, state: SchedulerState) -> bool:
def build_scheduler_state(session: Session) -> SchedulerState: def build_scheduler_state(session: Session) -> SchedulerState:
state = SchedulerState() state = SchedulerState()
running_jobs = session.scalars(select(Job).where(Job.state == "running")).all() running_jobs = session.scalars(select(Job).where(Job.state.in_(ACTIVE_JOB_STATES))).all()
for job in running_jobs: for job in running_jobs:
if job.operation == "install": if job.operation == "install":
state.install_running = True state.install_running = True
@ -93,26 +95,43 @@ def run_job(job_id: int) -> None:
def on_stderr(line: str) -> None: def on_stderr(line: str) -> None:
append_job_log_line(job_id, "stderr", line, max_chars=max_chars) append_job_log_line(job_id, "stderr", line, max_chars=max_chars)
def should_cancel() -> bool:
with session_scope() as db:
state = db.scalar(select(Job.state).where(Job.id == job_id))
return state == "cancelling"
def raise_if_cancelled() -> None:
if should_cancel():
raise CommandCancelledError(returncode=1, cmd=[operation], output="", stderr="")
try: try:
if operation == "install": if operation == "install":
l4d2_facade.install_runtime(on_stdout=on_stdout, on_stderr=on_stderr) l4d2_facade.install_runtime(on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=should_cancel)
elif operation in SERVER_OPERATIONS and server_id is None: elif operation in SERVER_OPERATIONS and server_id is None:
raise ValueError(f"{operation} job has no server_id") raise ValueError(f"{operation} job has no server_id")
elif operation == "initialize": elif operation == "initialize":
l4d2_facade.initialize_server(server_id, on_stdout=on_stdout, on_stderr=on_stderr) l4d2_facade.initialize_server(server_id, on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=should_cancel)
elif operation == "start": elif operation == "start":
l4d2_facade.initialize_server(server_id, on_stdout=on_stdout, on_stderr=on_stderr) l4d2_facade.initialize_server(server_id, on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=should_cancel)
l4d2_facade.start_server(server_id, on_stdout=on_stdout, on_stderr=on_stderr) raise_if_cancelled()
l4d2_facade.start_server(server_id, on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=should_cancel)
elif operation == "stop": elif operation == "stop":
l4d2_facade.stop_server(server_id, on_stdout=on_stdout, on_stderr=on_stderr) l4d2_facade.stop_server(server_id, on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=should_cancel)
elif operation == "delete": elif operation == "delete":
l4d2_facade.delete_server(server_id, on_stdout=on_stdout, on_stderr=on_stderr) l4d2_facade.delete_server(server_id, on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=should_cancel)
else: else:
raise ValueError(f"unknown job operation: {operation}") raise ValueError(f"unknown job operation: {operation}")
if server_id is not None: if server_id is not None:
refresh_server_actual_state_after_job(job_id, server_id) refresh_server_actual_state_after_job(job_id, server_id)
finish_job(job_id, "succeeded", 0) finish_job(job_id, "succeeded", 0)
except CommandCancelledError as exc:
error = "job cancelled; runtime state may be partial"
append_job_log_line(job_id, "stderr", error, max_chars=max_chars)
if server_id is not None:
refresh_server_actual_state_after_job(job_id, server_id)
exit_code = exc.returncode if exc.returncode is not None else 1
finish_job(job_id, "cancelled", exit_code, error=error)
except subprocess.CalledProcessError as exc: except subprocess.CalledProcessError as exc:
error = exc.stderr or str(exc) error = exc.stderr or str(exc)
if exc.stderr: if exc.stderr:
@ -154,9 +173,9 @@ def append_job_log_line(job_id: int, stream: str, line: str, max_chars: int = 40
def recover_stale_jobs() -> int: def recover_stale_jobs() -> int:
now = datetime.now(UTC) now = datetime.now(UTC)
with session_scope() as db: with session_scope() as db:
jobs = db.scalars(select(Job).where(Job.state == "running")).all() jobs = db.scalars(select(Job).where(Job.state.in_(ACTIVE_JOB_STATES))).all()
for job in jobs: for job in jobs:
job.state = "failed" job.state = "cancelled" if job.state == "cancelling" else "failed"
job.exit_code = 1 job.exit_code = 1
job.finished_at = now job.finished_at = now
job.updated_at = now job.updated_at = now

View file

@ -41,32 +41,32 @@ def load_server_blueprint_bundle(server_id: int) -> tuple[Server, Blueprint, lis
return server, blueprint, overlay_names return server, blueprint, overlay_names
def install_runtime(on_stdout=None, on_stderr=None) -> None: def install_runtime(on_stdout=None, on_stderr=None, should_cancel=None) -> None:
SteamInstaller().install_or_update(on_stdout=on_stdout, on_stderr=on_stderr) SteamInstaller().install_or_update(on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=should_cancel)
def initialize_server(server_id: int, on_stdout=None, on_stderr=None) -> None: def initialize_server(server_id: int, on_stdout=None, on_stderr=None, should_cancel=None) -> None:
server, blueprint, overlay_names = load_server_blueprint_bundle(server_id) server, blueprint, overlay_names = load_server_blueprint_bundle(server_id)
spec_path = write_temp_spec(build_server_spec_payload(server, blueprint, overlay_names)) spec_path = write_temp_spec(build_server_spec_payload(server, blueprint, overlay_names))
try: try:
initialize_instance(server.name, spec_path, on_stdout=on_stdout, on_stderr=on_stderr) initialize_instance(server.name, spec_path, on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=should_cancel)
finally: finally:
spec_path.unlink(missing_ok=True) spec_path.unlink(missing_ok=True)
def start_server(server_id: int, on_stdout=None, on_stderr=None) -> None: def start_server(server_id: int, on_stdout=None, on_stderr=None, should_cancel=None) -> None:
server, _, _ = load_server_blueprint_bundle(server_id) server, _, _ = load_server_blueprint_bundle(server_id)
start_instance(server.name, on_stdout=on_stdout, on_stderr=on_stderr) start_instance(server.name, on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=should_cancel)
def stop_server(server_id: int, on_stdout=None, on_stderr=None) -> None: def stop_server(server_id: int, on_stdout=None, on_stderr=None, should_cancel=None) -> None:
server, _, _ = load_server_blueprint_bundle(server_id) server, _, _ = load_server_blueprint_bundle(server_id)
stop_instance(server.name, on_stdout=on_stdout, on_stderr=on_stderr) stop_instance(server.name, on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=should_cancel)
def delete_server(server_id: int, on_stdout=None, on_stderr=None) -> None: def delete_server(server_id: int, on_stdout=None, on_stderr=None, should_cancel=None) -> None:
server, _, _ = load_server_blueprint_bundle(server_id) server, _, _ = load_server_blueprint_bundle(server_id)
delete_instance(server.name, on_stdout=on_stdout, on_stderr=on_stderr) delete_instance(server.name, on_stdout=on_stdout, on_stderr=on_stderr, should_cancel=should_cancel)
def server_status(server_name: str): def server_status(server_name: str):

View file

@ -11,8 +11,8 @@
--color-warning: #a15c07; --color-warning: #a15c07;
--color-success: #067647; --color-success: #067647;
--color-focus: #2563eb; --color-focus: #2563eb;
--color-log-bg: #111827; --color-log-bg: #f8fafc;
--color-log-text: #e5e7eb; --color-log-text: #18181b;
--space-base: 0.25rem; --space-base: 0.25rem;
--space-xs: var(--space-base); --space-xs: var(--space-base);
@ -43,6 +43,8 @@
--color-warning: #fcd34d; --color-warning: #fcd34d;
--color-success: #86efac; --color-success: #86efac;
--color-focus: #bfdbfe; --color-focus: #bfdbfe;
--color-log-bg: #111827;
--color-log-text: #e5e7eb;
} }
} }

View file

@ -0,0 +1,42 @@
{#
  Reusable job table partial.
  Variables read from the template context:
    rows         - iterable of (job, user, server) tuples; server may be empty
    show_user    - when true, render the User column
    show_server  - when true, render the Server column
    show_cancel  - when true, render the Action column
    cancel_next  - value posted as "next" by the cancel form; request.path is used when it is empty
  A cancel form is rendered only for jobs whose state is "queued" or "running".
#}
<table class="table">
<thead>
<tr>
<th>ID</th>
<th>Operation</th>
<th>State</th>
{% if show_user %}<th>User</th>{% endif %}
{% if show_server %}<th>Server</th>{% endif %}
<th>Created</th>
<th>Finished</th>
{% if show_cancel %}<th>Action</th>{% endif %}
</tr>
</thead>
<tbody>
{% for job, user, server in rows %}
<tr>
<td><a href="/jobs/{{ job.id }}">#{{ job.id }}</a></td>
<td>{{ job.operation }}</td>
<td>{{ job.state }}</td>
{% if show_user %}<td>{{ user.username }}</td>{% endif %}
{% if show_server %}<td>{% if server %}<a href="/servers/{{ server.id }}">{{ server.name }}</a>{% else %}-{% endif %}</td>{% endif %}
<td>{{ job.created_at }}</td>
<td>{{ job.finished_at or "-" }}</td>
{% if show_cancel %}
<td>
{% if job.state in ["queued", "running"] %}
<form method="post" action="/jobs/{{ job.id }}/cancel" class="inline-form">
<input type="hidden" name="csrf_token" value="{{ session.get('csrf_token', '') }}">
<input type="hidden" name="next" value="{{ cancel_next or request.path }}">
<button class="danger" type="submit">cancel</button>
</form>
{% else %}
<span class="muted">-</span>
{% endif %}
</td>
{% endif %}
</tr>
{% else %}
{# colspan 8 is the column count when all optional columns are shown #}
<tr><td colspan="8" class="muted">No jobs found.</td></tr>
{% endfor %}
</tbody>
</table>

View file

@ -5,23 +5,10 @@
{% block content %} {% block content %}
<section class="panel"> <section class="panel">
<h1>Jobs</h1> <h1>Jobs</h1>
<table class="table"> {% set show_user = true %}
<thead><tr><th>ID</th><th>Operation</th><th>State</th><th>User</th><th>Server</th><th>Created</th><th>Finished</th></tr></thead> {% set show_server = true %}
<tbody> {% set show_cancel = true %}
{% for job, user, server in rows %} {% set cancel_next = "/admin/jobs" %}
<tr> {% include "_job_table.html" %}
<td>{{ job.id }}</td>
<td>{{ job.operation }}</td>
<td>{{ job.state }}</td>
<td>{{ user.username }}</td>
<td>{% if server %}<a href="/servers/{{ server.id }}">{{ server.name }}</a>{% else %}-{% endif %}</td>
<td>{{ job.created_at }}</td>
<td>{{ job.finished_at or "-" }}</td>
</tr>
{% else %}
<tr><td colspan="7" class="muted">No jobs found.</td></tr>
{% endfor %}
</tbody>
</table>
</section> </section>
{% endblock %} {% endblock %}

View file

@ -0,0 +1,36 @@
{% extends "base.html" %}
{#
  Job detail page.
  Variables read from the template context:
    job     - the job being shown
    owner   - the user shown in the "User" row
    server  - the job's server; may be empty, in which case "-" is shown
  The cancel form is rendered only while the job is "queued" or "running".
  The log <pre> exposes the job's stream URL through its data-sse-url attribute.
#}
{% block title %}Job #{{ job.id }} | left4me{% endblock %}
{% block content %}
<section class="panel">
<div class="page-heading">
<h1>Job #{{ job.id }}</h1>
{% if job.state in ["queued", "running"] %}
<form method="post" action="/jobs/{{ job.id }}/cancel" class="inline-form">
<input type="hidden" name="csrf_token" value="{{ session.get('csrf_token', '') }}">
<input type="hidden" name="next" value="/jobs/{{ job.id }}">
<button class="danger" type="submit">cancel</button>
</form>
{% endif %}
</div>
<table class="definition-table">
<tbody>
<tr><th>Operation</th><td>{{ job.operation }}</td></tr>
<tr><th>State</th><td>{{ job.state }}</td></tr>
<tr><th>User</th><td>{{ owner.username }}</td></tr>
<tr><th>Server</th><td>{% if server %}<a href="/servers/{{ server.id }}">{{ server.name }}</a>{% else %}-{% endif %}</td></tr>
<tr><th>Created</th><td>{{ job.created_at }}</td></tr>
<tr><th>Started</th><td>{{ job.started_at or "-" }}</td></tr>
<tr><th>Finished</th><td>{{ job.finished_at or "-" }}</td></tr>
<tr><th>Exit code</th><td>{{ job.exit_code if job.exit_code is not none else "-" }}</td></tr>
</tbody>
</table>
</section>
<section class="panel">
<h2>Job Logs</h2>
<pre class="log-stream" data-sse-url="/jobs/{{ job.id }}/stream"></pre>
</section>
{% endblock %}

View file

@ -33,25 +33,16 @@
</section> </section>
<section class="panel"> <section class="panel">
<h2>Blueprint</h2> <div class="page-heading">
<h3>Overlay order</h3> <h2>Recent Jobs</h2>
<ol> <a href="/servers/{{ server.id }}/jobs">View all jobs</a>
{% for name in overlay_names %}<li>{{ name }}</li>{% else %}<li class="muted">No overlays configured.</li>{% endfor %} </div>
</ol> {% set rows = recent_job_rows %}
<h3>Arguments</h3> {% set show_user = false %}
<pre class="code-block">{{ arguments | join('\n') }}</pre> {% set show_server = false %}
<h3>Config</h3> {% set show_cancel = true %}
<pre class="code-block">{{ config_lines | join('\n') }}</pre> {% set cancel_next = "/servers/" ~ server.id %}
</section> {% include "_job_table.html" %}
<section class="panel">
<h2>Current / Recent Job</h2>
{% if latest_job %}
<table class="definition-table"><tbody><tr><th>Operation</th><td>{{ latest_job.operation }}</td></tr><tr><th>State</th><td>{{ latest_job.state }}</td></tr></tbody></table>
<pre class="log-stream" data-sse-url="/jobs/{{ latest_job.id }}/stream"></pre>
{% else %}
<p class="muted">No jobs have run for this server.</p>
{% endif %}
</section> </section>
<section class="panel"> <section class="panel">

View file

@ -0,0 +1,17 @@
{% extends "base.html" %}
{# Job history page for one server. Reads `server.name` and `server.id` from the template context. #}
{# The show_user / show_server / show_cancel / cancel_next variables set inside the content block are defined right before the "_job_table.html" include; presumably the partial reads them - confirm against the partial. #}
{# NOTE(review): unlike the server detail page, `rows` is not set here, so the job rows presumably come straight from the view context - verify against the route. #}
{% block title %}Jobs for {{ server.name }} | left4me{% endblock %}
{% block content %}
<section class="panel">
<div class="page-heading">
<h1>Jobs for {{ server.name }}</h1>
<a href="/servers/{{ server.id }}">Back to server</a>
</div>
{% set show_user = false %}
{% set show_server = false %}
{% set show_cancel = true %}
{% set cancel_next = "/servers/" ~ server.id ~ "/jobs" %}
{% include "_job_table.html" %}
</section>
{% endblock %}

View file

@ -6,6 +6,7 @@ import subprocess
import pytest import pytest
from sqlalchemy import select from sqlalchemy import select
from l4d2host.process import CommandCancelledError
from l4d2web.auth import hash_password from l4d2web.auth import hash_password
from l4d2web.db import init_db, session_scope from l4d2web.db import init_db, session_scope
from l4d2web.models import Blueprint, Job, Server, User from l4d2web.models import Blueprint, Job, Server, User
@ -122,12 +123,14 @@ def test_successful_start_job_logs_and_refreshes_server_state(seeded_worker, mon
job_id = add_job(ids.user, "start", server_id=ids.server_one) job_id = add_job(ids.user, "start", server_id=ids.server_one)
calls = [] calls = []
def fake_initialize(server_id, *, on_stdout=None, on_stderr=None): def fake_initialize(server_id, *, on_stdout=None, on_stderr=None, should_cancel=None):
del should_cancel
calls.append(("initialize", server_id)) calls.append(("initialize", server_id))
on_stdout("initialized") on_stdout("initialized")
on_stderr("init warning") on_stderr("init warning")
def fake_start(server_id, *, on_stdout=None, on_stderr=None): def fake_start(server_id, *, on_stdout=None, on_stderr=None, should_cancel=None):
del should_cancel
calls.append(("start", server_id)) calls.append(("start", server_id))
on_stdout("started") on_stdout("started")
@ -245,6 +248,54 @@ def test_same_server_jobs_do_not_overlap(seeded_worker, monkeypatch) -> None:
assert load_job(queued_id).state == "queued" assert load_job(queued_id).state == "queued"
def test_same_server_jobs_wait_while_job_is_cancelling(seeded_worker, monkeypatch) -> None:
    """A queued job stays queued while another job on the same server is in the cancelling state."""
    app, ids = seeded_worker

    def forbidden_stop(server_id, **kwargs):
        # The worker must not reach the facade at all in this scenario.
        return pytest.fail("must not run")

    add_job(ids.user, "start", server_id=ids.server_one, state="cancelling")
    pending_job_id = add_job(ids.user, "stop", server_id=ids.server_one)
    monkeypatch.setattr(l4d2_facade, "stop_server", forbidden_stop)

    with app.app_context():
        assert run_worker_once() is False

    pending_job = load_job(pending_job_id)
    assert pending_job.state == "queued"
def test_cancelled_process_finishes_job_as_cancelled(seeded_worker, monkeypatch) -> None:
    """A facade call that raises CommandCancelledError leaves the job cancelled with the process exit code."""
    app, ids = seeded_worker
    job_id = add_job(ids.user, "stop", server_id=ids.server_one)
    cancel_note = "job cancelled; runtime state may be partial"

    def fake_stop(server_id, *, on_stdout=None, on_stderr=None, should_cancel=None):
        assert server_id == ids.server_one
        assert should_cancel is not None

        # Flip the job to "cancelling" while the fake command is "running".
        with session_scope() as session:
            in_flight = session.scalar(select(Job).where(Job.id == job_id))
            assert in_flight is not None
            in_flight.state = "cancelling"

        assert should_cancel() is True
        on_stderr("terminating")
        raise CommandCancelledError(returncode=-15, cmd=["stop"], output="", stderr="")

    def fake_status(name):
        return SimpleNamespace(state="unknown")

    monkeypatch.setattr(l4d2_facade, "stop_server", fake_stop)
    monkeypatch.setattr(l4d2_facade, "server_status", fake_status)

    with app.app_context():
        assert run_worker_once() is True

    with session_scope() as session:
        job = session.scalar(select(Job).where(Job.id == job_id))
        server = session.scalar(select(Server).where(Server.id == ids.server_one))
        logged_lines = [entry.line for entry in job_logs_for(session, job_id)]

        assert job is not None
        assert job.state == "cancelled"
        assert job.exit_code == -15
        assert job.finished_at is not None

        assert server is not None
        assert server.last_error == cancel_note

        assert "terminating" in logged_lines
        assert cancel_note in logged_lines
def test_different_server_jobs_can_be_claimed_while_other_server_runs(seeded_worker, monkeypatch) -> None: def test_different_server_jobs_can_be_claimed_while_other_server_runs(seeded_worker, monkeypatch) -> None:
app, ids = seeded_worker app, ids = seeded_worker
add_job(ids.user, "start", server_id=ids.server_one, state="running") add_job(ids.user, "start", server_id=ids.server_one, state="running")

View file

@ -4,7 +4,7 @@ from pathlib import Path
from l4d2web.app import create_app from l4d2web.app import create_app
from l4d2web.auth import hash_password from l4d2web.auth import hash_password
from l4d2web.db import init_db, session_scope from l4d2web.db import init_db, session_scope
from l4d2web.models import Blueprint, BlueprintOverlay, Job, Overlay, Server, User from l4d2web.models import Blueprint, BlueprintOverlay, Job, JobLog, Overlay, Server, User
@pytest.fixture @pytest.fixture
@ -113,6 +113,16 @@ def test_css_tokens_define_neutral_light_and_dark_theme() -> None:
assert "radial-gradient" not in Path("l4d2web/static/css/layout.css").read_text() assert "radial-gradient" not in Path("l4d2web/static/css/layout.css").read_text()
def test_log_tokens_follow_light_and_dark_theme() -> None:
    """tokens.css declares the log colours, with separate values after the dark colour-scheme marker."""
    stylesheet = Path("l4d2web/static/css/tokens.css").read_text()

    for declaration in ("--color-log-bg: #f8fafc;", "--color-log-text: #18181b;"):
        assert declaration in stylesheet

    # Everything after the first dark-scheme media query marker.
    pieces = stylesheet.split("@media (prefers-color-scheme: dark)", 1)
    dark_section = pieces[1]

    for declaration in ("--color-log-bg: #111827;", "--color-log-text: #e5e7eb;"):
        assert declaration in dark_section
def test_server_detail_shows_operations_and_logs(auth_client_with_server) -> None: def test_server_detail_shows_operations_and_logs(auth_client_with_server) -> None:
response = auth_client_with_server.get("/servers/1") response = auth_client_with_server.get("/servers/1")
text = response.get_data(as_text=True) text = response.get_data(as_text=True)
@ -124,9 +134,134 @@ def test_server_detail_shows_operations_and_logs(auth_client_with_server) -> Non
assert 'action="/servers/1/initialize"' in text assert 'action="/servers/1/initialize"' in text
assert 'action="/servers/1/delete"' in text assert 'action="/servers/1/delete"' in text
assert 'href="/blueprints/1"' in text assert 'href="/blueprints/1"' in text
assert "<h2>Blueprint</h2>" not in text
assert "standard" not in text
assert 'data-sse-url="/servers/1/logs/stream"' in text assert 'data-sse-url="/servers/1/logs/stream"' in text
def test_server_detail_shows_recent_jobs(auth_client_with_server) -> None:
    """The server detail page shows a Recent Jobs panel linking to, and offering cancel for, a queued job."""
    with session_scope() as session:
        job = Job(user_id=1, server_id=1, operation="start", state="queued")
        session.add(job)
        session.flush()
        # Capture the id while the session is still open.
        job_id = job.id

    response = auth_client_with_server.get("/servers/1")
    text = response.get_data(as_text=True)

    assert response.status_code == 200
    assert "Recent Jobs" in text
    assert 'href="/servers/1/jobs"' in text
    assert f'href="/jobs/{job_id}"' in text
    # Pin the cancel form to the job created above; a bare 'action="/jobs/'
    # prefix would also match a form that targets some other job.
    assert f'action="/jobs/{job_id}/cancel"' in text
def test_server_jobs_page_lists_server_jobs(auth_client_with_server) -> None:
    """GET /servers/1/jobs renders the operations of the server's jobs and a link back to the server."""
    seeded_jobs = [
        Job(user_id=1, server_id=1, operation="initialize", state="succeeded"),
        Job(user_id=1, server_id=1, operation="stop", state="queued"),
    ]
    with session_scope() as session:
        for seeded in seeded_jobs:
            session.add(seeded)

    response = auth_client_with_server.get("/servers/1/jobs")
    page = response.get_data(as_text=True)

    assert response.status_code == 200
    for fragment in ("Jobs for alpha", "initialize", "stop", 'href="/servers/1"'):
        assert fragment in page
def test_job_detail_shows_metadata_and_log_stream(auth_client_with_server) -> None:
    """GET /jobs/<id> renders the job's metadata, a server link and the SSE log-stream URL."""
    with session_scope() as session:
        running_job = Job(user_id=1, server_id=1, operation="start", state="running")
        session.add(running_job)
        session.flush()
        job_id = running_job.id
        session.add(JobLog(job_id=job_id, seq=1, stream="stdout", line="starting"))

    response = auth_client_with_server.get(f"/jobs/{job_id}")
    page = response.get_data(as_text=True)

    assert response.status_code == 200
    expected_fragments = (
        f"Job #{job_id}",
        "start",
        "running",
        'href="/servers/1"',
        f'data-sse-url="/jobs/{job_id}/stream"',
    )
    for fragment in expected_fragments:
        assert fragment in page
def test_owner_can_cancel_queued_job(auth_client_with_server) -> None:
    """POSTing cancel on an owned queued job redirects to `next` and records the cancellation."""
    client = auth_client_with_server

    with session_scope() as session:
        queued_job = Job(user_id=1, server_id=1, operation="stop", state="queued")
        session.add(queued_job)
        session.flush()
        job_id = queued_job.id

    # Seed the CSRF token that the request header below echoes back.
    with client.session_transaction() as sess:
        sess["csrf_token"] = "test-token"

    detail_url = f"/jobs/{job_id}"
    response = client.post(
        f"/jobs/{job_id}/cancel",
        data={"next": detail_url},
        headers={"X-CSRF-Token": "test-token"},
    )

    assert response.status_code == 302
    assert response.headers["Location"].endswith(detail_url)

    with session_scope() as session:
        stored_job = session.query(Job).filter(Job.id == job_id).one()
        log_rows = session.query(JobLog).filter(JobLog.job_id == job_id).all()

        assert stored_job.state == "cancelled"
        assert stored_job.exit_code == 1
        assert stored_job.finished_at is not None
        assert [row.line for row in log_rows] == ["job cancelled before execution"]
def test_owner_can_request_running_job_cancel(auth_client_with_server) -> None:
    """POSTing cancel on an owned running job moves it to cancelling without finishing it."""
    client = auth_client_with_server

    with session_scope() as session:
        running_job = Job(user_id=1, server_id=1, operation="start", state="running")
        session.add(running_job)
        session.flush()
        job_id = running_job.id

    # Seed the CSRF token that the request header below echoes back.
    with client.session_transaction() as sess:
        sess["csrf_token"] = "test-token"

    response = client.post(
        f"/jobs/{job_id}/cancel",
        data={"next": f"/jobs/{job_id}"},
        headers={"X-CSRF-Token": "test-token"},
    )
    assert response.status_code == 302

    with session_scope() as session:
        stored_job = session.query(Job).filter(Job.id == job_id).one()
        log_rows = session.query(JobLog).filter(JobLog.job_id == job_id).all()

        assert stored_job.state == "cancelling"
        assert stored_job.finished_at is None
        logged = [row.line for row in log_rows]
        assert logged == ["job cancellation requested; attempting to terminate running process"]
def test_non_owner_cannot_view_or_cancel_job(auth_client_with_server) -> None:
    """Viewing or cancelling a job that belongs to a different user returns 403."""
    client = auth_client_with_server

    with session_scope() as session:
        stranger = User(username="other", password_digest=hash_password("secret"), admin=False)
        session.add(stranger)
        session.flush()
        foreign_job = Job(user_id=stranger.id, server_id=None, operation="install", state="queued")
        session.add(foreign_job)
        session.flush()
        job_id = foreign_job.id

    with client.session_transaction() as sess:
        sess["csrf_token"] = "test-token"

    view_response = client.get(f"/jobs/{job_id}")
    assert view_response.status_code == 403

    cancel_response = client.post(f"/jobs/{job_id}/cancel", headers={"X-CSRF-Token": "test-token"})
    assert cancel_response.status_code == 403
def test_servers_page_links_server_names(auth_client_with_server) -> None: def test_servers_page_links_server_names(auth_client_with_server) -> None:
response = auth_client_with_server.get("/servers") response = auth_client_with_server.get("/servers")
text = response.get_data(as_text=True) text = response.get_data(as_text=True)
@ -155,6 +290,7 @@ def test_admin_can_use_admin_pages(tmp_path, monkeypatch) -> None:
admin = User(username="admin", password_digest=hash_password("secret"), admin=True) admin = User(username="admin", password_digest=hash_password("secret"), admin=True)
session.add(admin) session.add(admin)
session.flush() session.flush()
session.add(Job(user_id=admin.id, server_id=None, operation="install", state="queued"))
admin_id = admin.id admin_id = admin.id
client = app.test_client() client = app.test_client()
@ -165,10 +301,40 @@ def test_admin_can_use_admin_pages(tmp_path, monkeypatch) -> None:
assert admin_page.status_code == 200 assert admin_page.status_code == 200
assert 'action="/admin/install"' in admin_page.get_data(as_text=True) assert 'action="/admin/install"' in admin_page.get_data(as_text=True)
assert client.get("/admin/users").status_code == 200 assert client.get("/admin/users").status_code == 200
assert client.get("/admin/jobs").status_code == 200 jobs_response = client.get("/admin/jobs")
assert jobs_response.status_code == 200
assert 'href="/jobs/1"' in jobs_response.get_data(as_text=True)
assert 'action="/jobs/1/cancel"' in jobs_response.get_data(as_text=True)
assert 'href="/admin"' in client.get("/dashboard").get_data(as_text=True) assert 'href="/admin"' in client.get("/dashboard").get_data(as_text=True)
def test_admin_can_view_other_users_job(tmp_path, monkeypatch) -> None:
    """An admin session can open the detail page of a job owned by another user."""
    db_file = tmp_path / "admin-job-view.db"
    db_url = f"sqlite:///{db_file}"
    monkeypatch.setenv("DATABASE_URL", db_url)
    app = create_app({"TESTING": True, "DATABASE_URL": db_url, "SECRET_KEY": "test"})
    init_db()

    with session_scope() as session:
        admin = User(username="admin", password_digest=hash_password("secret"), admin=True)
        owner = User(username="alice", password_digest=hash_password("secret"), admin=False)
        session.add_all([admin, owner])
        session.flush()
        owned_job = Job(user_id=owner.id, server_id=None, operation="install", state="queued")
        session.add(owned_job)
        session.flush()
        admin_id = admin.id
        job_id = owned_job.id

    client = app.test_client()
    # Sign in as the admin by writing the user id into the session.
    with client.session_transaction() as sess:
        sess["user_id"] = admin_id

    response = client.get(f"/jobs/{job_id}")
    body = response.get_data(as_text=True)

    assert response.status_code == 200
    assert "alice" in body
def test_admin_can_enqueue_runtime_install_job(tmp_path, monkeypatch) -> None: def test_admin_can_enqueue_runtime_install_job(tmp_path, monkeypatch) -> None:
db_url = f"sqlite:///{tmp_path/'admin-install.db'}" db_url = f"sqlite:///{tmp_path/'admin-install.db'}"
monkeypatch.setenv("DATABASE_URL", db_url) monkeypatch.setenv("DATABASE_URL", db_url)