job_worker: don't duplicate streamed stderr on HostCommandError
services/host_commands.run_command pumps each stderr line into JobLog
via on_stderr (job_worker.py:215) before it raises HostCommandError —
appending exc.stderr again as a single row produced a second copy of
the entire traceback truncated at JOB_LOG_LINE_MAX_CHARS (4096), which
was visible as the awkward duplicated/cut-off second block at the end
of failed install logs.
Split the existing `except subprocess.CalledProcessError` into two:
except HostCommandError: stderr already streamed — just record exit
code + last error summary on the job/server row. No log append.
except subprocess.CalledProcessError: catches raw CalledProcessErrors
raised outside host_commands (no pump ran), so still append stderr
to the log. Preserves the path test_called_process_error_fails_job
exercises.
New regression test asserts a HostCommandError with multi-line stderr
doesn't land as a single concatenated JobLog row.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
363f429c7a
commit
e1149704c8
2 changed files with 46 additions and 2 deletions
|
|
@ -20,7 +20,7 @@ from l4d2web.models import (
|
||||||
Server,
|
Server,
|
||||||
WorkshopItem,
|
WorkshopItem,
|
||||||
)
|
)
|
||||||
from l4d2web.services.host_commands import CommandCancelledError
|
from l4d2web.services.host_commands import CommandCancelledError, HostCommandError
|
||||||
from l4d2web.services.server_identity import server_unit_name
|
from l4d2web.services.server_identity import server_unit_name
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -344,7 +344,17 @@ def run_job(job_id: int) -> None:
|
||||||
refresh_server_actual_state_after_job(job_id, server_id)
|
refresh_server_actual_state_after_job(job_id, server_id)
|
||||||
exit_code = exc.returncode if exc.returncode is not None else 1
|
exit_code = exc.returncode if exc.returncode is not None else 1
|
||||||
finish_job(job_id, "cancelled", exit_code, error=error)
|
finish_job(job_id, "cancelled", exit_code, error=error)
|
||||||
|
except HostCommandError as exc:
|
||||||
|
# stderr was already pumped line-by-line into JobLog by
|
||||||
|
# host_commands.run_command before it raised; don't re-append exc.stderr
|
||||||
|
# (would duplicate the entire traceback as one 4KB-truncated row).
|
||||||
|
error = exc.stderr or str(exc)
|
||||||
|
if server_id is not None:
|
||||||
|
refresh_server_actual_state_after_job(job_id, server_id)
|
||||||
|
finish_job(job_id, "failed", exc.returncode, error=error)
|
||||||
except subprocess.CalledProcessError as exc:
|
except subprocess.CalledProcessError as exc:
|
||||||
|
# Raw CalledProcessError raised outside host_commands (no streaming
|
||||||
|
# pump ran); its stderr hasn't been logged yet.
|
||||||
error = exc.stderr or str(exc)
|
error = exc.stderr or str(exc)
|
||||||
if exc.stderr:
|
if exc.stderr:
|
||||||
append_job_log_line(job_id, "stderr", str(exc.stderr), max_chars=max_chars)
|
append_job_log_line(job_id, "stderr", str(exc.stderr), max_chars=max_chars)
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@ from l4d2web.models import (
|
||||||
WorkshopItem,
|
WorkshopItem,
|
||||||
)
|
)
|
||||||
from l4d2web.services import l4d2_facade
|
from l4d2web.services import l4d2_facade
|
||||||
from l4d2web.services.host_commands import CommandCancelledError
|
from l4d2web.services.host_commands import CommandCancelledError, HostCommandError
|
||||||
from l4d2web.services.job_worker import (
|
from l4d2web.services.job_worker import (
|
||||||
SchedulerState,
|
SchedulerState,
|
||||||
build_scheduler_state,
|
build_scheduler_state,
|
||||||
|
|
@ -219,6 +219,40 @@ def test_called_process_error_fails_job_and_sets_server_error(seeded_worker, mon
|
||||||
assert "stop failed" in lines
|
assert "stop failed" in lines
|
||||||
|
|
||||||
|
|
||||||
|
def test_host_command_error_does_not_duplicate_stderr_in_log(seeded_worker, monkeypatch) -> None:
|
||||||
|
"""HostCommandError means host_commands.run_command's pump already
|
||||||
|
streamed stderr to JobLog line-by-line; the worker must not also append
|
||||||
|
exc.stderr as a single row (would duplicate the entire traceback)."""
|
||||||
|
app, ids = seeded_worker
|
||||||
|
job_id = add_job(ids.user, "stop", server_id=ids.server_one)
|
||||||
|
|
||||||
|
multiline_stderr = "line1\nline2\nline3"
|
||||||
|
|
||||||
|
def fail_stop(server_id, **kwargs):
|
||||||
|
raise HostCommandError(returncode=7, cmd=["stop"], output="", stderr=multiline_stderr)
|
||||||
|
|
||||||
|
monkeypatch.setattr(l4d2_facade, "stop_server", fail_stop)
|
||||||
|
monkeypatch.setattr(l4d2_facade, "server_status", lambda name: SimpleNamespace(state="stopped"))
|
||||||
|
|
||||||
|
with app.app_context():
|
||||||
|
assert run_worker_once() is True
|
||||||
|
|
||||||
|
with session_scope() as session:
|
||||||
|
job = session.scalar(select(Job).where(Job.id == job_id))
|
||||||
|
server = session.scalar(select(Server).where(Server.id == ids.server_one))
|
||||||
|
lines = [row.line for row in job_logs_for(session, job_id)]
|
||||||
|
|
||||||
|
assert job is not None
|
||||||
|
assert job.state == "failed"
|
||||||
|
assert job.exit_code == 7
|
||||||
|
assert server is not None
|
||||||
|
assert server.last_error == multiline_stderr
|
||||||
|
# The pump didn't run in this test (mocked facade), so individual stderr
|
||||||
|
# lines aren't expected. What matters is the multi-line blob is NOT
|
||||||
|
# appended as a single JobLog row.
|
||||||
|
assert multiline_stderr not in lines
|
||||||
|
|
||||||
|
|
||||||
def test_refresh_failure_does_not_hide_operation_failure(seeded_worker, monkeypatch) -> None:
|
def test_refresh_failure_does_not_hide_operation_failure(seeded_worker, monkeypatch) -> None:
|
||||||
app, ids = seeded_worker
|
app, ids = seeded_worker
|
||||||
job_id = add_job(ids.user, "stop", server_id=ids.server_one)
|
job_id = add_job(ids.user, "stop", server_id=ids.server_one)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue