Files
huskies/server/src/agents/pool/auto_assign/watchdog/tests/mod.rs
T
Timmy 6feb68f3e3 fix(923): watchdog counts only tool-using turns; narration-only turns no longer burn budget
Observed: stories 917, 918, 920, 910 all turn-limit-killed despite producing
real commits. Tally across their session logs shows 30–55% of assistant
turns were pure narration ("I'll read X next", "Now let me check Y") with
no tool_use. At 80 max_turns the effective work budget was ~44 tool calls,
not enough for a typical bug fix's edit + test + check_criterion cycle.

Changes:
- New optional AgentConfig field max_tool_turns. When set, the watchdog
  uses it instead of max_turns; only assistant messages whose
  data.message.content has at least one tool_use block count.
- count_turns_in_log in agents/pool/auto_assign/watchdog/limits.rs
  filters on tool_use. Existing test helper write_fake_session_log now
  emits tool_use blocks; added write_fake_mixed_session_log for the
  narration regression test.
- agents.toml: coders/coder-opus get max_turns=200 (claude-code's own
  --max-turns cap, sized to never bite before the watchdog) and
  max_tool_turns=80. qa: 120 / 40. mergemaster: 250 / 100. Budgets
  unchanged — the dollar cap remains the runaway-loop backstop, with
  ~$3-5 worst-case waste if an agent narrates indefinitely.
- Two new regression tests:
  * watchdog_does_not_count_narration_only_turns: 5 tool + 30 narration
    under max_tool_turns=10 stays Running.
  * watchdog_max_tool_turns_overrides_max_turns: 4 tool turns at
    max_tool_turns=3 / max_turns=200 still terminates with TurnLimit.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 17:25:11 +01:00

117 lines
3.9 KiB
Rust

//! Shared test helpers for the watchdog module.
use std::path::Path;
mod limits_tests;
mod orphan_tests;
/// Write a fake session log made up of `n_turns` tool-using assistant turns.
///
/// This is a thin wrapper over `write_fake_mixed_session_log` that requests
/// zero narration-only turns, so every entry carries a single `tool_use`
/// content block and therefore counts under the watchdog's tool-only turn
/// filter (story 923). The log file is named `{agent_name}-{session_id}.log`,
/// matching the naming convention used by `AgentLogWriter`.
pub(super) fn write_fake_session_log(
    project_root: &Path,
    story_id: &str,
    agent_name: &str,
    session_id: &str,
    n_turns: u64,
) {
    // A plain "fake session" has no narration-only turns at all.
    const NARRATION_TURNS: u64 = 0;

    write_fake_mixed_session_log(
        project_root,
        story_id,
        agent_name,
        session_id,
        n_turns,
        NARRATION_TURNS,
    );
}
/// Write a fake session log with `n_tool` tool-using assistant turns followed
/// by `n_narration` text-only narration turns (story 923 regression).
///
/// Tool turns contain a single `tool_use` content block; narration turns
/// contain only a `text` block. The watchdog's `count_turns_in_log` is
/// expected to report `n_tool` only — narration must not count.
///
/// The log is written to
/// `<project_root>/.huskies/logs/<story_id>/<agent_name>-<session_id>.log`,
/// one JSON object per line, replacing any existing file at that path.
///
/// # Panics
///
/// Panics if the log directory cannot be created, if serialization fails, or
/// if the file cannot be written.
pub(super) fn write_fake_mixed_session_log(
    project_root: &Path,
    story_id: &str,
    agent_name: &str,
    session_id: &str,
    n_tool: u64,
    n_narration: u64,
) {
    let log_dir = project_root.join(".huskies").join("logs").join(story_id);
    std::fs::create_dir_all(&log_dir).unwrap();
    let log_path = log_dir.join(format!("{agent_name}-{session_id}.log"));

    // Both kinds of turn share the same envelope and differ only in the single
    // content block, so the envelope is built in exactly one place. The result
    // is a complete, newline-terminated log line.
    let turn_line = |content_block: serde_json::Value| -> String {
        let mut line = serde_json::to_string(&serde_json::json!({
            "timestamp": "2026-04-25T00:00:00Z",
            "type": "agent_json",
            "story_id": story_id,
            "agent_name": agent_name,
            "data": {
                "type": "assistant",
                "message": {
                    "content": [content_block]
                }
            }
        }))
        .unwrap();
        line.push('\n');
        line
    };

    // Every line of a given kind is identical, so serialize each kind once
    // instead of once per loop iteration.
    let tool_line = turn_line(serde_json::json!(
        { "type": "tool_use", "name": "Read", "input": {} }
    ));
    let narration_line = turn_line(serde_json::json!(
        { "type": "text", "text": "Now let me read the next file." }
    ));

    let mut content = String::new();
    for _ in 0..n_tool {
        content.push_str(&tool_line);
    }
    for _ in 0..n_narration {
        content.push_str(&narration_line);
    }

    std::fs::write(log_path, content).unwrap();
}
/// Write a fake session log holding a single `result` event whose
/// `total_cost_usd` is `cost_usd`.
///
/// Used to test budget enforcement via the watchdog's per-session log
/// reading (not `token_usage.jsonl`). The file is written to
/// `<project_root>/.huskies/logs/<story_id>/<agent_name>-<session_id>.log`
/// and consists of one newline-terminated JSON line.
pub(super) fn write_fake_budget_session_log(
    project_root: &Path,
    story_id: &str,
    agent_name: &str,
    session_id: &str,
    cost_usd: f64,
) {
    let story_log_dir = project_root.join(".huskies").join("logs").join(story_id);
    std::fs::create_dir_all(&story_log_dir).unwrap();

    // The only entry in the log: a `result` event carrying the session cost.
    let result_event = serde_json::json!({
        "timestamp": "2026-04-25T00:00:00Z",
        "type": "agent_json",
        "story_id": story_id,
        "agent_name": agent_name,
        "data": { "type": "result", "total_cost_usd": cost_usd }
    });
    let mut line = serde_json::to_string(&result_event).unwrap();
    line.push('\n');

    let file_name = format!("{agent_name}-{session_id}.log");
    std::fs::write(story_log_dir.join(file_name), line).unwrap();
}
/// Write a minimal `project.toml` containing the given agent config.
///
/// `config_toml` is written verbatim to `<project_root>/.huskies/project.toml`;
/// the `.huskies` directory is created first if it does not exist yet.
///
/// # Panics
///
/// Panics if the directory cannot be created or the file cannot be written.
pub(super) fn write_project_config(project_root: &Path, config_toml: &str) {
    let config_dir = project_root.join(".huskies");
    let config_path = config_dir.join("project.toml");

    std::fs::create_dir_all(&config_dir).unwrap();
    std::fs::write(config_path, config_toml).unwrap();
}