6feb68f3e3
Observed: stories 917, 918, 920, 910 all turn-limit-killed despite producing
real commits. Tally across their session logs shows 30–55% of assistant
turns were pure narration ("I'll read X next", "Now let me check Y") with
no tool_use. At 80 max_turns the effective work budget was ~44 tool calls,
not enough for a typical bug fix's edit + test + check_criterion cycle.
Changes:
- New optional AgentConfig field max_tool_turns. When set, the watchdog
uses it instead of max_turns, and only assistant messages whose
data.message.content contains at least one tool_use block are counted.
- count_turns_in_log in agents/pool/auto_assign/watchdog/limits.rs
filters on tool_use. Existing test helper write_fake_session_log now
emits tool_use blocks; added write_fake_mixed_session_log for the
narration regression test.
- agents.toml: coders/coder-opus get max_turns=200 (claude-code's own
--max-turns cap, sized to never bite before the watchdog) and
max_tool_turns=80. qa: 120 / 40. mergemaster: 250 / 100. Budgets
unchanged — the dollar cap remains the runaway-loop backstop, with
~$3-5 worst-case waste if an agent narrates indefinitely.
- Two new regression tests:
* watchdog_does_not_count_narration_only_turns: 5 tool + 30 narration
under max_tool_turns=10 stays Running.
* watchdog_max_tool_turns_overrides_max_turns: 4 tool turns at
max_tool_turns=3 / max_turns=200 still terminates with TurnLimit.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
117 lines
3.9 KiB
Rust
117 lines
3.9 KiB
Rust
//! Shared test helpers for the watchdog module.
|
|
|
|
use std::path::Path;
|
|
|
|
mod limits_tests;
|
|
mod orphan_tests;
|
|
|
|
/// Write a fake session log file with `n` tool-using assistant turn entries.
|
|
///
|
|
/// Each turn includes a single `tool_use` content block so it counts under
|
|
/// the watchdog's tool-only turn filter (story 923). The file is named
|
|
/// `{agent_name}-{session_id}.log` to match the real naming convention
|
|
/// used by `AgentLogWriter`.
|
|
pub(super) fn write_fake_session_log(
|
|
project_root: &Path,
|
|
story_id: &str,
|
|
agent_name: &str,
|
|
session_id: &str,
|
|
n_turns: u64,
|
|
) {
|
|
write_fake_mixed_session_log(project_root, story_id, agent_name, session_id, n_turns, 0);
|
|
}
|
|
|
|
/// Write a fake session log with `n_tool` tool-using assistant turns and
|
|
/// `n_narration` text-only narration turns (story 923 regression).
|
|
///
|
|
/// Tool turns contain a `tool_use` content block; narration turns contain
|
|
/// only a `text` block. The watchdog's `count_turns_in_log` must report
|
|
/// `n_tool` only — narration must not count.
|
|
pub(super) fn write_fake_mixed_session_log(
|
|
project_root: &Path,
|
|
story_id: &str,
|
|
agent_name: &str,
|
|
session_id: &str,
|
|
n_tool: u64,
|
|
n_narration: u64,
|
|
) {
|
|
let log_dir = project_root.join(".huskies").join("logs").join(story_id);
|
|
std::fs::create_dir_all(&log_dir).unwrap();
|
|
let log_path = log_dir.join(format!("{agent_name}-{session_id}.log"));
|
|
let mut content = String::new();
|
|
for _ in 0..n_tool {
|
|
content.push_str(
|
|
&serde_json::to_string(&serde_json::json!({
|
|
"timestamp": "2026-04-25T00:00:00Z",
|
|
"type": "agent_json",
|
|
"story_id": story_id,
|
|
"agent_name": agent_name,
|
|
"data": {
|
|
"type": "assistant",
|
|
"message": {
|
|
"content": [
|
|
{ "type": "tool_use", "name": "Read", "input": {} }
|
|
]
|
|
}
|
|
}
|
|
}))
|
|
.unwrap(),
|
|
);
|
|
content.push('\n');
|
|
}
|
|
for _ in 0..n_narration {
|
|
content.push_str(
|
|
&serde_json::to_string(&serde_json::json!({
|
|
"timestamp": "2026-04-25T00:00:00Z",
|
|
"type": "agent_json",
|
|
"story_id": story_id,
|
|
"agent_name": agent_name,
|
|
"data": {
|
|
"type": "assistant",
|
|
"message": {
|
|
"content": [
|
|
{ "type": "text", "text": "Now let me read the next file." }
|
|
]
|
|
}
|
|
}
|
|
}))
|
|
.unwrap(),
|
|
);
|
|
content.push('\n');
|
|
}
|
|
std::fs::write(log_path, content).unwrap();
|
|
}
|
|
|
|
/// Write a fake session log containing a `result` event with the given cost.
|
|
///
|
|
/// Used to test budget enforcement via the watchdog's per-session log
|
|
/// reading (not `token_usage.jsonl`).
|
|
pub(super) fn write_fake_budget_session_log(
|
|
project_root: &Path,
|
|
story_id: &str,
|
|
agent_name: &str,
|
|
session_id: &str,
|
|
cost_usd: f64,
|
|
) {
|
|
let log_dir = project_root.join(".huskies").join("logs").join(story_id);
|
|
std::fs::create_dir_all(&log_dir).unwrap();
|
|
let log_path = log_dir.join(format!("{agent_name}-{session_id}.log"));
|
|
let content = serde_json::to_string(&serde_json::json!({
|
|
"timestamp": "2026-04-25T00:00:00Z",
|
|
"type": "agent_json",
|
|
"story_id": story_id,
|
|
"agent_name": agent_name,
|
|
"data": { "type": "result", "total_cost_usd": cost_usd }
|
|
}))
|
|
.unwrap()
|
|
+ "\n";
|
|
std::fs::write(log_path, content).unwrap();
|
|
}
|
|
|
|
/// Write a minimal project.toml with the given agent config.
|
|
pub(super) fn write_project_config(project_root: &Path, config_toml: &str) {
|
|
let huskies_dir = project_root.join(".huskies");
|
|
std::fs::create_dir_all(&huskies_dir).unwrap();
|
|
std::fs::write(huskies_dir.join("project.toml"), config_toml).unwrap();
|
|
}
|