fix(923): watchdog counts only tool-using turns; narration-only turns no longer burn budget
Observed: stories 917, 918, 920, 910 all turn-limit-killed despite producing
real commits. Tally across their session logs shows 30–55% of assistant
turns were pure narration ("I'll read X next", "Now let me check Y") with
no tool_use. At 80 max_turns the effective work budget was ~44 tool calls,
not enough for a typical bug fix's edit + test + check_criterion cycle.
Changes:
- New optional AgentConfig field max_tool_turns. When set the watchdog
uses it instead of max_turns; only assistant messages whose
data.message.content has at least one tool_use block count.
- count_turns_in_log in agents/pool/auto_assign/watchdog/limits.rs
filters on tool_use. Existing test helper write_fake_session_log now
emits tool_use blocks; added write_fake_mixed_session_log for the
narration regression test.
- agents.toml: coders/coder-opus get max_turns=200 (claude-code's own
--max-turns cap, sized to never bite before the watchdog) and
max_tool_turns=80. qa: 120 / 40. mergemaster: 250 / 100. Budgets
unchanged — the dollar cap remains the runaway-loop backstop, with
~$3-5 worst-case waste if an agent narrates indefinitely.
- Two new regression tests:
* watchdog_does_not_count_narration_only_turns: 5 tool + 30 narration
under max_tool_turns=10 stays Running.
* watchdog_max_tool_turns_overrides_max_turns: 4 tool turns at
max_tool_turns=3 / max_turns=200 still terminates with TurnLimit.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+14
-7
@@ -3,7 +3,8 @@ name = "coder-1"
|
|||||||
stage = "coder"
|
stage = "coder"
|
||||||
role = "Full-stack engineer. Implements features across all components."
|
role = "Full-stack engineer. Implements features across all components."
|
||||||
model = "sonnet"
|
model = "sonnet"
|
||||||
max_turns = 80
|
max_turns = 200
|
||||||
|
max_tool_turns = 80
|
||||||
max_budget_usd = 5.00
|
max_budget_usd = 5.00
|
||||||
disallowed_tools = ["ScheduleWakeup"]
|
disallowed_tools = ["ScheduleWakeup"]
|
||||||
prompt ="You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .huskies/README.md for the dev process, .huskies/specs/00_CONTEXT.md for what this project does, and .huskies/specs/tech/STACK.md for the tech stack and source map. The story details are in your prompt above. The worktree and feature branch already exist - do not create them.\n\n## Your workflow\n1. Read the story and understand the acceptance criteria.\n2. Implement the changes.\n3. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done.\n4. Run the run_tests MCP tool. It blocks server-side until tests finish (up to 20 minutes) and returns the full result. Do NOT call get_test_result — run_tests already gives you the pass/fail outcome.\n5. If tests fail, fix the failures and run run_tests again. Do not commit until tests pass.\n6. Once tests pass, commit your work with a descriptive message and exit.\n\nDo NOT accept stories, move them between stages, or merge to master. The server handles all of that after you exit.\n\n## Bug Workflow: Trust the Story, Act Fast\nWhen working on bugs:\n1. READ THE STORY DESCRIPTION FIRST. If it specifies exact files, functions, and line numbers — go directly there and make the fix.\n2. If the story does NOT specify the exact location, investigate with targeted grep.\n3. Fix with a surgical, minimal change.\n4. Run tests, fix failures, commit and exit.\n5. Write commit messages that explain what broke and why."
|
prompt ="You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .huskies/README.md for the dev process, .huskies/specs/00_CONTEXT.md for what this project does, and .huskies/specs/tech/STACK.md for the tech stack and source map. The story details are in your prompt above. The worktree and feature branch already exist - do not create them.\n\n## Your workflow\n1. Read the story and understand the acceptance criteria.\n2. Implement the changes.\n3. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done.\n4. Run the run_tests MCP tool. It blocks server-side until tests finish (up to 20 minutes) and returns the full result. Do NOT call get_test_result — run_tests already gives you the pass/fail outcome.\n5. If tests fail, fix the failures and run run_tests again. Do not commit until tests pass.\n6. Once tests pass, commit your work with a descriptive message and exit.\n\nDo NOT accept stories, move them between stages, or merge to master. The server handles all of that after you exit.\n\n## Bug Workflow: Trust the Story, Act Fast\nWhen working on bugs:\n1. READ THE STORY DESCRIPTION FIRST. If it specifies exact files, functions, and line numbers — go directly there and make the fix.\n2. If the story does NOT specify the exact location, investigate with targeted grep.\n3. Fix with a surgical, minimal change.\n4. Run tests, fix failures, commit and exit.\n5. Write commit messages that explain what broke and why."
|
||||||
@@ -14,7 +15,8 @@ name = "coder-2"
|
|||||||
stage = "coder"
|
stage = "coder"
|
||||||
role = "Full-stack engineer. Implements features across all components."
|
role = "Full-stack engineer. Implements features across all components."
|
||||||
model = "sonnet"
|
model = "sonnet"
|
||||||
max_turns = 80
|
max_turns = 200
|
||||||
|
max_tool_turns = 80
|
||||||
max_budget_usd = 5.00
|
max_budget_usd = 5.00
|
||||||
disallowed_tools = ["ScheduleWakeup"]
|
disallowed_tools = ["ScheduleWakeup"]
|
||||||
prompt ="You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .huskies/README.md for the dev process, .huskies/specs/00_CONTEXT.md for what this project does, and .huskies/specs/tech/STACK.md for the tech stack and source map. The story details are in your prompt above. The worktree and feature branch already exist - do not create them.\n\n## Your workflow\n1. Read the story and understand the acceptance criteria.\n2. Implement the changes.\n3. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done.\n4. Run the run_tests MCP tool. It blocks server-side until tests finish (up to 20 minutes) and returns the full result. Do NOT call get_test_result — run_tests already gives you the pass/fail outcome.\n5. If tests fail, fix the failures and run run_tests again. Do not commit until tests pass.\n6. Once tests pass, commit your work with a descriptive message and exit.\n\nDo NOT accept stories, move them between stages, or merge to master. The server handles all of that after you exit.\n\n## Bug Workflow: Trust the Story, Act Fast\nWhen working on bugs:\n1. READ THE STORY DESCRIPTION FIRST. If it specifies exact files, functions, and line numbers — go directly there and make the fix.\n2. If the story does NOT specify the exact location, investigate with targeted grep.\n3. Fix with a surgical, minimal change.\n4. Run tests, fix failures, commit and exit.\n5. Write commit messages that explain what broke and why."
|
prompt ="You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .huskies/README.md for the dev process, .huskies/specs/00_CONTEXT.md for what this project does, and .huskies/specs/tech/STACK.md for the tech stack and source map. The story details are in your prompt above. The worktree and feature branch already exist - do not create them.\n\n## Your workflow\n1. Read the story and understand the acceptance criteria.\n2. Implement the changes.\n3. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done.\n4. Run the run_tests MCP tool. It blocks server-side until tests finish (up to 20 minutes) and returns the full result. Do NOT call get_test_result — run_tests already gives you the pass/fail outcome.\n5. If tests fail, fix the failures and run run_tests again. Do not commit until tests pass.\n6. Once tests pass, commit your work with a descriptive message and exit.\n\nDo NOT accept stories, move them between stages, or merge to master. The server handles all of that after you exit.\n\n## Bug Workflow: Trust the Story, Act Fast\nWhen working on bugs:\n1. READ THE STORY DESCRIPTION FIRST. If it specifies exact files, functions, and line numbers — go directly there and make the fix.\n2. If the story does NOT specify the exact location, investigate with targeted grep.\n3. Fix with a surgical, minimal change.\n4. Run tests, fix failures, commit and exit.\n5. Write commit messages that explain what broke and why."
|
||||||
@@ -25,7 +27,8 @@ name = "coder-3"
|
|||||||
stage = "coder"
|
stage = "coder"
|
||||||
role = "Full-stack engineer. Implements features across all components."
|
role = "Full-stack engineer. Implements features across all components."
|
||||||
model = "sonnet"
|
model = "sonnet"
|
||||||
max_turns = 80
|
max_turns = 200
|
||||||
|
max_tool_turns = 80
|
||||||
max_budget_usd = 5.00
|
max_budget_usd = 5.00
|
||||||
disallowed_tools = ["ScheduleWakeup"]
|
disallowed_tools = ["ScheduleWakeup"]
|
||||||
prompt ="You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .huskies/README.md for the dev process, .huskies/specs/00_CONTEXT.md for what this project does, and .huskies/specs/tech/STACK.md for the tech stack and source map. The story details are in your prompt above. The worktree and feature branch already exist - do not create them.\n\n## Your workflow\n1. Read the story and understand the acceptance criteria.\n2. Implement the changes.\n3. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done.\n4. Run the run_tests MCP tool. It blocks server-side until tests finish (up to 20 minutes) and returns the full result. Do NOT call get_test_result — run_tests already gives you the pass/fail outcome.\n5. If tests fail, fix the failures and run run_tests again. Do not commit until tests pass.\n6. Once tests pass, commit your work with a descriptive message and exit.\n\nDo NOT accept stories, move them between stages, or merge to master. The server handles all of that after you exit.\n\n## Bug Workflow: Trust the Story, Act Fast\nWhen working on bugs:\n1. READ THE STORY DESCRIPTION FIRST. If it specifies exact files, functions, and line numbers — go directly there and make the fix.\n2. If the story does NOT specify the exact location, investigate with targeted grep.\n3. Fix with a surgical, minimal change.\n4. Run tests, fix failures, commit and exit.\n5. Write commit messages that explain what broke and why."
|
prompt ="You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .huskies/README.md for the dev process, .huskies/specs/00_CONTEXT.md for what this project does, and .huskies/specs/tech/STACK.md for the tech stack and source map. The story details are in your prompt above. The worktree and feature branch already exist - do not create them.\n\n## Your workflow\n1. Read the story and understand the acceptance criteria.\n2. Implement the changes.\n3. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done.\n4. Run the run_tests MCP tool. It blocks server-side until tests finish (up to 20 minutes) and returns the full result. Do NOT call get_test_result — run_tests already gives you the pass/fail outcome.\n5. If tests fail, fix the failures and run run_tests again. Do not commit until tests pass.\n6. Once tests pass, commit your work with a descriptive message and exit.\n\nDo NOT accept stories, move them between stages, or merge to master. The server handles all of that after you exit.\n\n## Bug Workflow: Trust the Story, Act Fast\nWhen working on bugs:\n1. READ THE STORY DESCRIPTION FIRST. If it specifies exact files, functions, and line numbers — go directly there and make the fix.\n2. If the story does NOT specify the exact location, investigate with targeted grep.\n3. Fix with a surgical, minimal change.\n4. Run tests, fix failures, commit and exit.\n5. Write commit messages that explain what broke and why."
|
||||||
@@ -36,7 +39,8 @@ name = "qa-2"
|
|||||||
stage = "qa"
|
stage = "qa"
|
||||||
role = "Reviews coder work in worktrees: runs quality gates, verifies acceptance criteria, and reports findings."
|
role = "Reviews coder work in worktrees: runs quality gates, verifies acceptance criteria, and reports findings."
|
||||||
model = "sonnet"
|
model = "sonnet"
|
||||||
max_turns = 40
|
max_turns = 120
|
||||||
|
max_tool_turns = 40
|
||||||
max_budget_usd = 4.00
|
max_budget_usd = 4.00
|
||||||
prompt = """You are the QA agent for story {{story_id}}. Your job is to verify the coder's work satisfies the story's acceptance criteria and produce a structured QA report.
|
prompt = """You are the QA agent for story {{story_id}}. Your job is to verify the coder's work satisfies the story's acceptance criteria and produce a structured QA report.
|
||||||
|
|
||||||
@@ -127,7 +131,8 @@ name = "coder-opus"
|
|||||||
stage = "coder"
|
stage = "coder"
|
||||||
role = "Senior full-stack engineer for complex tasks. Implements features across all components."
|
role = "Senior full-stack engineer for complex tasks. Implements features across all components."
|
||||||
model = "opus"
|
model = "opus"
|
||||||
max_turns = 80
|
max_turns = 200
|
||||||
|
max_tool_turns = 80
|
||||||
max_budget_usd = 20.00
|
max_budget_usd = 20.00
|
||||||
disallowed_tools = ["ScheduleWakeup"]
|
disallowed_tools = ["ScheduleWakeup"]
|
||||||
prompt ="You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .huskies/README.md for the dev process, .huskies/specs/00_CONTEXT.md for what this project does, and .huskies/specs/tech/STACK.md for the tech stack and source map. The story details are in your prompt above. The worktree and feature branch already exist - do not create them.\n\n## Your workflow\n1. Read the story and understand the acceptance criteria.\n2. Implement the changes.\n3. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done.\n4. Run the run_tests MCP tool. It blocks server-side until tests finish (up to 20 minutes) and returns the full result. Do NOT call get_test_result — run_tests already gives you the pass/fail outcome.\n5. If tests fail, fix the failures and run run_tests again. Do not commit until tests pass.\n6. Once tests pass, commit your work with a descriptive message and exit.\n\nDo NOT accept stories, move them between stages, or merge to master. The server handles all of that after you exit.\n\n## Bug Workflow: Trust the Story, Act Fast\nWhen working on bugs:\n1. READ THE STORY DESCRIPTION FIRST. If it specifies exact files, functions, and line numbers — go directly there and make the fix.\n2. If the story does NOT specify the exact location, investigate with targeted grep.\n3. Fix with a surgical, minimal change.\n4. Run tests, fix failures, commit and exit.\n5. Write commit messages that explain what broke and why."
|
prompt ="You are working in a git worktree on story {{story_id}}. Read CLAUDE.md first, then .huskies/README.md for the dev process, .huskies/specs/00_CONTEXT.md for what this project does, and .huskies/specs/tech/STACK.md for the tech stack and source map. The story details are in your prompt above. The worktree and feature branch already exist - do not create them.\n\n## Your workflow\n1. Read the story and understand the acceptance criteria.\n2. Implement the changes.\n3. As you complete each acceptance criterion, call check_criterion MCP tool to mark it done.\n4. Run the run_tests MCP tool. It blocks server-side until tests finish (up to 20 minutes) and returns the full result. Do NOT call get_test_result — run_tests already gives you the pass/fail outcome.\n5. If tests fail, fix the failures and run run_tests again. Do not commit until tests pass.\n6. Once tests pass, commit your work with a descriptive message and exit.\n\nDo NOT accept stories, move them between stages, or merge to master. The server handles all of that after you exit.\n\n## Bug Workflow: Trust the Story, Act Fast\nWhen working on bugs:\n1. READ THE STORY DESCRIPTION FIRST. If it specifies exact files, functions, and line numbers — go directly there and make the fix.\n2. If the story does NOT specify the exact location, investigate with targeted grep.\n3. Fix with a surgical, minimal change.\n4. Run tests, fix failures, commit and exit.\n5. Write commit messages that explain what broke and why."
|
||||||
@@ -138,7 +143,8 @@ name = "qa"
|
|||||||
stage = "qa"
|
stage = "qa"
|
||||||
role = "Reviews coder work in worktrees: runs quality gates, verifies acceptance criteria, and reports findings."
|
role = "Reviews coder work in worktrees: runs quality gates, verifies acceptance criteria, and reports findings."
|
||||||
model = "sonnet"
|
model = "sonnet"
|
||||||
max_turns = 40
|
max_turns = 120
|
||||||
|
max_tool_turns = 40
|
||||||
max_budget_usd = 4.00
|
max_budget_usd = 4.00
|
||||||
prompt = """You are the QA agent for story {{story_id}}. Your job is to verify the coder's work satisfies the story's acceptance criteria and produce a structured QA report.
|
prompt = """You are the QA agent for story {{story_id}}. Your job is to verify the coder's work satisfies the story's acceptance criteria and produce a structured QA report.
|
||||||
|
|
||||||
@@ -229,7 +235,8 @@ name = "mergemaster"
|
|||||||
stage = "mergemaster"
|
stage = "mergemaster"
|
||||||
role = "Merges completed coder work into master, runs quality gates, archives stories, and cleans up worktrees."
|
role = "Merges completed coder work into master, runs quality gates, archives stories, and cleans up worktrees."
|
||||||
model = "opus"
|
model = "opus"
|
||||||
max_turns = 100
|
max_turns = 250
|
||||||
|
max_tool_turns = 100
|
||||||
max_budget_usd = 25.00
|
max_budget_usd = 25.00
|
||||||
inactivity_timeout_secs = 900
|
inactivity_timeout_secs = 900
|
||||||
prompt = """You are the mergemaster agent for story {{story_id}}. Your job is to merge the completed coder work into master.
|
prompt = """You are the mergemaster agent for story {{story_id}}. Your job is to merge the completed coder work into master.
|
||||||
|
|||||||
@@ -41,7 +41,13 @@ pub(crate) fn resolve_session_log(
|
|||||||
crate::agent_log::find_latest_log(project_root, story_id, agent_name)
|
crate::agent_log::find_latest_log(project_root, story_id, agent_name)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Count `assistant` events in a single log file.
|
/// Count **tool-using** assistant turns in a single log file (story 923).
|
||||||
|
///
|
||||||
|
/// A turn counts only if its assistant message contains at least one
|
||||||
|
/// `content` block with `type == "tool_use"`. Narration-only turns
|
||||||
|
/// (text-only assistant messages such as "I'll read X next" preambles)
|
||||||
|
/// don't count against the watchdog limit — they make no progress, only
|
||||||
|
/// burn budget, and Sonnet emits them at roughly 30–55% of all turns.
|
||||||
pub(crate) fn count_turns_in_log(path: &Path) -> u64 {
|
pub(crate) fn count_turns_in_log(path: &Path) -> u64 {
|
||||||
let entries = match crate::agent_log::read_log(path) {
|
let entries = match crate::agent_log::read_log(path) {
|
||||||
Ok(e) => e,
|
Ok(e) => e,
|
||||||
@@ -50,13 +56,24 @@ pub(crate) fn count_turns_in_log(path: &Path) -> u64 {
|
|||||||
entries
|
entries
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|entry| {
|
.filter(|entry| {
|
||||||
entry.event.get("type").and_then(|v| v.as_str()) == Some("agent_json")
|
if entry.event.get("type").and_then(|v| v.as_str()) != Some("agent_json") {
|
||||||
&& entry
|
return false;
|
||||||
.event
|
}
|
||||||
.get("data")
|
let data = match entry.event.get("data") {
|
||||||
.and_then(|d| d.get("type"))
|
Some(d) => d,
|
||||||
.and_then(|v| v.as_str())
|
None => return false,
|
||||||
== Some("assistant")
|
};
|
||||||
|
if data.get("type").and_then(|v| v.as_str()) != Some("assistant") {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Require at least one tool_use content block.
|
||||||
|
data.pointer("/message/content")
|
||||||
|
.and_then(|c| c.as_array())
|
||||||
|
.map(|arr| {
|
||||||
|
arr.iter()
|
||||||
|
.any(|item| item.get("type").and_then(|v| v.as_str()) == Some("tool_use"))
|
||||||
|
})
|
||||||
|
.unwrap_or(false)
|
||||||
})
|
})
|
||||||
.count() as u64
|
.count() as u64
|
||||||
}
|
}
|
||||||
@@ -103,7 +120,10 @@ pub(super) fn check_agent_limits(
|
|||||||
|
|
||||||
for (key, story_id, agent_name, tx, log_session_id) in &running {
|
for (key, story_id, agent_name, tx, log_session_id) in &running {
|
||||||
let agent_config = config.agent.iter().find(|a| a.name == *agent_name);
|
let agent_config = config.agent.iter().find(|a| a.name == *agent_name);
|
||||||
let max_turns = agent_config.and_then(|a| a.max_turns);
|
// The watchdog gates on max_tool_turns (counts only tool-using
|
||||||
|
// assistant turns) when set; otherwise falls back to max_turns for
|
||||||
|
// backwards compatibility with configs that haven't migrated yet.
|
||||||
|
let max_turns = agent_config.and_then(|a| a.max_tool_turns.or(a.max_turns));
|
||||||
let max_budget_usd = agent_config.and_then(|a| a.max_budget_usd);
|
let max_budget_usd = agent_config.and_then(|a| a.max_budget_usd);
|
||||||
|
|
||||||
// Skip agents with no limits configured.
|
// Skip agents with no limits configured.
|
||||||
|
|||||||
@@ -2,7 +2,10 @@
|
|||||||
//! for the watchdog (bugs 624, 646, 650).
|
//! for the watchdog (bugs 624, 646, 650).
|
||||||
|
|
||||||
use super::super::super::super::{AgentPool, composite_key};
|
use super::super::super::super::{AgentPool, composite_key};
|
||||||
use super::{write_fake_budget_session_log, write_fake_session_log, write_project_config};
|
use super::{
|
||||||
|
write_fake_budget_session_log, write_fake_mixed_session_log, write_fake_session_log,
|
||||||
|
write_project_config,
|
||||||
|
};
|
||||||
use crate::agents::{AgentEvent, AgentStatus, TerminationReason};
|
use crate::agents::{AgentEvent, AgentStatus, TerminationReason};
|
||||||
|
|
||||||
// ── Limit enforcement integration tests (bug 624) ────────────────────────
|
// ── Limit enforcement integration tests (bug 624) ────────────────────────
|
||||||
@@ -521,3 +524,92 @@ max_turns = 10
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Narration filter (story 923) ────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Story 923: narration-only assistant turns (text-only, no `tool_use`)
|
||||||
|
/// must not count against the watchdog's turn budget. A session log with
|
||||||
|
/// 5 tool turns and 30 narration turns reports turns_used == 5, so an
|
||||||
|
/// agent with max_tool_turns = 10 stays Running.
|
||||||
|
#[test]
|
||||||
|
fn watchdog_does_not_count_narration_only_turns() {
|
||||||
|
let tmp = tempfile::tempdir().unwrap();
|
||||||
|
let root = tmp.path();
|
||||||
|
|
||||||
|
write_project_config(
|
||||||
|
root,
|
||||||
|
r#"
|
||||||
|
[[agent]]
|
||||||
|
name = "coder-1"
|
||||||
|
runtime = "claude-code"
|
||||||
|
max_tool_turns = 10
|
||||||
|
max_turns = 200
|
||||||
|
"#,
|
||||||
|
);
|
||||||
|
|
||||||
|
// 5 tool-using turns + 30 narration-only turns.
|
||||||
|
// Without the filter, total turns = 35 > max_tool_turns = 10 → kill.
|
||||||
|
// With the filter, tool turns = 5 ≤ 10 → stay Running.
|
||||||
|
write_fake_mixed_session_log(root, "story_923", "coder-1", "sess-narr", 5, 30);
|
||||||
|
|
||||||
|
let pool = AgentPool::new_test(3001);
|
||||||
|
pool.inject_test_agent_with_session("story_923", "coder-1", AgentStatus::Running, "sess-narr");
|
||||||
|
|
||||||
|
let found = pool.run_watchdog_pass(Some(root));
|
||||||
|
assert_eq!(
|
||||||
|
found, 0,
|
||||||
|
"agent must not be terminated: only 5 tool turns of a 10-turn budget"
|
||||||
|
);
|
||||||
|
|
||||||
|
let agents = pool.agents.lock().unwrap();
|
||||||
|
let key = composite_key("story_923", "coder-1");
|
||||||
|
let agent = agents.get(&key).unwrap();
|
||||||
|
assert_eq!(agent.status, AgentStatus::Running);
|
||||||
|
assert!(agent.termination_reason.is_none());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Story 923: max_tool_turns takes precedence over max_turns when both are
|
||||||
|
/// set. With max_tool_turns = 3 and max_turns = 200, an agent that has 4
|
||||||
|
/// tool turns is killed even though total turns (4) is far below max_turns.
|
||||||
|
#[test]
|
||||||
|
fn watchdog_max_tool_turns_overrides_max_turns() {
|
||||||
|
let tmp = tempfile::tempdir().unwrap();
|
||||||
|
let root = tmp.path();
|
||||||
|
|
||||||
|
write_project_config(
|
||||||
|
root,
|
||||||
|
r#"
|
||||||
|
[[agent]]
|
||||||
|
name = "coder-1"
|
||||||
|
runtime = "claude-code"
|
||||||
|
max_tool_turns = 3
|
||||||
|
max_turns = 200
|
||||||
|
"#,
|
||||||
|
);
|
||||||
|
|
||||||
|
write_fake_mixed_session_log(root, "story_923b", "coder-1", "sess-over", 4, 0);
|
||||||
|
|
||||||
|
let pool = AgentPool::new_test(3001);
|
||||||
|
let tx = pool.inject_test_agent_with_session(
|
||||||
|
"story_923b",
|
||||||
|
"coder-1",
|
||||||
|
AgentStatus::Running,
|
||||||
|
"sess-over",
|
||||||
|
);
|
||||||
|
let mut rx = tx.subscribe();
|
||||||
|
|
||||||
|
let found = pool.run_watchdog_pass(Some(root));
|
||||||
|
assert!(
|
||||||
|
found >= 1,
|
||||||
|
"watchdog must terminate when tool turns exceed max_tool_turns"
|
||||||
|
);
|
||||||
|
|
||||||
|
let agents = pool.agents.lock().unwrap();
|
||||||
|
let key = composite_key("story_923b", "coder-1");
|
||||||
|
let agent = agents.get(&key).unwrap();
|
||||||
|
assert_eq!(agent.status, AgentStatus::Failed);
|
||||||
|
assert_eq!(agent.termination_reason, Some(TerminationReason::TurnLimit));
|
||||||
|
|
||||||
|
let event = rx.try_recv().expect("watchdog must emit an Error event");
|
||||||
|
assert!(matches!(event, AgentEvent::Error { .. }));
|
||||||
|
}
|
||||||
|
|||||||
@@ -5,29 +5,75 @@ use std::path::Path;
|
|||||||
mod limits_tests;
|
mod limits_tests;
|
||||||
mod orphan_tests;
|
mod orphan_tests;
|
||||||
|
|
||||||
/// Write a fake session log file with `n` assistant turn entries.
|
/// Write a fake session log file with `n` tool-using assistant turn entries.
|
||||||
///
|
///
|
||||||
/// The file is named `{agent_name}-{session_id}.log` to match the
|
/// Each turn includes a single `tool_use` content block so it counts under
|
||||||
/// real naming convention used by `AgentLogWriter`.
|
/// the watchdog's tool-only turn filter (story 923). The file is named
|
||||||
|
/// `{agent_name}-{session_id}.log` to match the real naming convention
|
||||||
|
/// used by `AgentLogWriter`.
|
||||||
pub(super) fn write_fake_session_log(
|
pub(super) fn write_fake_session_log(
|
||||||
project_root: &Path,
|
project_root: &Path,
|
||||||
story_id: &str,
|
story_id: &str,
|
||||||
agent_name: &str,
|
agent_name: &str,
|
||||||
session_id: &str,
|
session_id: &str,
|
||||||
n_turns: u64,
|
n_turns: u64,
|
||||||
|
) {
|
||||||
|
write_fake_mixed_session_log(project_root, story_id, agent_name, session_id, n_turns, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write a fake session log with `n_tool` tool-using assistant turns and
|
||||||
|
/// `n_narration` text-only narration turns (story 923 regression).
|
||||||
|
///
|
||||||
|
/// Tool turns contain a `tool_use` content block; narration turns contain
|
||||||
|
/// only a `text` block. The watchdog's `count_turns_in_log` must report
|
||||||
|
/// `n_tool` only — narration must not count.
|
||||||
|
pub(super) fn write_fake_mixed_session_log(
|
||||||
|
project_root: &Path,
|
||||||
|
story_id: &str,
|
||||||
|
agent_name: &str,
|
||||||
|
session_id: &str,
|
||||||
|
n_tool: u64,
|
||||||
|
n_narration: u64,
|
||||||
) {
|
) {
|
||||||
let log_dir = project_root.join(".huskies").join("logs").join(story_id);
|
let log_dir = project_root.join(".huskies").join("logs").join(story_id);
|
||||||
std::fs::create_dir_all(&log_dir).unwrap();
|
std::fs::create_dir_all(&log_dir).unwrap();
|
||||||
let log_path = log_dir.join(format!("{agent_name}-{session_id}.log"));
|
let log_path = log_dir.join(format!("{agent_name}-{session_id}.log"));
|
||||||
let mut content = String::new();
|
let mut content = String::new();
|
||||||
for _ in 0..n_turns {
|
for _ in 0..n_tool {
|
||||||
content.push_str(
|
content.push_str(
|
||||||
&serde_json::to_string(&serde_json::json!({
|
&serde_json::to_string(&serde_json::json!({
|
||||||
"timestamp": "2026-04-25T00:00:00Z",
|
"timestamp": "2026-04-25T00:00:00Z",
|
||||||
"type": "agent_json",
|
"type": "agent_json",
|
||||||
"story_id": story_id,
|
"story_id": story_id,
|
||||||
"agent_name": agent_name,
|
"agent_name": agent_name,
|
||||||
"data": { "type": "assistant", "message": {} }
|
"data": {
|
||||||
|
"type": "assistant",
|
||||||
|
"message": {
|
||||||
|
"content": [
|
||||||
|
{ "type": "tool_use", "name": "Read", "input": {} }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
);
|
||||||
|
content.push('\n');
|
||||||
|
}
|
||||||
|
for _ in 0..n_narration {
|
||||||
|
content.push_str(
|
||||||
|
&serde_json::to_string(&serde_json::json!({
|
||||||
|
"timestamp": "2026-04-25T00:00:00Z",
|
||||||
|
"type": "agent_json",
|
||||||
|
"story_id": story_id,
|
||||||
|
"agent_name": agent_name,
|
||||||
|
"data": {
|
||||||
|
"type": "assistant",
|
||||||
|
"message": {
|
||||||
|
"content": [
|
||||||
|
{ "type": "text", "text": "Now let me read the next file." }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
}))
|
}))
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -242,6 +242,15 @@ pub struct AgentConfig {
|
|||||||
pub disallowed_tools: Option<Vec<String>>,
|
pub disallowed_tools: Option<Vec<String>>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub max_turns: Option<u32>,
|
pub max_turns: Option<u32>,
|
||||||
|
/// Watchdog cap on **tool-using** assistant turns (story 923).
|
||||||
|
///
|
||||||
|
/// Narration-only turns (assistant messages with no `tool_use` block)
|
||||||
|
/// don't count against this. When unset, the watchdog falls back to
|
||||||
|
/// `max_turns`. Set this lower than `max_turns` so claude-code's own
|
||||||
|
/// `--max-turns` cap (which counts narration) doesn't fire before the
|
||||||
|
/// watchdog.
|
||||||
|
#[serde(default)]
|
||||||
|
pub max_tool_turns: Option<u32>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub max_budget_usd: Option<f64>,
|
pub max_budget_usd: Option<f64>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
@@ -325,6 +334,7 @@ impl Default for ProjectConfig {
|
|||||||
allowed_tools: None,
|
allowed_tools: None,
|
||||||
disallowed_tools: None,
|
disallowed_tools: None,
|
||||||
max_turns: None,
|
max_turns: None,
|
||||||
|
max_tool_turns: None,
|
||||||
max_budget_usd: None,
|
max_budget_usd: None,
|
||||||
system_prompt: None,
|
system_prompt: None,
|
||||||
stage: None,
|
stage: None,
|
||||||
|
|||||||
Reference in New Issue
Block a user