Files
huskies/server/src/agents/pool/auto_assign/watchdog/mod.rs
T

86 lines
3.3 KiB
Rust
Raw Normal View History

2026-04-28 11:13:02 +00:00
//! Watchdog task: detects orphaned agents, enforces turn/budget limits, and
//! triggers auto-assign.
mod budget;
mod limits;
mod orphan;
#[cfg(test)]
mod tests;
use std::path::Path;
use crate::config::ProjectConfig;
use crate::slog;
use super::super::AgentPool;
use limits::check_agent_limits;
use orphan::check_orphaned_agents;
pub(crate) use budget::{compute_budget_from_logs, compute_budget_from_single_log};
pub(crate) use limits::{count_turns_in_log, resolve_session_log};
impl AgentPool {
    /// Run a single watchdog pass synchronously (test helper).
    ///
    /// Only the orphan check is performed; turn/budget limits are not
    /// evaluated and the orphan check's result is discarded.
    #[cfg(test)]
    pub fn run_watchdog_once(&self) {
        check_orphaned_agents(&self.agents);
    }

    /// Run one watchdog pass: detect orphans, enforce limits, kill offenders.
    ///
    /// Called by the unified background tick loop every 30 ticks.
    ///
    /// When a limit is exceeded the agent's PTY child is killed and the
    /// `should_block_story` retry mechanism is invoked. The story is marked
    /// `blocked: true` only when `retry_count >= max_retries`; otherwise
    /// `retry_count` is incremented and the story stays in `2_current/` for
    /// re-attempt. This prevents the original kill-respawn loop (bug 646)
    /// while restoring the `max_retries` semantic for turn/budget overruns.
    ///
    /// `project_root` is the project directory used for limit checks and for
    /// loading the project configuration. When it is `None`, only the orphan
    /// check runs.
    ///
    /// Returns the value reported by `check_orphaned_agents` plus the number
    /// of agents terminated by `check_agent_limits` during this pass.
    pub fn run_watchdog_pass(&self, project_root: Option<&Path>) -> usize {
        let orphaned = check_orphaned_agents(&self.agents);

        // Limit enforcement needs a project root; without one we are done.
        let Some(root) = project_root else {
            return orphaned;
        };

        let terminated = check_agent_limits(&self.agents, root);
        if terminated.is_empty() {
            // Nothing exceeded a limit: skip the config load and the
            // state-change notification entirely.
            return orphaned;
        }

        // Loaded only once at least one agent was terminated, so quiet passes
        // do not pay for a config load (presumably a disk read) they never use.
        let config = ProjectConfig::load(root).unwrap_or_default();

        for (key, _reason) in &terminated {
            // Kill the PTY child and abort the task, same as stop_agent.
            self.kill_child_for_key(key);
            match self.agents.lock() {
                Ok(mut agents) => {
                    let handle = agents
                        .get_mut(key)
                        .and_then(|agent| agent.task_handle.take());
                    if let Some(handle) = handle {
                        handle.abort();
                    }
                }
                // Best-effort: keep processing the remaining agents, but leave
                // a trace that this task could not be aborted.
                Err(_) => {
                    slog!("[watchdog] Agent lock poisoned; could not abort task for '{key}'.");
                }
            }

            // Use the retry mechanism: increment retry_count and only block
            // when the limit is exceeded, matching the pipeline's behaviour.
            // Everything before the last ':' in the key is treated as the
            // story id; a key without ':' is used whole.
            let story_id = key.rsplit_once(':').map(|(s, _)| s).unwrap_or(key);
            match super::super::pipeline::should_block_story(
                story_id,
                config.max_retries,
                "watchdog",
            ) {
                Some(block_reason) => {
                    // A send failure is deliberately ignored: the watchdog
                    // must not fail just because nobody is listening.
                    let _ = self
                        .watcher_tx
                        .send(crate::io::watcher::WatcherEvent::StoryBlocked {
                            story_id: story_id.to_string(),
                            reason: block_reason,
                        });
                    slog!("[watchdog] Story '{story_id}' blocked after exceeding retry limit.");
                }
                None => {
                    slog!(
                        "[watchdog] Story '{story_id}' retry incremented after limit \
                         termination; stays in 2_current/ for re-attempt."
                    );
                }
            }
        }

        // At least one agent was terminated, so listeners need a refresh.
        Self::notify_agent_state_changed(&self.watcher_tx);
        orphaned + terminated.len()
    }
}