story-kit: merge 161_bug_auto_assign_only_triggers_on_agent_completion_not_on_failure_or_periodically
This commit is contained in:
@@ -1658,14 +1658,24 @@ impl AgentPool {
|
||||
/// The watchdog runs every 30 seconds. It is a safety net for edge cases where the
|
||||
/// PTY read loop exits without updating the agent status (e.g. a panic in the
|
||||
/// spawn_blocking task, or an external SIGKILL that closes the PTY fd immediately).
|
||||
pub fn spawn_watchdog(&self) {
|
||||
let agents = Arc::clone(&self.agents);
|
||||
///
|
||||
/// When orphaned agents are detected and a `project_root` is provided, auto-assign
|
||||
/// is triggered so that free agents can pick up unassigned work.
|
||||
pub fn spawn_watchdog(pool: Arc<AgentPool>, project_root: Option<PathBuf>) {
|
||||
tokio::spawn(async move {
|
||||
let mut interval =
|
||||
tokio::time::interval(std::time::Duration::from_secs(30));
|
||||
loop {
|
||||
interval.tick().await;
|
||||
check_orphaned_agents(&agents);
|
||||
let found = check_orphaned_agents(&pool.agents);
|
||||
if found > 0
|
||||
&& let Some(ref root) = project_root
|
||||
{
|
||||
slog!(
|
||||
"[watchdog] {found} orphaned agent(s) detected; triggering auto-assign."
|
||||
);
|
||||
pool.auto_assign_available_work(root).await;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -1825,10 +1835,10 @@ fn find_free_agent_for_stage<'a>(
|
||||
/// without updating the agent status — for example when the process is killed
|
||||
/// externally and the PTY master fd returns EOF before our inactivity timeout
|
||||
/// fires, but some other edge case prevents the normal cleanup path from running.
|
||||
fn check_orphaned_agents(agents: &Mutex<HashMap<String, StoryAgent>>) {
|
||||
fn check_orphaned_agents(agents: &Mutex<HashMap<String, StoryAgent>>) -> usize {
|
||||
let mut lock = match agents.lock() {
|
||||
Ok(l) => l,
|
||||
Err(_) => return,
|
||||
Err(_) => return 0,
|
||||
};
|
||||
|
||||
// Collect orphaned entries: Running or Pending agents whose task handle is finished.
|
||||
@@ -1850,6 +1860,7 @@ fn check_orphaned_agents(agents: &Mutex<HashMap<String, StoryAgent>>) {
|
||||
})
|
||||
.collect();
|
||||
|
||||
let count = orphaned.len();
|
||||
for (key, story_id, tx, prev_status) in orphaned {
|
||||
if let Some(agent) = lock.get_mut(&key) {
|
||||
agent.status = AgentStatus::Failed;
|
||||
@@ -1865,6 +1876,7 @@ fn check_orphaned_agents(agents: &Mutex<HashMap<String, StoryAgent>>) {
|
||||
});
|
||||
}
|
||||
}
|
||||
count
|
||||
}
|
||||
|
||||
/// Server-owned completion: runs acceptance gates when an agent process exits
|
||||
@@ -5917,4 +5929,78 @@ theirs
|
||||
"merge workspace should be cleaned up after failure"
|
||||
);
|
||||
}
|
||||
|
||||
// ── check_orphaned_agents return value tests (bug 161) ──────────────────
|
||||
|
||||
#[tokio::test]
|
||||
async fn check_orphaned_agents_returns_count_of_orphaned_agents() {
|
||||
let pool = AgentPool::new(3001);
|
||||
|
||||
// Spawn two tasks that finish immediately.
|
||||
let h1 = tokio::spawn(async {});
|
||||
let h2 = tokio::spawn(async {});
|
||||
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
|
||||
assert!(h1.is_finished());
|
||||
assert!(h2.is_finished());
|
||||
|
||||
pool.inject_test_agent_with_handle("story_a", "coder", AgentStatus::Running, h1);
|
||||
pool.inject_test_agent_with_handle("story_b", "coder", AgentStatus::Running, h2);
|
||||
|
||||
let found = check_orphaned_agents(&pool.agents);
|
||||
assert_eq!(found, 2, "should detect both orphaned agents");
|
||||
}
|
||||
|
||||
/// When every injected agent is already in a terminal state (`Completed` or
/// `Failed`), `check_orphaned_agents` is expected to report zero orphans.
#[test]
fn check_orphaned_agents_returns_zero_when_no_orphans() {
    let pool = AgentPool::new(3001);

    // Terminal-state agents only — none of these should be treated as orphaned.
    for (story_id, agent_name, status) in [
        ("story_a", "coder", AgentStatus::Completed),
        ("story_b", "qa", AgentStatus::Failed),
    ] {
        pool.inject_test_agent(story_id, agent_name, status);
    }

    let orphan_count = check_orphaned_agents(&pool.agents);
    assert_eq!(orphan_count, 0, "no orphans should be detected for terminal agents");
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn watchdog_orphan_detection_returns_nonzero_enabling_auto_assign() {
|
||||
// This test verifies the contract that `check_orphaned_agents` returns
|
||||
// a non-zero count when orphans exist, which the watchdog uses to
|
||||
// decide whether to trigger auto-assign (bug 161).
|
||||
let pool = AgentPool::new(3001);
|
||||
|
||||
let handle = tokio::spawn(async {});
|
||||
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
|
||||
|
||||
pool.inject_test_agent_with_handle(
|
||||
"orphan_story",
|
||||
"coder",
|
||||
AgentStatus::Running,
|
||||
handle,
|
||||
);
|
||||
|
||||
// Before watchdog: agent is Running.
|
||||
{
|
||||
let agents = pool.agents.lock().unwrap();
|
||||
let key = composite_key("orphan_story", "coder");
|
||||
assert_eq!(agents.get(&key).unwrap().status, AgentStatus::Running);
|
||||
}
|
||||
|
||||
// Run watchdog pass — should return 1 (orphan found).
|
||||
let found = check_orphaned_agents(&pool.agents);
|
||||
assert_eq!(
|
||||
found, 1,
|
||||
"watchdog must return 1 for a single orphaned agent"
|
||||
);
|
||||
|
||||
// After watchdog: agent is Failed.
|
||||
{
|
||||
let agents = pool.agents.lock().unwrap();
|
||||
let key = composite_key("orphan_story", "coder");
|
||||
assert_eq!(
|
||||
agents.get(&key).unwrap().status,
|
||||
AgentStatus::Failed,
|
||||
"orphaned agent must be marked Failed"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user