story-kit: merge 161_bug_auto_assign_only_triggers_on_agent_completion_not_on_failure_or_periodically

This commit is contained in:
Dave
2026-02-24 17:28:45 +00:00
parent d00b459a74
commit fb91096b09
2 changed files with 121 additions and 6 deletions

View File

@@ -1658,14 +1658,24 @@ impl AgentPool {
/// The watchdog runs every 30 seconds. It is a safety net for edge cases where the
/// PTY read loop exits without updating the agent status (e.g. a panic in the
/// spawn_blocking task, or an external SIGKILL that closes the PTY fd immediately).
pub fn spawn_watchdog(&self) {
let agents = Arc::clone(&self.agents);
///
/// When orphaned agents are detected and a `project_root` is provided, auto-assign
/// is triggered so that free agents can pick up unassigned work.
pub fn spawn_watchdog(pool: Arc<AgentPool>, project_root: Option<PathBuf>) {
tokio::spawn(async move {
let mut interval =
tokio::time::interval(std::time::Duration::from_secs(30));
loop {
interval.tick().await;
check_orphaned_agents(&agents);
let found = check_orphaned_agents(&pool.agents);
if found > 0
&& let Some(ref root) = project_root
{
slog!(
"[watchdog] {found} orphaned agent(s) detected; triggering auto-assign."
);
pool.auto_assign_available_work(root).await;
}
}
});
}
@@ -1825,10 +1835,10 @@ fn find_free_agent_for_stage<'a>(
/// without updating the agent status — for example when the process is killed
/// externally and the PTY master fd returns EOF before our inactivity timeout
/// fires, but some other edge case prevents the normal cleanup path from running.
fn check_orphaned_agents(agents: &Mutex<HashMap<String, StoryAgent>>) {
fn check_orphaned_agents(agents: &Mutex<HashMap<String, StoryAgent>>) -> usize {
let mut lock = match agents.lock() {
Ok(l) => l,
Err(_) => return,
Err(_) => return 0,
};
// Collect orphaned entries: Running or Pending agents whose task handle is finished.
@@ -1850,6 +1860,7 @@ fn check_orphaned_agents(agents: &Mutex<HashMap<String, StoryAgent>>) {
})
.collect();
let count = orphaned.len();
for (key, story_id, tx, prev_status) in orphaned {
if let Some(agent) = lock.get_mut(&key) {
agent.status = AgentStatus::Failed;
@@ -1865,6 +1876,7 @@ fn check_orphaned_agents(agents: &Mutex<HashMap<String, StoryAgent>>) {
});
}
}
count
}
/// Server-owned completion: runs acceptance gates when an agent process exits
@@ -5917,4 +5929,78 @@ theirs
"merge workspace should be cleaned up after failure"
);
}
// ── check_orphaned_agents return value tests (bug 161) ──────────────────
#[tokio::test]
async fn check_orphaned_agents_returns_count_of_orphaned_agents() {
let pool = AgentPool::new(3001);
// Spawn two tasks that finish immediately.
let h1 = tokio::spawn(async {});
let h2 = tokio::spawn(async {});
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
assert!(h1.is_finished());
assert!(h2.is_finished());
pool.inject_test_agent_with_handle("story_a", "coder", AgentStatus::Running, h1);
pool.inject_test_agent_with_handle("story_b", "coder", AgentStatus::Running, h2);
let found = check_orphaned_agents(&pool.agents);
assert_eq!(found, 2, "should detect both orphaned agents");
}
#[test]
fn check_orphaned_agents_returns_zero_when_no_orphans() {
let pool = AgentPool::new(3001);
// Inject agents in terminal states — not orphaned.
pool.inject_test_agent("story_a", "coder", AgentStatus::Completed);
pool.inject_test_agent("story_b", "qa", AgentStatus::Failed);
let found = check_orphaned_agents(&pool.agents);
assert_eq!(found, 0, "no orphans should be detected for terminal agents");
}
#[tokio::test]
async fn watchdog_orphan_detection_returns_nonzero_enabling_auto_assign() {
// This test verifies the contract that `check_orphaned_agents` returns
// a non-zero count when orphans exist, which the watchdog uses to
// decide whether to trigger auto-assign (bug 161).
let pool = AgentPool::new(3001);
let handle = tokio::spawn(async {});
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
pool.inject_test_agent_with_handle(
"orphan_story",
"coder",
AgentStatus::Running,
handle,
);
// Before watchdog: agent is Running.
{
let agents = pool.agents.lock().unwrap();
let key = composite_key("orphan_story", "coder");
assert_eq!(agents.get(&key).unwrap().status, AgentStatus::Running);
}
// Run watchdog pass — should return 1 (orphan found).
let found = check_orphaned_agents(&pool.agents);
assert_eq!(
found, 1,
"watchdog must return 1 for a single orphaned agent"
);
// After watchdog: agent is Failed.
{
let agents = pool.agents.lock().unwrap();
let key = composite_key("orphan_story", "coder");
assert_eq!(
agents.get(&key).unwrap().status,
AgentStatus::Failed,
"orphaned agent must be marked Failed"
);
}
}
}