Accept story 41: Agent Completion Notification via MCP

Add wait_for_agent MCP tool that blocks until an agent reaches a terminal state (completed, failed, stopped). Returns final status with session_id, worktree_path, and git commits made by the agent.

- Subscribe-before-check pattern avoids race conditions
- Handles lagged receivers, channel closure, and configurable timeout
- Default timeout 5 minutes, includes git log of agent commits in response
- 11 new tests covering all paths

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-20 13:16:04 +00:00
parent fa5d013fe2
commit c6a04f5e53
3 changed files with 387 additions and 16 deletions
--- a/server/src/agents.rs
+++ b/server/src/agents.rs
@@ -69,7 +69,7 @@ impl std::fmt::Display for AgentStatus {
    }
 }

-#[derive(Serialize, Clone)]
+#[derive(Debug, Serialize, Clone)]
 pub struct AgentInfo {
    pub story_id: String,
    pub agent_name: String,
@@ -90,6 +90,24 @@ struct StoryAgent {
    event_log: Arc<Mutex<Vec<AgentEvent>>>,
 }

+/// Build an owned `AgentInfo` snapshot for `story_id` from a `StoryAgent` map entry (clones name, status and session id; `worktree_path` and `base_branch` are `None` when the entry has no worktree info).
+fn agent_info_from_entry(story_id: &str, agent: &StoryAgent) -> AgentInfo {
+    AgentInfo {
+        story_id: story_id.to_string(),
+        agent_name: agent.agent_name.clone(),
+        status: agent.status.clone(),
+        session_id: agent.session_id.clone(),
+        worktree_path: agent
+            .worktree_info
+            .as_ref()
+            .map(|wt| wt.path.to_string_lossy().to_string()),
+        base_branch: agent
+            .worktree_info
+            .as_ref()
+            .map(|wt| wt.base_branch.clone()),
+    }
+}
+
 /// Manages concurrent story agents, each in its own worktree.
 pub struct AgentPool {
    agents: Arc<Mutex<HashMap<String, StoryAgent>>>,
@@ -314,20 +332,7 @@ impl AgentPool {
                    .rsplit_once(':')
                    .map(|(sid, _)| sid.to_string())
                    .unwrap_or_else(|| key.clone());
-                AgentInfo {
-                    story_id,
-                    agent_name: agent.agent_name.clone(),
-                    status: agent.status.clone(),
-                    session_id: agent.session_id.clone(),
-                    worktree_path: agent
-                        .worktree_info
-                        .as_ref()
-                        .map(|wt| wt.path.to_string_lossy().to_string()),
-                    base_branch: agent
-                        .worktree_info
-                        .as_ref()
-                        .map(|wt| wt.base_branch.clone()),
-                }
+                agent_info_from_entry(&story_id, agent)
            })
            .collect())
    }
@@ -361,6 +366,104 @@ impl AgentPool {
        Ok(log.drain(..).collect())
    }

+    /// Wait until the agent emits a terminal event: `Done`, `Error`, or a `Status` event whose status is "stopped" (or return at once if it is already recorded as Completed/Failed).
+    /// Returns an `AgentInfo` snapshot taken from the agent map, or one synthesized from the terminal event when the entry is no longer in the map.
+    /// `timeout_ms` caps how long to wait; returns an error if the deadline passes, if `subscribe` fails, if the map lock cannot be taken, or if the channel closes with no map entry left.
+    pub async fn wait_for_agent(
+        &self,
+        story_id: &str,
+        agent_name: &str,
+        timeout_ms: u64,
+    ) -> Result<AgentInfo, String> {
+        // Subscribe before checking status so the terminal event is not missed
+        // if the agent finishes in the window between the two operations.
+        let mut rx = self.subscribe(story_id, agent_name)?;
+
+        // Return immediately if the map already records the agent as Completed or Failed.
+        {
+            let agents = self.agents.lock().map_err(|e| e.to_string())?;
+            let key = composite_key(story_id, agent_name);
+            if let Some(agent) = agents.get(&key)
+                && matches!(agent.status, AgentStatus::Completed | AgentStatus::Failed)
+            {
+                return Ok(agent_info_from_entry(story_id, agent));
+            }
+        }
+
+        let deadline =
+            tokio::time::Instant::now() + std::time::Duration::from_millis(timeout_ms);
+
+        loop {
+            let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
+            if remaining.is_zero() {
+                return Err(format!(
+                    "Timed out after {timeout_ms}ms waiting for agent '{agent_name}' on story '{story_id}'"
+                ));
+            }
+
+            match tokio::time::timeout(remaining, rx.recv()).await {
+                Ok(Ok(event)) => {
+                    let is_terminal = match &event {
+                        AgentEvent::Done { .. } | AgentEvent::Error { .. } => true,
+                        AgentEvent::Status { status, .. } if status == "stopped" => true,
+                        _ => false,
+                    };
+                    if is_terminal {
+                        let agents = self.agents.lock().map_err(|e| e.to_string())?;
+                        let key = composite_key(story_id, agent_name);
+                        return Ok(if let Some(agent) = agents.get(&key) {
+                            agent_info_from_entry(story_id, agent)
+                        } else {
+                            // Agent was removed from map (e.g. stop_agent removes it after the "stopped"
+                            // status event is sent). Only `Done` maps to Completed here; any other terminal event, including "stopped", is reported as Failed.
+                            let (status, session_id) = match event {
+                                AgentEvent::Done { session_id, .. } => {
+                                    (AgentStatus::Completed, session_id)
+                                }
+                                _ => (AgentStatus::Failed, None),
+                            };
+                            AgentInfo {
+                                story_id: story_id.to_string(),
+                                agent_name: agent_name.to_string(),
+                                status,
+                                session_id,
+                                worktree_path: None,
+                                base_branch: None,
+                            }
+                        });
+                    }
+                }
+                Ok(Err(broadcast::error::RecvError::Lagged(_))) => {
+                    // Some buffered events were skipped, possibly the terminal one, so check the map's current status before resuming.
+                    let agents = self.agents.lock().map_err(|e| e.to_string())?;
+                    let key = composite_key(story_id, agent_name);
+                    if let Some(agent) = agents.get(&key)
+                        && matches!(agent.status, AgentStatus::Completed | AgentStatus::Failed)
+                    {
+                        return Ok(agent_info_from_entry(story_id, agent));
+                    }
+                    // Not Completed/Failed (or the entry is missing): keep waiting until the deadline.
+                }
+                Ok(Err(broadcast::error::RecvError::Closed)) => {
+                    // Channel closed: no more events will arrive. Return the current map entry, whatever its status.
+                    let agents = self.agents.lock().map_err(|e| e.to_string())?;
+                    let key = composite_key(story_id, agent_name);
+                    if let Some(agent) = agents.get(&key) {
+                        return Ok(agent_info_from_entry(story_id, agent));
+                    }
+                    return Err(format!(
+                        "Agent '{agent_name}' for story '{story_id}' channel closed unexpectedly"
+                    ));
+                }
+                Err(_) => {
+                    return Err(format!(
+                        "Timed out after {timeout_ms}ms waiting for agent '{agent_name}' on story '{story_id}'"
+                    ));
+                }
+            }
+        }
+    }
+
    /// Get project root helper.
    pub fn get_project_root(
        &self,
@@ -368,6 +471,33 @@ impl AgentPool {
    ) -> Result<PathBuf, String> {
        state.get_project_root()
    }
+
+    /// Test helper: insert an agent entry with the given status (no worktree, session id or task handle, empty event log) so unit tests can exercise
+    /// wait/subscribe logic without spawning a real process. Returns a sender for the entry's broadcast channel (capacity 64); panics if the map lock fails.
+    #[cfg(test)]
+    pub fn inject_test_agent(
+        &self,
+        story_id: &str,
+        agent_name: &str,
+        status: AgentStatus,
+    ) -> broadcast::Sender<AgentEvent> {
+        let (tx, _) = broadcast::channel::<AgentEvent>(64);
+        let key = composite_key(story_id, agent_name);
+        let mut agents = self.agents.lock().unwrap();
+        agents.insert(
+            key,
+            StoryAgent {
+                agent_name: agent_name.to_string(),
+                status,
+                worktree_info: None,
+                session_id: None,
+                tx: tx.clone(),
+                task_handle: None,
+                event_log: Arc::new(Mutex::new(Vec::new())),
+            },
+        );
+        tx
+    }
 }

 /// Spawn claude agent in a PTY and stream events through the broadcast channel.
@@ -559,3 +689,90 @@ fn run_agent_pty_blocking(

    Ok(session_id)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[tokio::test]
+    async fn wait_for_agent_returns_immediately_if_completed() {
+        let pool = AgentPool::new();
+        pool.inject_test_agent("s1", "bot", AgentStatus::Completed);
+
+        let info = pool.wait_for_agent("s1", "bot", 1000).await.unwrap();
+        assert_eq!(info.status, AgentStatus::Completed);
+        assert_eq!(info.story_id, "s1");
+        assert_eq!(info.agent_name, "bot");
+    }
+
+    #[tokio::test]
+    async fn wait_for_agent_returns_immediately_if_failed() {
+        let pool = AgentPool::new();
+        pool.inject_test_agent("s2", "bot", AgentStatus::Failed);
+
+        let info = pool.wait_for_agent("s2", "bot", 1000).await.unwrap();
+        assert_eq!(info.status, AgentStatus::Failed);
+    }
+
+    #[tokio::test]
+    async fn wait_for_agent_completes_on_done_event() {
+        let pool = AgentPool::new();
+        let tx = pool.inject_test_agent("s3", "bot", AgentStatus::Running);
+
+        // Emit the Done event from a spawned task after a 50 ms delay.
+        let tx_clone = tx.clone();
+        tokio::spawn(async move {
+            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
+            // Only the event is sent; real code also updates the map, but for
+            // this unit test the map entry stays Running, so we verify that the
+            // wait loop reacts to the event itself.
+            let _ = tx_clone.send(AgentEvent::Done {
+                story_id: "s3".to_string(),
+                agent_name: "bot".to_string(),
+                session_id: Some("sess-abc".to_string()),
+            });
+        });
+
+        let info = pool.wait_for_agent("s3", "bot", 2000).await.unwrap();
+        // The returned status is read from the map entry, which is still Running here,
+        // so only identity is asserted: what matters is that the wait ended before the timeout.
+        assert_eq!(info.story_id, "s3");
+    }
+
+    #[tokio::test]
+    async fn wait_for_agent_times_out() {
+        let pool = AgentPool::new();
+        pool.inject_test_agent("s4", "bot", AgentStatus::Running);
+
+        let result = pool.wait_for_agent("s4", "bot", 50).await;
+        assert!(result.is_err());
+        let msg = result.unwrap_err();
+        assert!(msg.contains("Timed out"), "unexpected message: {msg}");
+    }
+
+    #[tokio::test]
+    async fn wait_for_agent_errors_for_nonexistent() {
+        let pool = AgentPool::new();
+        let result = pool.wait_for_agent("no_story", "no_bot", 100).await;
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn wait_for_agent_completes_on_stopped_status_event() {
+        let pool = AgentPool::new();
+        let tx = pool.inject_test_agent("s5", "bot", AgentStatus::Running);
+
+        let tx_clone = tx.clone();
+        tokio::spawn(async move {
+            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
+            let _ = tx_clone.send(AgentEvent::Status {
+                story_id: "s5".to_string(),
+                agent_name: "bot".to_string(),
+                status: "stopped".to_string(),
+            });
+        });
+
+        let info = pool.wait_for_agent("s5", "bot", 2000).await.unwrap();
+        assert_eq!(info.story_id, "s5");
+    }
+}