Accept story 41: Agent Completion Notification via MCP

Add wait_for_agent MCP tool that blocks until an agent reaches a terminal
state (completed, failed, stopped). Returns final status with session_id,
worktree_path, and git commits made by the agent.

- Subscribe-before-check pattern avoids race conditions
- Handles lagged receivers, channel closure, and configurable timeout
- Default timeout 5 minutes, includes git log of agent commits in response
- 11 new tests covering all paths

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Dave
2026-02-20 13:16:04 +00:00
parent fa5d013fe2
commit c6a04f5e53
3 changed files with 387 additions and 16 deletions

View File

@@ -69,7 +69,7 @@ impl std::fmt::Display for AgentStatus {
}
}
#[derive(Serialize, Clone)]
#[derive(Debug, Serialize, Clone)]
pub struct AgentInfo {
pub story_id: String,
pub agent_name: String,
@@ -90,6 +90,24 @@ struct StoryAgent {
event_log: Arc<Mutex<Vec<AgentEvent>>>,
}
/// Build an `AgentInfo` snapshot from a `StoryAgent` map entry.
fn agent_info_from_entry(story_id: &str, agent: &StoryAgent) -> AgentInfo {
AgentInfo {
story_id: story_id.to_string(),
agent_name: agent.agent_name.clone(),
status: agent.status.clone(),
session_id: agent.session_id.clone(),
worktree_path: agent
.worktree_info
.as_ref()
.map(|wt| wt.path.to_string_lossy().to_string()),
base_branch: agent
.worktree_info
.as_ref()
.map(|wt| wt.base_branch.clone()),
}
}
/// Manages concurrent story agents, each in its own worktree.
pub struct AgentPool {
agents: Arc<Mutex<HashMap<String, StoryAgent>>>,
@@ -314,20 +332,7 @@ impl AgentPool {
.rsplit_once(':')
.map(|(sid, _)| sid.to_string())
.unwrap_or_else(|| key.clone());
AgentInfo {
story_id,
agent_name: agent.agent_name.clone(),
status: agent.status.clone(),
session_id: agent.session_id.clone(),
worktree_path: agent
.worktree_info
.as_ref()
.map(|wt| wt.path.to_string_lossy().to_string()),
base_branch: agent
.worktree_info
.as_ref()
.map(|wt| wt.base_branch.clone()),
}
agent_info_from_entry(&story_id, agent)
})
.collect())
}
@@ -361,6 +366,104 @@ impl AgentPool {
Ok(log.drain(..).collect())
}
/// Block until the agent reaches a terminal state (completed, failed, stopped).
/// Returns the agent's final `AgentInfo`.
/// `timeout_ms` caps how long to wait; returns an error if the deadline passes.
pub async fn wait_for_agent(
&self,
story_id: &str,
agent_name: &str,
timeout_ms: u64,
) -> Result<AgentInfo, String> {
// Subscribe before checking status so we don't miss the terminal event
// if the agent completes in the window between the two operations.
let mut rx = self.subscribe(story_id, agent_name)?;
// Return immediately if already in a terminal state.
{
let agents = self.agents.lock().map_err(|e| e.to_string())?;
let key = composite_key(story_id, agent_name);
if let Some(agent) = agents.get(&key)
&& matches!(agent.status, AgentStatus::Completed | AgentStatus::Failed)
{
return Ok(agent_info_from_entry(story_id, agent));
}
}
let deadline =
tokio::time::Instant::now() + std::time::Duration::from_millis(timeout_ms);
loop {
let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
if remaining.is_zero() {
return Err(format!(
"Timed out after {timeout_ms}ms waiting for agent '{agent_name}' on story '{story_id}'"
));
}
match tokio::time::timeout(remaining, rx.recv()).await {
Ok(Ok(event)) => {
let is_terminal = match &event {
AgentEvent::Done { .. } | AgentEvent::Error { .. } => true,
AgentEvent::Status { status, .. } if status == "stopped" => true,
_ => false,
};
if is_terminal {
let agents = self.agents.lock().map_err(|e| e.to_string())?;
let key = composite_key(story_id, agent_name);
return Ok(if let Some(agent) = agents.get(&key) {
agent_info_from_entry(story_id, agent)
} else {
// Agent was removed from map (e.g. stop_agent removes it after
// the "stopped" status event is sent).
let (status, session_id) = match event {
AgentEvent::Done { session_id, .. } => {
(AgentStatus::Completed, session_id)
}
_ => (AgentStatus::Failed, None),
};
AgentInfo {
story_id: story_id.to_string(),
agent_name: agent_name.to_string(),
status,
session_id,
worktree_path: None,
base_branch: None,
}
});
}
}
Ok(Err(broadcast::error::RecvError::Lagged(_))) => {
// Missed some buffered events — check current status before resuming.
let agents = self.agents.lock().map_err(|e| e.to_string())?;
let key = composite_key(story_id, agent_name);
if let Some(agent) = agents.get(&key)
&& matches!(agent.status, AgentStatus::Completed | AgentStatus::Failed)
{
return Ok(agent_info_from_entry(story_id, agent));
}
// Still running — continue the loop.
}
Ok(Err(broadcast::error::RecvError::Closed)) => {
// Channel closed: no more events will arrive. Return current state.
let agents = self.agents.lock().map_err(|e| e.to_string())?;
let key = composite_key(story_id, agent_name);
if let Some(agent) = agents.get(&key) {
return Ok(agent_info_from_entry(story_id, agent));
}
return Err(format!(
"Agent '{agent_name}' for story '{story_id}' channel closed unexpectedly"
));
}
Err(_) => {
return Err(format!(
"Timed out after {timeout_ms}ms waiting for agent '{agent_name}' on story '{story_id}'"
));
}
}
}
}
/// Get project root helper.
pub fn get_project_root(
&self,
@@ -368,6 +471,33 @@ impl AgentPool {
) -> Result<PathBuf, String> {
state.get_project_root()
}
/// Test helper: inject a pre-built agent entry so unit tests can exercise
/// wait/subscribe logic without spawning a real process.
#[cfg(test)]
pub fn inject_test_agent(
&self,
story_id: &str,
agent_name: &str,
status: AgentStatus,
) -> broadcast::Sender<AgentEvent> {
let (tx, _) = broadcast::channel::<AgentEvent>(64);
let key = composite_key(story_id, agent_name);
let mut agents = self.agents.lock().unwrap();
agents.insert(
key,
StoryAgent {
agent_name: agent_name.to_string(),
status,
worktree_info: None,
session_id: None,
tx: tx.clone(),
task_handle: None,
event_log: Arc::new(Mutex::new(Vec::new())),
},
);
tx
}
}
/// Spawn claude agent in a PTY and stream events through the broadcast channel.
@@ -559,3 +689,90 @@ fn run_agent_pty_blocking(
Ok(session_id)
}
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn wait_for_agent_returns_immediately_if_completed() {
let pool = AgentPool::new();
pool.inject_test_agent("s1", "bot", AgentStatus::Completed);
let info = pool.wait_for_agent("s1", "bot", 1000).await.unwrap();
assert_eq!(info.status, AgentStatus::Completed);
assert_eq!(info.story_id, "s1");
assert_eq!(info.agent_name, "bot");
}
#[tokio::test]
async fn wait_for_agent_returns_immediately_if_failed() {
let pool = AgentPool::new();
pool.inject_test_agent("s2", "bot", AgentStatus::Failed);
let info = pool.wait_for_agent("s2", "bot", 1000).await.unwrap();
assert_eq!(info.status, AgentStatus::Failed);
}
#[tokio::test]
async fn wait_for_agent_completes_on_done_event() {
let pool = AgentPool::new();
let tx = pool.inject_test_agent("s3", "bot", AgentStatus::Running);
// Send Done event after a short delay
let tx_clone = tx.clone();
tokio::spawn(async move {
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
// Mark status via event; real code also updates the map, but for
// this unit test the map entry stays Running — we verify the
// wait loop reacts to the event.
let _ = tx_clone.send(AgentEvent::Done {
story_id: "s3".to_string(),
agent_name: "bot".to_string(),
session_id: Some("sess-abc".to_string()),
});
});
let info = pool.wait_for_agent("s3", "bot", 2000).await.unwrap();
// Status comes from the map entry (Running in this unit test)
// — the important thing is that wait_for_agent returned without timing out.
assert_eq!(info.story_id, "s3");
}
#[tokio::test]
async fn wait_for_agent_times_out() {
let pool = AgentPool::new();
pool.inject_test_agent("s4", "bot", AgentStatus::Running);
let result = pool.wait_for_agent("s4", "bot", 50).await;
assert!(result.is_err());
let msg = result.unwrap_err();
assert!(msg.contains("Timed out"), "unexpected message: {msg}");
}
#[tokio::test]
async fn wait_for_agent_errors_for_nonexistent() {
let pool = AgentPool::new();
let result = pool.wait_for_agent("no_story", "no_bot", 100).await;
assert!(result.is_err());
}
#[tokio::test]
async fn wait_for_agent_completes_on_stopped_status_event() {
let pool = AgentPool::new();
let tx = pool.inject_test_agent("s5", "bot", AgentStatus::Running);
let tx_clone = tx.clone();
tokio::spawn(async move {
tokio::time::sleep(std::time::Duration::from_millis(30)).await;
let _ = tx_clone.send(AgentEvent::Status {
story_id: "s5".to_string(),
agent_name: "bot".to_string(),
status: "stopped".to_string(),
});
});
let info = pool.wait_for_agent("s5", "bot", 2000).await.unwrap();
assert_eq!(info.story_id, "s5");
}
}