From 0fe894c5a46357aed218d9e5f66a3cab371bd879 Mon Sep 17 00:00:00 2001 From: Dave Date: Fri, 27 Feb 2026 10:50:53 +0000 Subject: [PATCH] story-kit: merge 231_bug_agent_silently_disappears_when_worktree_creation_fails --- server/src/agents.rs | 76 +++++++++++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 22 deletions(-) diff --git a/server/src/agents.rs b/server/src/agents.rs index 53fb7ba..4a23ccf 100644 --- a/server/src/agents.rs +++ b/server/src/agents.rs @@ -518,15 +518,21 @@ impl AgentPool { { Ok(wt) => wt, Err(e) => { - // Worktree creation failed — mark agent as Failed so the UI shows the error. - let _ = tx_clone.send(AgentEvent::Error { + let error_msg = format!("Failed to create worktree: {e}"); + slog_error!("[agents] {error_msg}"); + let event = AgentEvent::Error { story_id: sid.clone(), agent_name: aname.clone(), - message: format!("Failed to create worktree: {e}"), - }); - if let Ok(mut agents) = agents_ref.lock() { - agents.remove(&key_clone); + message: error_msg, + }; + if let Ok(mut log) = log_clone.lock() { + log.push(event.clone()); } + let _ = tx_clone.send(event); + if let Ok(mut agents) = agents_ref.lock() + && let Some(agent) = agents.get_mut(&key_clone) { + agent.status = AgentStatus::Failed; + } Self::notify_agent_state_changed(&watcher_tx_clone); return; } @@ -550,14 +556,21 @@ impl AgentPool { ) { Ok(result) => result, Err(e) => { - let _ = tx_clone.send(AgentEvent::Error { + let error_msg = format!("Failed to render agent args: {e}"); + slog_error!("[agents] {error_msg}"); + let event = AgentEvent::Error { story_id: sid.clone(), agent_name: aname.clone(), - message: format!("Failed to render agent args: {e}"), - }); - if let Ok(mut agents) = agents_ref.lock() { - agents.remove(&key_clone); + message: error_msg, + }; + if let Ok(mut log) = log_clone.lock() { + log.push(event.clone()); } + let _ = tx_clone.send(event); + if let Ok(mut agents) = agents_ref.lock() + && let Some(agent) = agents.get_mut(&key_clone) { + agent.status = AgentStatus::Failed; + } Self::notify_agent_state_changed(&watcher_tx_clone); return; } @@ -614,14 +627,20 @@ impl AgentPool { Self::notify_agent_state_changed(&watcher_tx_clone); } Err(e) => { - let _ = tx_clone.send(AgentEvent::Error { + slog_error!("[agents] Agent process error for {aname} on {sid}: {e}"); + let event = AgentEvent::Error { story_id: sid.clone(), agent_name: aname.clone(), message: e, - }); - if let Ok(mut agents) = agents_ref.lock() { - agents.remove(&key_clone); + }; + if let Ok(mut log) = log_clone.lock() { + log.push(event.clone()); } + let _ = tx_clone.send(event); + if let Ok(mut agents) = agents_ref.lock() + && let Some(agent) = agents.get_mut(&key_clone) { + agent.status = AgentStatus::Failed; + } Self::notify_agent_state_changed(&watcher_tx_clone); } } @@ -5292,15 +5311,28 @@ stage = "qa" "agent must transition to Failed after worktree creation error" ); - // The pool must NOT retain a Pending or Running entry for this agent. + // The pool must retain a Failed entry (not disappear silently). let agents = pool.agents.lock().unwrap(); - let leaked = agents.values().any(|a| { - a.agent_name == "qa" - && matches!(a.status, AgentStatus::Pending | AgentStatus::Running) - }); + let failed_entry = agents + .values() + .find(|a| a.agent_name == "qa" && a.status == AgentStatus::Failed); assert!( - !leaked, - "agent pool must not retain a Pending/Running entry after worktree creation fails" + failed_entry.is_some(), + "agent pool must retain a Failed entry so the UI can show the error state" + ); + drop(agents); + + // The AgentEvent::Error must be persisted in the event_log so late + // subscribers / polling clients can see the failure reason. + let events = pool + .drain_events("50_story_test", "qa") + .expect("drain_events should succeed"); + let has_error_event = events + .iter() + .any(|e| matches!(e, AgentEvent::Error { .. })); + assert!( + has_error_event, + "event_log must contain AgentEvent::Error after worktree creation fails" ); }