diff --git a/server/src/agents.rs b/server/src/agents.rs index a0f4760..a9735ac 100644 --- a/server/src/agents.rs +++ b/server/src/agents.rs @@ -323,7 +323,11 @@ impl AgentPool { } /// Start an agent for a story: load config, create worktree, spawn agent. - /// If `agent_name` is None, defaults to the first configured agent. + /// + /// When `agent_name` is `None`, automatically selects the first idle coder + /// agent (story 190). If all coders are busy the call fails with an error + /// indicating the story will be picked up when one becomes available. + /// /// If `resume_context` is provided, it is appended to the rendered prompt /// so the agent can pick up from a previous failed attempt. pub async fn start_agent( @@ -335,59 +339,56 @@ impl AgentPool { ) -> Result { let config = ProjectConfig::load(project_root)?; - // Resolve agent name from config - let resolved_name = match agent_name { - Some(name) => { - config - .find_agent(name) - .ok_or_else(|| format!("No agent named '{name}' in config"))?; - name.to_string() - } - None => config - .default_coder_agent() - .ok_or_else(|| { - "No coder agent configured. Specify an agent_name explicitly.".to_string() - })? - .name - .clone(), - }; + // Validate explicit agent name early (no lock needed). + if let Some(name) = agent_name { + config + .find_agent(name) + .ok_or_else(|| format!("No agent named '{name}' in config"))?; + } - let key = composite_key(story_id, &resolved_name); - - // Create shared resources before the atomic check-and-insert so the - // agents lock is held continuously from the availability check through - // the Pending insert, eliminating the TOCTOU race (story 132). + // Create name-independent shared resources before the lock so they are + // ready for the atomic check-and-insert (story 132). let (tx, _) = broadcast::channel::(1024); - let event_log: Arc>> = Arc::new(Mutex::new(Vec::new())); - - // Generate a unique session ID for the persistent log file. let log_session_id = uuid::Uuid::new_v4().to_string(); - // Create persistent log writer. - let log_writer = match AgentLogWriter::new( - project_root, - story_id, - &resolved_name, - &log_session_id, - ) { - Ok(w) => Some(Arc::new(Mutex::new(w))), - Err(e) => { - eprintln!("[agents] Failed to create log writer for {story_id}:{resolved_name}: {e}"); - None - } - }; - - // Atomically check availability and register as Pending. The lock is - // held continuously from the duplicate check through the HashMap insert - // so no concurrent start_agent call can slip through the check before - // this insert completes (fixes TOCTOU race, story 132). + // Atomically resolve agent name, check availability, and register as + // Pending. When `agent_name` is `None` the first idle coder is + // selected inside the lock so no TOCTOU race can occur between the + // availability check and the Pending insert (story 132, story 190). // // The `PendingGuard` ensures that if any step below fails the entry is // removed from the pool so it does not permanently block auto-assign // (bug 118). + let resolved_name: String; + let key: String; { let mut agents = self.agents.lock().map_err(|e| e.to_string())?; + + resolved_name = match agent_name { + Some(name) => name.to_string(), + None => find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder) + .map(|s| s.to_string()) + .ok_or_else(|| { + if config + .agent + .iter() + .any(|a| agent_config_stage(a) == PipelineStage::Coder) + { + format!( + "All coder agents are busy; story '{story_id}' will be \ + picked up when one becomes available" + ) + } else { + "No coder agent configured. Specify an agent_name explicitly." + .to_string() + } + })?, + }; + + key = composite_key(story_id, &resolved_name); + + // Check for duplicate assignment (same story + same agent already active). if let Some(agent) = agents.get(&key) && (agent.status == AgentStatus::Running || agent.status == AgentStatus::Pending) { @@ -396,24 +397,27 @@ impl AgentPool { agent.status )); } - // Enforce single-instance concurrency: if this agent is already running on - // any other story, reject the request. The story remains in its current pipeline - // directory and `auto_assign_available_work` will pick it up when the agent - // becomes free. - if let Some(busy_story) = agents.iter().find_map(|(k, a)| { - if a.agent_name == resolved_name - && matches!(a.status, AgentStatus::Running | AgentStatus::Pending) - { - Some( - k.rsplit_once(':') - .map(|(sid, _)| sid) - .unwrap_or(k) - .to_string(), - ) - } else { - None - } - }) { + // Enforce single-instance concurrency for explicitly-named agents: + // if this agent is already running on any other story, reject. + // Auto-selected agents are already guaranteed idle by + // find_free_agent_for_stage, so this check is only needed for + // explicit requests. + if agent_name.is_some() + && let Some(busy_story) = agents.iter().find_map(|(k, a)| { + if a.agent_name == resolved_name + && matches!(a.status, AgentStatus::Running | AgentStatus::Pending) + { + Some( + k.rsplit_once(':') + .map(|(sid, _)| sid) + .unwrap_or(k) + .to_string(), + ) + } else { + None + } + }) + { return Err(format!( "Agent '{resolved_name}' is already running on story '{busy_story}'; \ story '{story_id}' will be picked up when the agent becomes available" @@ -437,6 +441,23 @@ impl AgentPool { } let mut pending_guard = PendingGuard::new(self.agents.clone(), key.clone()); + // Create persistent log writer (needs resolved_name, so must be after + // the atomic resolution above). + let log_writer = match AgentLogWriter::new( + project_root, + story_id, + &resolved_name, + &log_session_id, + ) { + Ok(w) => Some(Arc::new(Mutex::new(w))), + Err(e) => { + eprintln!( + "[agents] Failed to create log writer for {story_id}:{resolved_name}: {e}" + ); + None + } + }; + // Notify WebSocket clients that a new agent is pending. Self::notify_agent_state_changed(&self.watcher_tx); @@ -674,6 +695,28 @@ impl AgentPool { Ok(()) } + /// Return the names of configured agents for `stage` that are not currently + /// running or pending. + pub fn available_agents_for_stage( + &self, + config: &ProjectConfig, + stage: &PipelineStage, + ) -> Result, String> { + let agents = self.agents.lock().map_err(|e| e.to_string())?; + Ok(config + .agent + .iter() + .filter(|cfg| agent_config_stage(cfg) == *stage) + .filter(|cfg| { + !agents.values().any(|a| { + a.agent_name == cfg.name + && matches!(a.status, AgentStatus::Running | AgentStatus::Pending) + }) + }) + .map(|cfg| cfg.name.clone()) + .collect()) + } + /// List all agents with their status. pub fn list_agents(&self) -> Result, String> { let agents = self.agents.lock().map_err(|e| e.to_string())?; @@ -6299,4 +6342,198 @@ stage = "qa" (bug 173: lozenges must update when agents are assigned during pipeline advance)" ); } + + // ── available_agents_for_stage tests (story 190) ────────────────────────── + + fn make_config(toml_str: &str) -> ProjectConfig { + ProjectConfig::parse(toml_str).unwrap() + } + + #[test] + fn available_agents_for_stage_returns_idle_agents() { + let config = make_config( + r#" +[[agent]] +name = "coder-1" +stage = "coder" + +[[agent]] +name = "coder-2" +stage = "coder" + +[[agent]] +name = "qa" +stage = "qa" +"#, + ); + let pool = AgentPool::new_test(3001); + // coder-1 is busy on story-1 + pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running); + + let available = pool + .available_agents_for_stage(&config, &PipelineStage::Coder) + .unwrap(); + assert_eq!(available, vec!["coder-2"]); + + let available_qa = pool + .available_agents_for_stage(&config, &PipelineStage::Qa) + .unwrap(); + assert_eq!(available_qa, vec!["qa"]); + } + + #[test] + fn available_agents_for_stage_returns_empty_when_all_busy() { + let config = make_config( + r#" +[[agent]] +name = "coder-1" +stage = "coder" +"#, + ); + let pool = AgentPool::new_test(3001); + pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running); + + let available = pool + .available_agents_for_stage(&config, &PipelineStage::Coder) + .unwrap(); + assert!(available.is_empty()); + } + + #[test] + fn available_agents_for_stage_ignores_completed_agents() { + let config = make_config( + r#" +[[agent]] +name = "coder-1" +stage = "coder" +"#, + ); + let pool = AgentPool::new_test(3001); + // Completed agents should not count as busy. + pool.inject_test_agent("story-1", "coder-1", AgentStatus::Completed); + + let available = pool + .available_agents_for_stage(&config, &PipelineStage::Coder) + .unwrap(); + assert_eq!(available, vec!["coder-1"]); + } + + #[tokio::test] + async fn start_agent_auto_selects_second_coder_when_first_busy() { + let tmp = tempfile::tempdir().unwrap(); + let sk = tmp.path().join(".story_kit"); + std::fs::create_dir_all(&sk).unwrap(); + std::fs::write( + sk.join("project.toml"), + r#" +[[agent]] +name = "supervisor" +stage = "other" + +[[agent]] +name = "coder-1" +stage = "coder" + +[[agent]] +name = "coder-2" +stage = "coder" +"#, + ) + .unwrap(); + + let pool = AgentPool::new_test(3001); + // coder-1 is busy on another story + pool.inject_test_agent("other-story", "coder-1", AgentStatus::Running); + + // Call start_agent without agent_name — should pick coder-2 + let result = pool + .start_agent(tmp.path(), "42_my_story", None, None) + .await; + // Will fail for infrastructure reasons (no git repo), but should NOT + // fail with "All coder agents are busy" — that would mean it didn't + // try coder-2. + match result { + Ok(info) => { + assert_eq!(info.agent_name, "coder-2"); + } + Err(err) => { + assert!( + !err.contains("All coder agents are busy"), + "should have selected coder-2 but got: {err}" + ); + assert!( + !err.contains("No coder agent configured"), + "should not fail on agent selection, got: {err}" + ); + } + } + } + + #[tokio::test] + async fn start_agent_returns_busy_when_all_coders_occupied() { + let tmp = tempfile::tempdir().unwrap(); + let sk = tmp.path().join(".story_kit"); + std::fs::create_dir_all(&sk).unwrap(); + std::fs::write( + sk.join("project.toml"), + r#" +[[agent]] +name = "coder-1" +stage = "coder" + +[[agent]] +name = "coder-2" +stage = "coder" +"#, + ) + .unwrap(); + + let pool = AgentPool::new_test(3001); + pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running); + pool.inject_test_agent("story-2", "coder-2", AgentStatus::Pending); + + let result = pool + .start_agent(tmp.path(), "story-3", None, None) + .await; + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.contains("All coder agents are busy"), + "expected busy error, got: {err}" + ); + } + + #[tokio::test] + async fn start_agent_explicit_name_unchanged_when_busy() { + let tmp = tempfile::tempdir().unwrap(); + let sk = tmp.path().join(".story_kit"); + std::fs::create_dir_all(&sk).unwrap(); + std::fs::write( + sk.join("project.toml"), + r#" +[[agent]] +name = "coder-1" +stage = "coder" + +[[agent]] +name = "coder-2" +stage = "coder" +"#, + ) + .unwrap(); + + let pool = AgentPool::new_test(3001); + pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running); + + // Explicit request for coder-1 (busy) should fail even though coder-2 is free. + let result = pool + .start_agent(tmp.path(), "story-2", Some("coder-1"), None) + .await; + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.contains("coder-1") && err.contains("already running"), + "expected explicit busy error, got: {err}" + ); + } } diff --git a/server/src/config.rs b/server/src/config.rs index 826483a..6e634e1 100644 --- a/server/src/config.rs +++ b/server/src/config.rs @@ -57,20 +57,6 @@ pub struct AgentConfig { pub inactivity_timeout_secs: u64, } -impl AgentConfig { - /// Returns true if this agent is a coder. - /// - /// Prefers the explicit `stage` field; falls back to checking whether the - /// agent name starts with `"coder"` for backwards compatibility. - pub fn is_coder(&self) -> bool { - match self.stage.as_deref() { - Some("coder") => true, - Some(_) => false, - None => self.name.starts_with("coder"), - } - } -} - fn default_path() -> String { ".".to_string() } @@ -203,15 +189,6 @@ impl ProjectConfig { self.agent.first() } - /// Get the first coder agent config. - /// - /// Prefers the explicit `stage = "coder"` field over the legacy name-based - /// heuristic (names starting with "coder"). Returns `None` if no coder - /// agent is configured. - pub fn default_coder_agent(&self) -> Option<&AgentConfig> { - self.agent.iter().find(|a| a.is_coder()) - } - /// Render template variables in agent args and prompt for the given agent. /// If `agent_name` is None, uses the first (default) agent. pub fn render_agent_args( @@ -503,89 +480,6 @@ name = "second" assert!(config.find_agent("missing").is_none()); } - #[test] - fn default_coder_agent_skips_supervisor() { - let toml_str = r#" -[[agent]] -name = "supervisor" -stage = "other" - -[[agent]] -name = "coder-1" -stage = "coder" -"#; - let config = ProjectConfig::parse(toml_str).unwrap(); - assert_eq!(config.default_coder_agent().unwrap().name, "coder-1"); - } - - #[test] - fn default_coder_agent_uses_name_heuristic_when_no_stage() { - let toml_str = r#" -[[agent]] -name = "supervisor" - -[[agent]] -name = "coder-1" -"#; - let config = ProjectConfig::parse(toml_str).unwrap(); - assert_eq!(config.default_coder_agent().unwrap().name, "coder-1"); - } - - #[test] - fn default_coder_agent_returns_none_when_no_coders() { - let toml_str = r#" -[[agent]] -name = "supervisor" -stage = "other" -"#; - let config = ProjectConfig::parse(toml_str).unwrap(); - assert!(config.default_coder_agent().is_none()); - } - - #[test] - fn is_coder_explicit_stage() { - let mut agent = AgentConfig { - name: "my-agent".to_string(), - role: String::new(), - command: "claude".to_string(), - args: vec![], - prompt: String::new(), - model: None, - allowed_tools: None, - max_turns: None, - max_budget_usd: None, - system_prompt: None, - stage: Some("coder".to_string()), - inactivity_timeout_secs: 300, - }; - assert!(agent.is_coder()); - agent.stage = Some("other".to_string()); - assert!(!agent.is_coder()); - } - - #[test] - fn is_coder_name_heuristic() { - let make = |name: &str| AgentConfig { - name: name.to_string(), - role: String::new(), - command: "claude".to_string(), - args: vec![], - prompt: String::new(), - model: None, - allowed_tools: None, - max_turns: None, - max_budget_usd: None, - system_prompt: None, - stage: None, - inactivity_timeout_secs: 300, - }; - assert!(make("coder-1").is_coder()); - assert!(make("coder-opus").is_coder()); - assert!(!make("supervisor").is_coder()); - assert!(!make("qa").is_coder()); - assert!(!make("mergemaster").is_coder()); - } - #[test] fn parse_project_toml_from_file() { let tmp = tempfile::tempdir().unwrap(); diff --git a/server/src/http/mcp.rs b/server/src/http/mcp.rs index 1dea685..93c3443 100644 --- a/server/src/http/mcp.rs +++ b/server/src/http/mcp.rs @@ -1,4 +1,4 @@ -use crate::agents::{close_bug_to_archive, move_story_to_archived, move_story_to_merge, move_story_to_qa}; +use crate::agents::{close_bug_to_archive, move_story_to_archived, move_story_to_merge, move_story_to_qa, PipelineStage}; use crate::config::ProjectConfig; use crate::log_buffer; use crate::slog_warn; @@ -1269,6 +1269,22 @@ fn get_agent_output_from_log( fn tool_get_agent_config(ctx: &AppContext) -> Result { let project_root = ctx.agents.get_project_root(&ctx.state)?; let config = ProjectConfig::load(&project_root)?; + + // Collect available (idle) agent names across all stages so the caller can + // see at a glance which agents are free to start (story 190). + let mut available_names: std::collections::HashSet = + std::collections::HashSet::new(); + for stage in &[ + PipelineStage::Coder, + PipelineStage::Qa, + PipelineStage::Mergemaster, + PipelineStage::Other, + ] { + if let Ok(names) = ctx.agents.available_agents_for_stage(&config, stage) { + available_names.extend(names); + } + } + serde_json::to_string_pretty(&json!(config .agent .iter() @@ -1279,6 +1295,7 @@ fn tool_get_agent_config(ctx: &AppContext) -> Result { "allowed_tools": a.allowed_tools, "max_turns": a.max_turns, "max_budget_usd": a.max_budget_usd, + "available": available_names.contains(&a.name), })) .collect::>())) .map_err(|e| format!("Serialization error: {e}"))