//! Agent start — spawns a new agent process in a worktree for a given story. #![allow(unused_imports, dead_code)] use crate::agent_log::AgentLogWriter; use crate::config::ProjectConfig; use crate::slog_error; use std::path::Path; use std::sync::{Arc, Mutex}; use tokio::sync::broadcast; use super::super::runtime::{ AgentRuntime, ClaudeCodeRuntime, GeminiRuntime, OpenAiRuntime, RuntimeContext, }; use super::super::{ AgentEvent, AgentInfo, AgentStatus, PipelineStage, agent_config_stage, pipeline_stage, }; use super::types::{PendingGuard, StoryAgent, composite_key}; use super::worktree::find_active_story_stage; use super::{AgentPool, auto_assign}; mod spawn; mod validation; use validation::{read_front_matter_agent, validate_agent_stage}; impl AgentPool { /// Start an agent for a story: load config, create worktree, spawn agent. /// /// When `agent_name` is `None`, automatically selects the first idle coder /// agent (story 190). If all coders are busy the call fails with an error /// indicating the story will be picked up when one becomes available. /// /// If `resume_context` is provided and `session_id_to_resume` is `None`, /// the context is appended to the rendered prompt so the agent can pick up /// from a previous failed attempt. /// /// If `session_id_to_resume` is provided, the agent is launched with /// `--resume ` instead of `-p `. Only /// `resume_context` (if any) is sent as the new message. This lets /// the agent re-enter the previous conversation without re-reading /// CLAUDE.md and README, satisfying story 543. pub async fn start_agent( &self, project_root: &Path, story_id: &str, agent_name: Option<&str>, resume_context: Option<&str>, session_id_to_resume: Option, ) -> Result { let config = ProjectConfig::load(project_root)?; // Validate explicit agent name early (no lock needed). if let Some(name) = agent_name { config .find_agent(name) .ok_or_else(|| format!("No agent named '{name}' in config"))?; } // Create name-independent shared resources before the lock so they are // ready for the atomic check-and-insert (story 132). let (tx, _) = broadcast::channel::(1024); let event_log: Arc>> = Arc::new(Mutex::new(Vec::new())); let log_session_id = uuid::Uuid::new_v4().to_string(); // Create the per-session status buffer subscribed to this project's // broadcaster. On restart a fresh buffer replaces the old one, // giving each session an independent, clean subscription (story 735). let status_buffer = crate::service::status::buffer::StatusEventBuffer::new(&self.status_broadcaster); // Move story from backlog/ to current/ before checking agent // availability so that auto_assign_available_work can pick it up even // when all coders are currently busy (story 203). Only do this for // Coder-stage agents — QA and Mergemaster must attach to the story // at its existing stage (3_qa or 4_merge) and must NOT be demoted // back to 2_current/ on attach (bug 502). When `agent_name` is None // we are auto-selecting an idle coder, so still move. let starting_a_coder = agent_name .and_then(|n| config.find_agent(n).map(agent_config_stage)) .map(|s| s == PipelineStage::Coder) .unwrap_or(true); if starting_a_coder { crate::agents::lifecycle::move_story_to_current(story_id)?; } // Validate that the agent's configured stage matches the story's // pipeline stage. (See validation::validate_agent_stage.) validate_agent_stage(&config, project_root, story_id, agent_name)?; // Read the preferred agent from the story's front matter before acquiring // the lock. (See validation::read_front_matter_agent.) let front_matter_agent: Option = read_front_matter_agent(story_id, agent_name); // Atomically resolve agent name, check availability, and register as // Pending. When `agent_name` is `None` the first idle coder is // selected inside the lock so no TOCTOU race can occur between the // availability check and the Pending insert (story 132, story 190). // // The `PendingGuard` ensures that if any step below fails the entry is // removed from the pool so it does not permanently block auto-assign // (bug 118). let resolved_name: String; let key: String; // Buffered status events accumulated while the agent was idle. Drained // inside the lock (before the new entry replaces the old one) and // formatted as a `` block for prepending to the first // agent turn (story 736). let prior_events: Option; { let mut agents = self.agents.lock().map_err(|e| e.to_string())?; resolved_name = match agent_name { Some(name) => name.to_string(), None => { // Honour the `agent:` field in the story's front matter so that // `start 368` after `assign 368 opus` picks the right agent // (bug 379). Mirrors the auto_assign selection logic. if let Some(ref pref) = front_matter_agent { let stage_matches = config .find_agent(pref) .map(|cfg| agent_config_stage(cfg) == PipelineStage::Coder) .unwrap_or(false); if stage_matches { if auto_assign::is_agent_free(&agents, pref) { pref.clone() } else { return Err(format!( "Preferred agent '{pref}' from story front matter is busy; \ story '{story_id}' has been queued in work/2_current/ and will \ be auto-assigned when it becomes available" )); } } else { // Stage mismatch — fall back to any free coder. auto_assign::find_free_agent_for_stage( &config, &agents, &PipelineStage::Coder, ) .map(|s| s.to_string()) .ok_or_else(|| { if config .agent .iter() .any(|a| agent_config_stage(a) == PipelineStage::Coder) { format!( "All coder agents are busy; story '{story_id}' has been \ queued in work/2_current/ and will be auto-assigned when \ one becomes available" ) } else { "No coder agent configured. Specify an agent_name explicitly." .to_string() } })? } } else { auto_assign::find_free_agent_for_stage( &config, &agents, &PipelineStage::Coder, ) .map(|s| s.to_string()) .ok_or_else(|| { if config .agent .iter() .any(|a| agent_config_stage(a) == PipelineStage::Coder) { format!( "All coder agents are busy; story '{story_id}' has been \ queued in work/2_current/ and will be auto-assigned when \ one becomes available" ) } else { "No coder agent configured. Specify an agent_name explicitly." .to_string() } })? } } }; key = composite_key(story_id, &resolved_name); // Check for duplicate assignment (same story + same agent already active). if let Some(agent) = agents.get(&key) && (agent.status == AgentStatus::Running || agent.status == AgentStatus::Pending) { return Err(format!( "Agent '{resolved_name}' for story '{story_id}' is already {}", agent.status )); } // Enforce single-stage concurrency: reject if there is already a // Running/Pending agent at the same pipeline stage for this story. // This prevents two coders (or two QA/mergemaster agents) from // corrupting each other's work in the same worktree. // Applies to both explicit and auto-selected agents; the Other // stage (supervisors, unknown agents) is exempt. let resolved_stage = config .find_agent(&resolved_name) .map(agent_config_stage) .unwrap_or_else(|| pipeline_stage(&resolved_name)); if resolved_stage != PipelineStage::Other && let Some(conflicting_name) = agents.iter().find_map(|(k, a)| { let k_story = k.rsplit_once(':').map(|(s, _)| s).unwrap_or(k); if k_story == story_id && a.agent_name != resolved_name && matches!(a.status, AgentStatus::Running | AgentStatus::Pending) { let a_stage = config .find_agent(&a.agent_name) .map(agent_config_stage) .unwrap_or_else(|| pipeline_stage(&a.agent_name)); if a_stage == resolved_stage { Some(a.agent_name.clone()) } else { None } } else { None } }) { return Err(format!( "Cannot start '{resolved_name}' on story '{story_id}': \ '{conflicting_name}' is already active at the same pipeline stage" )); } // Enforce single-instance concurrency for explicitly-named agents: // if this agent is already running on any other story, reject. // Auto-selected agents are already guaranteed idle by // find_free_agent_for_stage, so this check is only needed for // explicit requests. if agent_name.is_some() && let Some(busy_story) = agents.iter().find_map(|(k, a)| { if a.agent_name == resolved_name && matches!(a.status, AgentStatus::Running | AgentStatus::Pending) { Some( k.rsplit_once(':') .map(|(sid, _)| sid) .unwrap_or(k) .to_string(), ) } else { None } }) { return Err(format!( "Agent '{resolved_name}' is already running on story '{busy_story}'; \ story '{story_id}' will be picked up when the agent becomes available" )); } // Drain accumulated status events from the previous session before // replacing the entry with the new one. The drained items are // formatted and prepended to the first agent turn (story 736). prior_events = { let items = agents .get(&key) .and_then(|a| a.status_buffer.as_ref().map(|b| b.drain())) .unwrap_or_default(); crate::service::status::buffer::format_buffered_items(&items) }; agents.insert( key.clone(), StoryAgent { agent_name: resolved_name.clone(), status: AgentStatus::Pending, worktree_info: None, session_id: None, tx: tx.clone(), task_handle: None, event_log: event_log.clone(), completion: None, project_root: Some(project_root.to_path_buf()), log_session_id: Some(log_session_id.clone()), merge_failure_reported: false, throttled: false, termination_reason: None, status_buffer: Some(status_buffer), }, ); } let mut pending_guard = PendingGuard::new(self.agents.clone(), key.clone()); // Create persistent log writer (needs resolved_name, so must be after // the atomic resolution above). let log_writer = match AgentLogWriter::new(project_root, story_id, &resolved_name, &log_session_id) { Ok(w) => Some(Arc::new(Mutex::new(w))), Err(e) => { eprintln!( "[agents] Failed to create log writer for {story_id}:{resolved_name}: {e}" ); None } }; // Notify WebSocket clients that a new agent is pending. Self::notify_agent_state_changed(&self.watcher_tx); let _ = tx.send(AgentEvent::Status { story_id: story_id.to_string(), agent_name: resolved_name.clone(), status: "pending".to_string(), }); // Extract inactivity timeout from the agent config before cloning config. let inactivity_timeout_secs = config .find_agent(&resolved_name) .map(|a| a.inactivity_timeout_secs) .unwrap_or(300); // If no explicit session_id_to_resume was provided, look up from the // persistent session store. The key includes the model so a model // change (e.g. sonnet → opus) produces a cache miss — intentional. let effective_session_id = session_id_to_resume.or_else(|| { let model = config .find_agent(&resolved_name) .and_then(|a| a.model.clone()) .unwrap_or_default(); crate::agents::session_store::lookup_session( project_root, story_id, &resolved_name, &model, ) }); // Clone all values needed inside the background spawn. // Spawn the background task. Worktree creation and agent launch happen here // so `start_agent` returns immediately after registering the agent as // Pending — non-blocking by design (story 157). let handle = tokio::spawn(spawn::run_agent_spawn( project_root.to_path_buf(), config.clone(), resume_context.map(str::to_string), effective_session_id, story_id.to_string(), resolved_name.clone(), tx.clone(), self.agents.clone(), key.clone(), event_log.clone(), self.port, log_writer.clone(), self.child_killers.clone(), self.watcher_tx.clone(), inactivity_timeout_secs, prior_events, )); // Store the task handle while the agent is still Pending. { let mut agents = self.agents.lock().map_err(|e| e.to_string())?; if let Some(agent) = agents.get_mut(&key) { agent.task_handle = Some(handle); } } // Agent successfully spawned — prevent the guard from removing the entry. pending_guard.disarm(); Ok(AgentInfo { story_id: story_id.to_string(), agent_name: resolved_name, status: AgentStatus::Pending, session_id: None, worktree_path: None, base_branch: None, completion: None, log_session_id: Some(log_session_id), throttled: false, termination_reason: None, }) } } #[cfg(test)] mod tests_concurrency; #[cfg(test)] mod tests_selection;