diff --git a/server/src/agents/pool/start.rs b/server/src/agents/pool/start/mod.rs
similarity index 74%
rename from server/src/agents/pool/start.rs
rename to server/src/agents/pool/start/mod.rs
index f31481cb..3a89ec60
--- a/server/src/agents/pool/start.rs
+++ b/server/src/agents/pool/start/mod.rs
@@ -16,6 +16,12 @@ use super::types::{PendingGuard, StoryAgent, composite_key};
 use super::worktree::find_active_story_stage;
 use super::{AgentPool, auto_assign};
 
+mod spawn;
+mod validation;
+
+use validation::{read_front_matter_agent, validate_agent_stage};
+
+
 impl AgentPool {
     /// Start an agent for a story: load config, create worktree, spawn agent.
     ///
@@ -71,45 +77,12 @@ impl AgentPool {
         }
 
         // Validate that the agent's configured stage matches the story's
-        // pipeline stage. This prevents any caller (auto-assign, MCP tool,
-        // pipeline advance, supervisor) from starting a wrong-stage agent on
-        // a story — e.g. mergemaster on a coding-stage story (bug 312).
-        if let Some(name) = agent_name {
-            let agent_stage = config
-                .find_agent(name)
-                .map(agent_config_stage)
-                .unwrap_or_else(|| pipeline_stage(name));
-            if agent_stage != PipelineStage::Other
-                && let Some(story_stage_dir) = find_active_story_stage(project_root, story_id)
-            {
-                let expected_stage = match story_stage_dir {
-                    "2_current" => PipelineStage::Coder,
-                    "3_qa" => PipelineStage::Qa,
-                    "4_merge" => PipelineStage::Mergemaster,
-                    _ => PipelineStage::Other,
-                };
-                if expected_stage != PipelineStage::Other && expected_stage != agent_stage {
-                    return Err(format!(
-                        "Agent '{name}' (stage: {agent_stage:?}) cannot be assigned to \
-                         story '{story_id}' in {story_stage_dir}/ (requires stage: {expected_stage:?})"
-                    ));
-                }
-            }
-        }
+        // pipeline stage. (See validation::validate_agent_stage.)
+        validate_agent_stage(&config, project_root, story_id, agent_name)?;
 
         // Read the preferred agent from the story's front matter before acquiring
-        // the lock. When no explicit agent_name is given, this lets start_agent
-        // honour `agent: coder-opus` written by the `assign` command — mirroring
-        // the auto_assign path (bug 379).
-        let front_matter_agent: Option<String> = if agent_name.is_none() {
-            crate::db::read_content(story_id).and_then(|contents| {
-                crate::io::story_metadata::parse_front_matter(&contents)
-                    .ok()?
-                    .agent
-            })
-        } else {
-            None
-        };
+        // the lock. (See validation::read_front_matter_agent.)
+        let front_matter_agent: Option<String> = read_front_matter_agent(story_id, agent_name);
 
         // Atomically resolve agent name, check availability, and register as
         // Pending. When `agent_name` is `None` the first idle coder is
@@ -320,317 +293,28 @@ impl AgentPool {
             .unwrap_or(300);
 
         // Clone all values needed inside the background spawn.
-        let project_root_clone = project_root.to_path_buf();
-        let config_clone = config.clone();
-        let resume_context_owned = resume_context.map(str::to_string);
-        let session_id_to_resume_owned = session_id_to_resume;
-        let sid = story_id.to_string();
-        let aname = resolved_name.clone();
-        let tx_clone = tx.clone();
-        let agents_ref = self.agents.clone();
-        let key_clone = key.clone();
-        let log_clone = event_log.clone();
-        let port_for_task = self.port;
-        let log_writer_clone = log_writer.clone();
-        let child_killers_clone = self.child_killers.clone();
-        let watcher_tx_clone = self.watcher_tx.clone();
-        let merge_jobs_clone = Arc::clone(&self.merge_jobs);
-
         // Spawn the background task.
Worktree creation and agent launch happen here // so `start_agent` returns immediately after registering the agent as // Pending — non-blocking by design (story 157). - let handle = tokio::spawn(async move { - // Step 1: create the worktree (slow — git checkout, pnpm install, etc.) - let wt_info = match crate::worktree::create_worktree( - &project_root_clone, - &sid, - &config_clone, - port_for_task, - ) - .await - { - Ok(wt) => wt, - Err(e) => { - let error_msg = format!("Failed to create worktree: {e}"); - slog_error!("[agents] {error_msg}"); - let event = AgentEvent::Error { - story_id: sid.clone(), - agent_name: aname.clone(), - message: error_msg, - }; - if let Ok(mut log) = log_clone.lock() { - log.push(event.clone()); - } - let _ = tx_clone.send(event); - if let Ok(mut agents) = agents_ref.lock() - && let Some(agent) = agents.get_mut(&key_clone) - { - agent.status = AgentStatus::Failed; - } - AgentPool::notify_agent_state_changed(&watcher_tx_clone); - return; - } - }; + let handle = tokio::spawn(spawn::run_agent_spawn( + project_root.to_path_buf(), + config.clone(), + resume_context.map(str::to_string), + session_id_to_resume, + story_id.to_string(), + resolved_name.clone(), + tx.clone(), + self.agents.clone(), + key.clone(), + event_log.clone(), + self.port, + log_writer.clone(), + self.child_killers.clone(), + self.watcher_tx.clone(), + Arc::clone(&self.merge_jobs), + inactivity_timeout_secs, + )); - // Step 2: store worktree info and render agent command/args/prompt. - let wt_path_str = wt_info.path.to_string_lossy().to_string(); - { - if let Ok(mut agents) = agents_ref.lock() - && let Some(agent) = agents.get_mut(&key_clone) - { - agent.worktree_info = Some(wt_info.clone()); - } - } - - let (command, args, mut prompt) = match config_clone.render_agent_args( - &wt_path_str, - &sid, - Some(&aname), - Some(&wt_info.base_branch), - ) { - Ok(result) => result, - Err(e) => { - let error_msg = format!("Failed to render agent args: {e}"); - slog_error!("[agents] {error_msg}"); - let event = AgentEvent::Error { - story_id: sid.clone(), - agent_name: aname.clone(), - message: error_msg, - }; - if let Ok(mut log) = log_clone.lock() { - log.push(event.clone()); - } - let _ = tx_clone.send(event); - if let Ok(mut agents) = agents_ref.lock() - && let Some(agent) = agents.get_mut(&key_clone) - { - agent.status = AgentStatus::Failed; - } - AgentPool::notify_agent_state_changed(&watcher_tx_clone); - return; - } - }; - - // Append project-local prompt content (.huskies/AGENT.md) to the - // baked-in prompt so every agent role sees project-specific guidance - // without any config changes. The file is read fresh each spawn; - // if absent or empty, the prompt is unchanged and no warning is logged. - if let Some(local) = - crate::agents::local_prompt::read_project_local_prompt(&project_root_clone) - { - prompt.push_str("\n\n"); - prompt.push_str(&local); - } - - // Build the effective prompt and determine resume session. - // - // When resuming a previous session, discard the full rendered prompt - // (which would re-read CLAUDE.md and README) and send only the gate - // failure context as a new message. On a fresh start, append the - // failure context to the original prompt as before. - let effective_prompt = match &session_id_to_resume_owned { - Some(_) => resume_context_owned.unwrap_or_default(), - None => { - if let Some(ctx) = resume_context_owned { - prompt.push_str(&ctx); - } - prompt - } - }; - - // Step 3: transition to Running now that the worktree is ready. 
- { - if let Ok(mut agents) = agents_ref.lock() - && let Some(agent) = agents.get_mut(&key_clone) - { - agent.status = AgentStatus::Running; - } - } - let _ = tx_clone.send(AgentEvent::Status { - story_id: sid.clone(), - agent_name: aname.clone(), - status: "running".to_string(), - }); - AgentPool::notify_agent_state_changed(&watcher_tx_clone); - - // Step 4: launch the agent process via the configured runtime. - let runtime_name = config_clone - .find_agent(&aname) - .and_then(|a| a.runtime.as_deref()) - .unwrap_or("claude-code"); - - let run_result = match runtime_name { - "claude-code" => { - let runtime = ClaudeCodeRuntime::new( - child_killers_clone.clone(), - watcher_tx_clone.clone(), - ); - let ctx = RuntimeContext { - story_id: sid.clone(), - agent_name: aname.clone(), - command, - args, - prompt: effective_prompt, - cwd: wt_path_str, - inactivity_timeout_secs, - mcp_port: port_for_task, - session_id_to_resume: session_id_to_resume_owned.clone(), - }; - runtime - .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone) - .await - } - "gemini" => { - let runtime = GeminiRuntime::new(); - let ctx = RuntimeContext { - story_id: sid.clone(), - agent_name: aname.clone(), - command, - args, - prompt: effective_prompt, - cwd: wt_path_str, - inactivity_timeout_secs, - mcp_port: port_for_task, - session_id_to_resume: session_id_to_resume_owned.clone(), - }; - runtime - .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone) - .await - } - "openai" => { - let runtime = OpenAiRuntime::new(); - let ctx = RuntimeContext { - story_id: sid.clone(), - agent_name: aname.clone(), - command, - args, - prompt: effective_prompt, - cwd: wt_path_str, - inactivity_timeout_secs, - mcp_port: port_for_task, - session_id_to_resume: session_id_to_resume_owned, - }; - runtime - .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone) - .await - } - other => Err(format!( - "Unknown agent runtime '{other}'; check the 'runtime' field in project.toml. \ - Supported: 'claude-code', 'gemini', 'openai'" - )), - }; - - match run_result { - Ok(result) => { - // Persist token usage if the agent reported it. - if let Some(ref usage) = result.token_usage - && let Ok(agents) = agents_ref.lock() - && let Some(agent) = agents.get(&key_clone) - && let Some(ref pr) = agent.project_root - { - let model = config_clone - .find_agent(&aname) - .and_then(|a| a.model.clone()); - let record = crate::agents::token_usage::build_record( - &sid, - &aname, - model, - usage.clone(), - ); - if let Err(e) = crate::agents::token_usage::append_record(pr, &record) { - slog_error!( - "[agents] Failed to persist token usage for \ - {sid}:{aname}: {e}" - ); - } - } - - // Mergemaster agents have their own completion path via - // start_merge_agent_work / run_merge_pipeline and must NOT go - // through server-owned gates. When a mergemaster exits early - // (e.g. rate-limited before calling start_merge_agent_work) the - // feature-branch worktree compiles fine and post-merge tests on - // master pass (nothing changed), which would wrongly advance the - // story to 5_done/ without any squash merge having occurred. - // Instead: just remove the agent from the pool and let - // auto-assign restart a new mergemaster for the story. 
- let stage = config_clone - .find_agent(&aname) - .map(agent_config_stage) - .unwrap_or_else(|| pipeline_stage(&aname)); - if stage == PipelineStage::Mergemaster { - let (tx_done, done_session_id) = { - let mut lock = match agents_ref.lock() { - Ok(a) => a, - Err(_) => return, - }; - if let Some(agent) = lock.remove(&key_clone) { - (agent.tx, agent.session_id.or(result.session_id)) - } else { - (tx_clone.clone(), result.session_id) - } - }; - // Clear any stale Running merge job so the next mergemaster - // can call start_merge_agent_work without hitting "Merge - // already in progress" (bug 498). - if let Ok(mut jobs) = merge_jobs_clone.lock() - && let Some(job) = jobs.get(&sid) - && matches!(job.status, crate::agents::merge::MergeJobStatus::Running) - { - jobs.remove(&sid); - } - let _ = tx_done.send(AgentEvent::Done { - story_id: sid.clone(), - agent_name: aname.clone(), - session_id: done_session_id, - }); - AgentPool::notify_agent_state_changed(&watcher_tx_clone); - // Send a WorkItem event so the auto-assign watcher loop - // re-dispatches a new mergemaster if the story still needs - // merging. This avoids an async call to start_agent inside - // a tokio::spawn (which would require Send). - let _ = watcher_tx_clone.send(crate::io::watcher::WatcherEvent::WorkItem { - stage: "4_merge".to_string(), - item_id: sid.clone(), - action: "reassign".to_string(), - commit_msg: String::new(), - from_stage: None, - }); - } else { - // Server-owned completion: run acceptance gates automatically - // when the agent process exits normally. - super::pipeline::run_server_owned_completion( - &agents_ref, - port_for_task, - &sid, - &aname, - result.session_id, - watcher_tx_clone.clone(), - ) - .await; - AgentPool::notify_agent_state_changed(&watcher_tx_clone); - } - } - Err(e) => { - slog_error!("[agents] Agent process error for {aname} on {sid}: {e}"); - let event = AgentEvent::Error { - story_id: sid.clone(), - agent_name: aname.clone(), - message: e, - }; - if let Ok(mut log) = log_clone.lock() { - log.push(event.clone()); - } - let _ = tx_clone.send(event); - if let Ok(mut agents) = agents_ref.lock() - && let Some(agent) = agents.get_mut(&key_clone) - { - agent.status = AgentStatus::Failed; - } - AgentPool::notify_agent_state_changed(&watcher_tx_clone); - } - } - }); // Store the task handle while the agent is still Pending. { diff --git a/server/src/agents/pool/start/spawn.rs b/server/src/agents/pool/start/spawn.rs new file mode 100644 index 00000000..d9fbd90d --- /dev/null +++ b/server/src/agents/pool/start/spawn.rs @@ -0,0 +1,361 @@ +//! Background async work spawned by `AgentPool::start_agent`. +//! +//! `start_agent` returns immediately after registering the agent as `Pending`; +//! this module runs the slow worktree creation, agent process launch, and +//! event streaming in the background (story 157). + +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::{Arc, Mutex}; + +use portable_pty::ChildKiller; +use tokio::sync::broadcast; + +use crate::agent_log::AgentLogWriter; +use crate::config::ProjectConfig; +use crate::io::watcher::WatcherEvent; +use crate::slog_error; + +use super::super::super::{ + AgentEvent, AgentStatus, PipelineStage, agent_config_stage, pipeline_stage, +}; +use super::super::super::merge::MergeJob; +use super::super::AgentPool; +use super::super::super::runtime::{ + AgentRuntime, ClaudeCodeRuntime, GeminiRuntime, OpenAiRuntime, RuntimeContext, +}; +use super::super::types::StoryAgent; + +/// Run the background worktree-creation + agent-launch flow. 
+///
+/// Caller (`AgentPool::start_agent`) wraps this in `tokio::spawn` and stores
+/// the resulting handle on the Pending entry so cancellation works.
+#[allow(clippy::too_many_arguments)]
+pub(super) async fn run_agent_spawn(
+    project_root: PathBuf,
+    config: ProjectConfig,
+    resume_context: Option<String>,
+    session_id_to_resume: Option<String>,
+    story_id: String,
+    agent_name: String,
+    tx: broadcast::Sender<AgentEvent>,
+    agents: Arc<Mutex<HashMap<String, StoryAgent>>>,
+    key: String,
+    event_log: Arc<Mutex<Vec<AgentEvent>>>,
+    port: u16,
+    log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
+    child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
+    watcher_tx: broadcast::Sender<WatcherEvent>,
+    merge_jobs: Arc<Mutex<HashMap<String, MergeJob>>>,
+    inactivity_timeout_secs: u64,
+) {
+    // Re-bind to the legacy `_clone` / `_owned` names so the body below remains
+    // a verbatim copy of the original closure (story 157).
+    let project_root_clone = project_root;
+    let config_clone = config;
+    let resume_context_owned = resume_context;
+    let session_id_to_resume_owned = session_id_to_resume;
+    let sid = story_id;
+    let aname = agent_name;
+    let tx_clone = tx;
+    let agents_ref = agents;
+    let key_clone = key;
+    let log_clone = event_log;
+    let port_for_task = port;
+    let log_writer_clone = log_writer;
+    let child_killers_clone = child_killers;
+    let watcher_tx_clone = watcher_tx;
+    let merge_jobs_clone = merge_jobs;
+    let _ = inactivity_timeout_secs; // currently unused inside the closure body
+
+    // Step 1: create the worktree (slow — git checkout, pnpm install, etc.)
+    let wt_info = match crate::worktree::create_worktree(
+        &project_root_clone,
+        &sid,
+        &config_clone,
+        port_for_task,
+    )
+    .await
+    {
+        Ok(wt) => wt,
+        Err(e) => {
+            let error_msg = format!("Failed to create worktree: {e}");
+            slog_error!("[agents] {error_msg}");
+            let event = AgentEvent::Error {
+                story_id: sid.clone(),
+                agent_name: aname.clone(),
+                message: error_msg,
+            };
+            if let Ok(mut log) = log_clone.lock() {
+                log.push(event.clone());
+            }
+            let _ = tx_clone.send(event);
+            if let Ok(mut agents) = agents_ref.lock()
+                && let Some(agent) = agents.get_mut(&key_clone)
+            {
+                agent.status = AgentStatus::Failed;
+            }
+            AgentPool::notify_agent_state_changed(&watcher_tx_clone);
+            return;
+        }
+    };
+
+    // Step 2: store worktree info and render agent command/args/prompt.
+    let wt_path_str = wt_info.path.to_string_lossy().to_string();
+    {
+        if let Ok(mut agents) = agents_ref.lock()
+            && let Some(agent) = agents.get_mut(&key_clone)
+        {
+            agent.worktree_info = Some(wt_info.clone());
+        }
+    }
+
+    let (command, args, mut prompt) = match config_clone.render_agent_args(
+        &wt_path_str,
+        &sid,
+        Some(&aname),
+        Some(&wt_info.base_branch),
+    ) {
+        Ok(result) => result,
+        Err(e) => {
+            let error_msg = format!("Failed to render agent args: {e}");
+            slog_error!("[agents] {error_msg}");
+            let event = AgentEvent::Error {
+                story_id: sid.clone(),
+                agent_name: aname.clone(),
+                message: error_msg,
+            };
+            if let Ok(mut log) = log_clone.lock() {
+                log.push(event.clone());
+            }
+            let _ = tx_clone.send(event);
+            if let Ok(mut agents) = agents_ref.lock()
+                && let Some(agent) = agents.get_mut(&key_clone)
+            {
+                agent.status = AgentStatus::Failed;
+            }
+            AgentPool::notify_agent_state_changed(&watcher_tx_clone);
+            return;
+        }
+    };
+
+    // Append project-local prompt content (.huskies/AGENT.md) to the
+    // baked-in prompt so every agent role sees project-specific guidance
+    // without any config changes. The file is read fresh each spawn;
+    // if absent or empty, the prompt is unchanged and no warning is logged.
+ if let Some(local) = + crate::agents::local_prompt::read_project_local_prompt(&project_root_clone) + { + prompt.push_str("\n\n"); + prompt.push_str(&local); + } + + // Build the effective prompt and determine resume session. + // + // When resuming a previous session, discard the full rendered prompt + // (which would re-read CLAUDE.md and README) and send only the gate + // failure context as a new message. On a fresh start, append the + // failure context to the original prompt as before. + let effective_prompt = match &session_id_to_resume_owned { + Some(_) => resume_context_owned.unwrap_or_default(), + None => { + if let Some(ctx) = resume_context_owned { + prompt.push_str(&ctx); + } + prompt + } + }; + + // Step 3: transition to Running now that the worktree is ready. + { + if let Ok(mut agents) = agents_ref.lock() + && let Some(agent) = agents.get_mut(&key_clone) + { + agent.status = AgentStatus::Running; + } + } + let _ = tx_clone.send(AgentEvent::Status { + story_id: sid.clone(), + agent_name: aname.clone(), + status: "running".to_string(), + }); + AgentPool::notify_agent_state_changed(&watcher_tx_clone); + + // Step 4: launch the agent process via the configured runtime. + let runtime_name = config_clone + .find_agent(&aname) + .and_then(|a| a.runtime.as_deref()) + .unwrap_or("claude-code"); + + let run_result = match runtime_name { + "claude-code" => { + let runtime = ClaudeCodeRuntime::new( + child_killers_clone.clone(), + watcher_tx_clone.clone(), + ); + let ctx = RuntimeContext { + story_id: sid.clone(), + agent_name: aname.clone(), + command, + args, + prompt: effective_prompt, + cwd: wt_path_str, + inactivity_timeout_secs, + mcp_port: port_for_task, + session_id_to_resume: session_id_to_resume_owned.clone(), + }; + runtime + .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone) + .await + } + "gemini" => { + let runtime = GeminiRuntime::new(); + let ctx = RuntimeContext { + story_id: sid.clone(), + agent_name: aname.clone(), + command, + args, + prompt: effective_prompt, + cwd: wt_path_str, + inactivity_timeout_secs, + mcp_port: port_for_task, + session_id_to_resume: session_id_to_resume_owned.clone(), + }; + runtime + .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone) + .await + } + "openai" => { + let runtime = OpenAiRuntime::new(); + let ctx = RuntimeContext { + story_id: sid.clone(), + agent_name: aname.clone(), + command, + args, + prompt: effective_prompt, + cwd: wt_path_str, + inactivity_timeout_secs, + mcp_port: port_for_task, + session_id_to_resume: session_id_to_resume_owned, + }; + runtime + .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone) + .await + } + other => Err(format!( + "Unknown agent runtime '{other}'; check the 'runtime' field in project.toml. \ + Supported: 'claude-code', 'gemini', 'openai'" + )), + }; + + match run_result { + Ok(result) => { + // Persist token usage if the agent reported it. 
+ if let Some(ref usage) = result.token_usage + && let Ok(agents) = agents_ref.lock() + && let Some(agent) = agents.get(&key_clone) + && let Some(ref pr) = agent.project_root + { + let model = config_clone + .find_agent(&aname) + .and_then(|a| a.model.clone()); + let record = crate::agents::token_usage::build_record( + &sid, + &aname, + model, + usage.clone(), + ); + if let Err(e) = crate::agents::token_usage::append_record(pr, &record) { + slog_error!( + "[agents] Failed to persist token usage for \ + {sid}:{aname}: {e}" + ); + } + } + + // Mergemaster agents have their own completion path via + // start_merge_agent_work / run_merge_pipeline and must NOT go + // through server-owned gates. When a mergemaster exits early + // (e.g. rate-limited before calling start_merge_agent_work) the + // feature-branch worktree compiles fine and post-merge tests on + // master pass (nothing changed), which would wrongly advance the + // story to 5_done/ without any squash merge having occurred. + // Instead: just remove the agent from the pool and let + // auto-assign restart a new mergemaster for the story. + let stage = config_clone + .find_agent(&aname) + .map(agent_config_stage) + .unwrap_or_else(|| pipeline_stage(&aname)); + if stage == PipelineStage::Mergemaster { + let (tx_done, done_session_id) = { + let mut lock = match agents_ref.lock() { + Ok(a) => a, + Err(_) => return, + }; + if let Some(agent) = lock.remove(&key_clone) { + (agent.tx, agent.session_id.or(result.session_id)) + } else { + (tx_clone.clone(), result.session_id) + } + }; + // Clear any stale Running merge job so the next mergemaster + // can call start_merge_agent_work without hitting "Merge + // already in progress" (bug 498). + if let Ok(mut jobs) = merge_jobs_clone.lock() + && let Some(job) = jobs.get(&sid) + && matches!(job.status, crate::agents::merge::MergeJobStatus::Running) + { + jobs.remove(&sid); + } + let _ = tx_done.send(AgentEvent::Done { + story_id: sid.clone(), + agent_name: aname.clone(), + session_id: done_session_id, + }); + AgentPool::notify_agent_state_changed(&watcher_tx_clone); + // Send a WorkItem event so the auto-assign watcher loop + // re-dispatches a new mergemaster if the story still needs + // merging. This avoids an async call to start_agent inside + // a tokio::spawn (which would require Send). + let _ = watcher_tx_clone.send(crate::io::watcher::WatcherEvent::WorkItem { + stage: "4_merge".to_string(), + item_id: sid.clone(), + action: "reassign".to_string(), + commit_msg: String::new(), + from_stage: None, + }); + } else { + // Server-owned completion: run acceptance gates automatically + // when the agent process exits normally. 
+                super::super::pipeline::run_server_owned_completion(
+                    &agents_ref,
+                    port_for_task,
+                    &sid,
+                    &aname,
+                    result.session_id,
+                    watcher_tx_clone.clone(),
+                )
+                .await;
+                AgentPool::notify_agent_state_changed(&watcher_tx_clone);
+            }
+        }
+        Err(e) => {
+            slog_error!("[agents] Agent process error for {aname} on {sid}: {e}");
+            let event = AgentEvent::Error {
+                story_id: sid.clone(),
+                agent_name: aname.clone(),
+                message: e,
+            };
+            if let Ok(mut log) = log_clone.lock() {
+                log.push(event.clone());
+            }
+            let _ = tx_clone.send(event);
+            if let Ok(mut agents) = agents_ref.lock()
+                && let Some(agent) = agents.get_mut(&key_clone)
+            {
+                agent.status = AgentStatus::Failed;
+            }
+            AgentPool::notify_agent_state_changed(&watcher_tx_clone);
+        }
+    }
+}
diff --git a/server/src/agents/pool/start/validation.rs b/server/src/agents/pool/start/validation.rs
new file mode 100644
index 00000000..40860c42
--- /dev/null
+++ b/server/src/agents/pool/start/validation.rs
@@ -0,0 +1,69 @@
+//! Pre-lock validation helpers for `AgentPool::start_agent`.
+
+use std::path::Path;
+
+use crate::config::ProjectConfig;
+
+use super::super::super::{PipelineStage, agent_config_stage, pipeline_stage};
+use super::super::worktree::find_active_story_stage;
+
+/// Validate that an explicit `agent_name` is allowed to attach to `story_id`'s
+/// current pipeline stage.
+///
+/// Prevents wrong-stage assignments like a mergemaster on a coding-stage story
+/// (bug 312). Returns `Ok(())` if the agent has no specific stage (e.g.
+/// supervisor) or the story is not in an active stage; `Err` with a descriptive
+/// message on a stage mismatch.
+pub(super) fn validate_agent_stage(
+    config: &ProjectConfig,
+    project_root: &Path,
+    story_id: &str,
+    agent_name: Option<&str>,
+) -> Result<(), String> {
+    let Some(name) = agent_name else {
+        return Ok(());
+    };
+    let agent_stage = config
+        .find_agent(name)
+        .map(agent_config_stage)
+        .unwrap_or_else(|| pipeline_stage(name));
+    if agent_stage == PipelineStage::Other {
+        return Ok(());
+    }
+    let Some(story_stage_dir) = find_active_story_stage(project_root, story_id) else {
+        return Ok(());
+    };
+    let expected_stage = match story_stage_dir {
+        "2_current" => PipelineStage::Coder,
+        "3_qa" => PipelineStage::Qa,
+        "4_merge" => PipelineStage::Mergemaster,
+        _ => PipelineStage::Other,
+    };
+    if expected_stage != PipelineStage::Other && expected_stage != agent_stage {
+        return Err(format!(
+            "Agent '{name}' (stage: {agent_stage:?}) cannot be assigned to \
+             story '{story_id}' in {story_stage_dir}/ (requires stage: {expected_stage:?})"
+        ));
+    }
+    Ok(())
+}
+
+/// Read the preferred `agent:` field from the story's front matter.
+///
+/// When `agent_name` is `None` (caller is auto-selecting), this lets
+/// `start_agent` honour an explicit `agent: coder-opus` written by the
+/// `assign` command (bug 379). Returns `None` when an explicit agent_name
+/// was already supplied or when the story has no front-matter preference.
+pub(super) fn read_front_matter_agent(
+    story_id: &str,
+    agent_name: Option<&str>,
+) -> Option<String> {
+    if agent_name.is_some() {
+        return None;
+    }
+    crate::db::read_content(story_id).and_then(|contents| {
+        crate::io::story_metadata::parse_front_matter(&contents)
+            .ok()?
+            .agent
+    })
+}
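Appended illustration (not part of the patch): read_front_matter_agent leans on the story front-matter convention that the `assign` command writes (`agent: coder-opus`, bug 379). The sketch below shows that convention with a simplified stand-in parser. `parse_agent`, the `---` delimiters, and the story text are assumptions for illustration only; the real parsing lives in `crate::io::story_metadata::parse_front_matter`.

fn parse_agent(contents: &str) -> Option<String> {
    // Front matter is assumed to be the block between leading "---" markers.
    let rest = contents.strip_prefix("---\n")?;
    let (front_matter, _body) = rest.split_once("\n---")?;
    front_matter
        .lines()
        .find_map(|line| line.strip_prefix("agent:"))
        .map(|value| value.trim().to_string())
}

fn main() {
    // A story whose front matter carries a preferred agent (the bug 379 scenario).
    let story = "---\nagent: coder-opus\n---\n# Story 379\n";
    assert_eq!(parse_agent(story), Some("coder-opus".to_string()));

    // No front-matter preference: nothing to honour, mirroring the None return.
    assert_eq!(parse_agent("# Story without front matter\n"), None);
}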