refactor: split agents/pool/start.rs into mod.rs + validation.rs + spawn.rs

The 1630-line start.rs is split into a sub-module directory:

- validation.rs: validate_agent_stage + read_front_matter_agent helpers (69 lines)
- spawn.rs: run_agent_spawn — the background async work that was inlined as
  a tokio::spawn closure body inside start_agent (359 lines)
- mod.rs: AgentPool::start_agent orchestrator + tests (1062 lines)
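
The resulting layout, inferred from the commit title and the per-file line
counts above (the tree itself is not part of the diff):

    agents/pool/start/
    ├── mod.rs          AgentPool::start_agent orchestrator + tests
    ├── spawn.rs        run_agent_spawn background work
    └── validation.rs   validate_agent_stage + read_front_matter_agent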

Stage validation and front-matter agent reading are pre-lock pure helpers that
extract naturally. The spawn closure body becomes a free async fn that takes the
previously cloned values as parameters; they are rebound to the original
_clone / _owned names at the top of the body, so the actual work code is a
verbatim copy (sketched below).
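
A minimal, self-contained sketch of that extraction pattern; the names here are
illustrative only, not this repo's code, and it assumes the tokio crate with the
full feature set:

    use std::sync::Arc;

    // Before: values are cloned under `_clone` names, then captured by the
    // closure passed to tokio::spawn.
    async fn start_before(story_id: &str, log: Arc<Vec<String>>) {
        let sid = story_id.to_string();
        let log_clone = Arc::clone(&log);
        let handle = tokio::spawn(async move {
            println!("working on {sid}; {} prior events", log_clone.len());
        });
        let _ = handle.await;
    }

    // After: the closure body is a free async fn; re-binding the parameters
    // to the legacy names up front keeps the rest of the body a verbatim copy.
    async fn run_spawn(story_id: String, log: Arc<Vec<String>>) {
        let sid = story_id;
        let log_clone = log;
        println!("working on {sid}; {} prior events", log_clone.len());
    }

    async fn start_after(story_id: &str, log: Arc<Vec<String>>) {
        let handle = tokio::spawn(run_spawn(story_id.to_string(), Arc::clone(&log)));
        let _ = handle.await;
    }

    #[tokio::main]
    async fn main() {
        let log = Arc::new(vec!["queued".to_string()]);
        start_before("story-42", Arc::clone(&log)).await;
        start_after("story-42", log).await;
    }

Re-binding first means the diff for the moved body is pure relocation, which
keeps the refactor reviewable.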

No behaviour change. All 23 start tests pass; full suite green.
Author: dave
Date: 2026-04-26 22:12:04 +00:00
Parent: 40f1794d41
Commit: eca15b4ee7

3 changed files with 458 additions and 344 deletions
@@ -16,6 +16,12 @@ use super::types::{PendingGuard, StoryAgent, composite_key};
 use super::worktree::find_active_story_stage;
 use super::{AgentPool, auto_assign};
+
+mod spawn;
+mod validation;
+
+use validation::{read_front_matter_agent, validate_agent_stage};
+
 
 impl AgentPool {
     /// Start an agent for a story: load config, create worktree, spawn agent.
     ///
@@ -71,45 +77,12 @@ impl AgentPool {
         }
 
         // Validate that the agent's configured stage matches the story's
-        // pipeline stage. This prevents any caller (auto-assign, MCP tool,
-        // pipeline advance, supervisor) from starting a wrong-stage agent on
-        // a story — e.g. mergemaster on a coding-stage story (bug 312).
-        if let Some(name) = agent_name {
-            let agent_stage = config
-                .find_agent(name)
-                .map(agent_config_stage)
-                .unwrap_or_else(|| pipeline_stage(name));
-            if agent_stage != PipelineStage::Other
-                && let Some(story_stage_dir) = find_active_story_stage(project_root, story_id)
-            {
-                let expected_stage = match story_stage_dir {
-                    "2_current" => PipelineStage::Coder,
-                    "3_qa" => PipelineStage::Qa,
-                    "4_merge" => PipelineStage::Mergemaster,
-                    _ => PipelineStage::Other,
-                };
-                if expected_stage != PipelineStage::Other && expected_stage != agent_stage {
-                    return Err(format!(
-                        "Agent '{name}' (stage: {agent_stage:?}) cannot be assigned to \
-                         story '{story_id}' in {story_stage_dir}/ (requires stage: {expected_stage:?})"
-                    ));
-                }
-            }
-        }
+        // pipeline stage. (See validation::validate_agent_stage.)
+        validate_agent_stage(&config, project_root, story_id, agent_name)?;
 
         // Read the preferred agent from the story's front matter before acquiring
-        // the lock. When no explicit agent_name is given, this lets start_agent
-        // honour `agent: coder-opus` written by the `assign` command — mirroring
-        // the auto_assign path (bug 379).
-        let front_matter_agent: Option<String> = if agent_name.is_none() {
-            crate::db::read_content(story_id).and_then(|contents| {
-                crate::io::story_metadata::parse_front_matter(&contents)
-                    .ok()?
-                    .agent
-            })
-        } else {
-            None
-        };
+        // the lock. (See validation::read_front_matter_agent.)
+        let front_matter_agent: Option<String> = read_front_matter_agent(story_id, agent_name);
 
         // Atomically resolve agent name, check availability, and register as
         // Pending. When `agent_name` is `None` the first idle coder is
@@ -320,317 +293,28 @@ impl AgentPool {
             .unwrap_or(300);
 
         // Clone all values needed inside the background spawn.
-        let project_root_clone = project_root.to_path_buf();
-        let config_clone = config.clone();
-        let resume_context_owned = resume_context.map(str::to_string);
-        let session_id_to_resume_owned = session_id_to_resume;
-        let sid = story_id.to_string();
-        let aname = resolved_name.clone();
-        let tx_clone = tx.clone();
-        let agents_ref = self.agents.clone();
-        let key_clone = key.clone();
-        let log_clone = event_log.clone();
-        let port_for_task = self.port;
-        let log_writer_clone = log_writer.clone();
-        let child_killers_clone = self.child_killers.clone();
-        let watcher_tx_clone = self.watcher_tx.clone();
-        let merge_jobs_clone = Arc::clone(&self.merge_jobs);
-
         // Spawn the background task. Worktree creation and agent launch happen here
         // so `start_agent` returns immediately after registering the agent as
         // Pending — non-blocking by design (story 157).
-        let handle = tokio::spawn(async move {
-            // Step 1: create the worktree (slow — git checkout, pnpm install, etc.)
-            let wt_info = match crate::worktree::create_worktree(
-                &project_root_clone,
-                &sid,
-                &config_clone,
-                port_for_task,
-            )
-            .await
-            {
-                Ok(wt) => wt,
-                Err(e) => {
-                    let error_msg = format!("Failed to create worktree: {e}");
-                    slog_error!("[agents] {error_msg}");
-                    let event = AgentEvent::Error {
-                        story_id: sid.clone(),
-                        agent_name: aname.clone(),
-                        message: error_msg,
-                    };
-                    if let Ok(mut log) = log_clone.lock() {
-                        log.push(event.clone());
-                    }
-                    let _ = tx_clone.send(event);
-                    if let Ok(mut agents) = agents_ref.lock()
-                        && let Some(agent) = agents.get_mut(&key_clone)
-                    {
-                        agent.status = AgentStatus::Failed;
-                    }
-                    AgentPool::notify_agent_state_changed(&watcher_tx_clone);
-                    return;
-                }
-            };
-
-            // Step 2: store worktree info and render agent command/args/prompt.
-            let wt_path_str = wt_info.path.to_string_lossy().to_string();
-            {
-                if let Ok(mut agents) = agents_ref.lock()
-                    && let Some(agent) = agents.get_mut(&key_clone)
-                {
-                    agent.worktree_info = Some(wt_info.clone());
-                }
-            }
-            let (command, args, mut prompt) = match config_clone.render_agent_args(
-                &wt_path_str,
-                &sid,
-                Some(&aname),
-                Some(&wt_info.base_branch),
-            ) {
-                Ok(result) => result,
-                Err(e) => {
-                    let error_msg = format!("Failed to render agent args: {e}");
-                    slog_error!("[agents] {error_msg}");
-                    let event = AgentEvent::Error {
-                        story_id: sid.clone(),
-                        agent_name: aname.clone(),
-                        message: error_msg,
-                    };
-                    if let Ok(mut log) = log_clone.lock() {
-                        log.push(event.clone());
-                    }
-                    let _ = tx_clone.send(event);
-                    if let Ok(mut agents) = agents_ref.lock()
-                        && let Some(agent) = agents.get_mut(&key_clone)
-                    {
-                        agent.status = AgentStatus::Failed;
-                    }
-                    AgentPool::notify_agent_state_changed(&watcher_tx_clone);
-                    return;
-                }
-            };
-
-            // Append project-local prompt content (.huskies/AGENT.md) to the
-            // baked-in prompt so every agent role sees project-specific guidance
-            // without any config changes. The file is read fresh each spawn;
-            // if absent or empty, the prompt is unchanged and no warning is logged.
-            if let Some(local) =
-                crate::agents::local_prompt::read_project_local_prompt(&project_root_clone)
-            {
-                prompt.push_str("\n\n");
-                prompt.push_str(&local);
-            }
-
-            // Build the effective prompt and determine resume session.
-            //
-            // When resuming a previous session, discard the full rendered prompt
-            // (which would re-read CLAUDE.md and README) and send only the gate
-            // failure context as a new message. On a fresh start, append the
-            // failure context to the original prompt as before.
-            let effective_prompt = match &session_id_to_resume_owned {
-                Some(_) => resume_context_owned.unwrap_or_default(),
-                None => {
-                    if let Some(ctx) = resume_context_owned {
-                        prompt.push_str(&ctx);
-                    }
-                    prompt
-                }
-            };
-
-            // Step 3: transition to Running now that the worktree is ready.
-            {
-                if let Ok(mut agents) = agents_ref.lock()
-                    && let Some(agent) = agents.get_mut(&key_clone)
-                {
-                    agent.status = AgentStatus::Running;
-                }
-            }
-            let _ = tx_clone.send(AgentEvent::Status {
-                story_id: sid.clone(),
-                agent_name: aname.clone(),
-                status: "running".to_string(),
-            });
-            AgentPool::notify_agent_state_changed(&watcher_tx_clone);
-
-            // Step 4: launch the agent process via the configured runtime.
-            let runtime_name = config_clone
-                .find_agent(&aname)
-                .and_then(|a| a.runtime.as_deref())
-                .unwrap_or("claude-code");
-            let run_result = match runtime_name {
-                "claude-code" => {
-                    let runtime = ClaudeCodeRuntime::new(
-                        child_killers_clone.clone(),
-                        watcher_tx_clone.clone(),
-                    );
-                    let ctx = RuntimeContext {
-                        story_id: sid.clone(),
-                        agent_name: aname.clone(),
-                        command,
-                        args,
-                        prompt: effective_prompt,
-                        cwd: wt_path_str,
-                        inactivity_timeout_secs,
-                        mcp_port: port_for_task,
-                        session_id_to_resume: session_id_to_resume_owned.clone(),
-                    };
-                    runtime
-                        .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
-                        .await
-                }
-                "gemini" => {
-                    let runtime = GeminiRuntime::new();
-                    let ctx = RuntimeContext {
-                        story_id: sid.clone(),
-                        agent_name: aname.clone(),
-                        command,
-                        args,
-                        prompt: effective_prompt,
-                        cwd: wt_path_str,
-                        inactivity_timeout_secs,
-                        mcp_port: port_for_task,
-                        session_id_to_resume: session_id_to_resume_owned.clone(),
-                    };
-                    runtime
-                        .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
-                        .await
-                }
-                "openai" => {
-                    let runtime = OpenAiRuntime::new();
-                    let ctx = RuntimeContext {
-                        story_id: sid.clone(),
-                        agent_name: aname.clone(),
-                        command,
-                        args,
-                        prompt: effective_prompt,
-                        cwd: wt_path_str,
-                        inactivity_timeout_secs,
-                        mcp_port: port_for_task,
-                        session_id_to_resume: session_id_to_resume_owned,
-                    };
-                    runtime
-                        .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
-                        .await
-                }
-                other => Err(format!(
-                    "Unknown agent runtime '{other}'; check the 'runtime' field in project.toml. \
-                     Supported: 'claude-code', 'gemini', 'openai'"
-                )),
-            };
-
-            match run_result {
-                Ok(result) => {
-                    // Persist token usage if the agent reported it.
-                    if let Some(ref usage) = result.token_usage
-                        && let Ok(agents) = agents_ref.lock()
-                        && let Some(agent) = agents.get(&key_clone)
-                        && let Some(ref pr) = agent.project_root
-                    {
-                        let model = config_clone
-                            .find_agent(&aname)
-                            .and_then(|a| a.model.clone());
-                        let record = crate::agents::token_usage::build_record(
-                            &sid,
-                            &aname,
-                            model,
-                            usage.clone(),
-                        );
-                        if let Err(e) = crate::agents::token_usage::append_record(pr, &record) {
-                            slog_error!(
-                                "[agents] Failed to persist token usage for \
-                                 {sid}:{aname}: {e}"
-                            );
-                        }
-                    }
-
-                    // Mergemaster agents have their own completion path via
-                    // start_merge_agent_work / run_merge_pipeline and must NOT go
-                    // through server-owned gates. When a mergemaster exits early
-                    // (e.g. rate-limited before calling start_merge_agent_work) the
-                    // feature-branch worktree compiles fine and post-merge tests on
-                    // master pass (nothing changed), which would wrongly advance the
-                    // story to 5_done/ without any squash merge having occurred.
-                    // Instead: just remove the agent from the pool and let
-                    // auto-assign restart a new mergemaster for the story.
-                    let stage = config_clone
-                        .find_agent(&aname)
-                        .map(agent_config_stage)
-                        .unwrap_or_else(|| pipeline_stage(&aname));
-                    if stage == PipelineStage::Mergemaster {
-                        let (tx_done, done_session_id) = {
-                            let mut lock = match agents_ref.lock() {
-                                Ok(a) => a,
-                                Err(_) => return,
-                            };
-                            if let Some(agent) = lock.remove(&key_clone) {
-                                (agent.tx, agent.session_id.or(result.session_id))
-                            } else {
-                                (tx_clone.clone(), result.session_id)
-                            }
-                        };
-                        // Clear any stale Running merge job so the next mergemaster
-                        // can call start_merge_agent_work without hitting "Merge
-                        // already in progress" (bug 498).
-                        if let Ok(mut jobs) = merge_jobs_clone.lock()
-                            && let Some(job) = jobs.get(&sid)
-                            && matches!(job.status, crate::agents::merge::MergeJobStatus::Running)
-                        {
-                            jobs.remove(&sid);
-                        }
-                        let _ = tx_done.send(AgentEvent::Done {
-                            story_id: sid.clone(),
-                            agent_name: aname.clone(),
-                            session_id: done_session_id,
-                        });
-                        AgentPool::notify_agent_state_changed(&watcher_tx_clone);
-                        // Send a WorkItem event so the auto-assign watcher loop
-                        // re-dispatches a new mergemaster if the story still needs
-                        // merging. This avoids an async call to start_agent inside
-                        // a tokio::spawn (which would require Send).
-                        let _ = watcher_tx_clone.send(crate::io::watcher::WatcherEvent::WorkItem {
-                            stage: "4_merge".to_string(),
-                            item_id: sid.clone(),
-                            action: "reassign".to_string(),
-                            commit_msg: String::new(),
-                            from_stage: None,
-                        });
-                    } else {
-                        // Server-owned completion: run acceptance gates automatically
-                        // when the agent process exits normally.
-                        super::pipeline::run_server_owned_completion(
-                            &agents_ref,
-                            port_for_task,
-                            &sid,
-                            &aname,
-                            result.session_id,
-                            watcher_tx_clone.clone(),
-                        )
-                        .await;
-                        AgentPool::notify_agent_state_changed(&watcher_tx_clone);
-                    }
-                }
-                Err(e) => {
-                    slog_error!("[agents] Agent process error for {aname} on {sid}: {e}");
-                    let event = AgentEvent::Error {
-                        story_id: sid.clone(),
-                        agent_name: aname.clone(),
-                        message: e,
-                    };
-                    if let Ok(mut log) = log_clone.lock() {
-                        log.push(event.clone());
-                    }
-                    let _ = tx_clone.send(event);
-                    if let Ok(mut agents) = agents_ref.lock()
-                        && let Some(agent) = agents.get_mut(&key_clone)
-                    {
-                        agent.status = AgentStatus::Failed;
-                    }
-                    AgentPool::notify_agent_state_changed(&watcher_tx_clone);
-                }
-            }
-        });
+        let handle = tokio::spawn(spawn::run_agent_spawn(
+            project_root.to_path_buf(),
+            config.clone(),
+            resume_context.map(str::to_string),
+            session_id_to_resume,
+            story_id.to_string(),
+            resolved_name.clone(),
+            tx.clone(),
+            self.agents.clone(),
+            key.clone(),
+            event_log.clone(),
+            self.port,
+            log_writer.clone(),
+            self.child_killers.clone(),
+            self.watcher_tx.clone(),
+            Arc::clone(&self.merge_jobs),
+            inactivity_timeout_secs,
+        ));
 
         // Store the task handle while the agent is still Pending.
         {
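
For reference, the non-blocking shape the hunk above preserves (register as
Pending, spawn the slow work, keep the JoinHandle for cancellation) looks
roughly like the following sketch. Pool, Status, and the sleep are stand-ins,
not this crate's types; assumes tokio with the full feature set:

    use std::collections::HashMap;
    use std::sync::{Arc, Mutex};
    use std::time::Duration;
    use tokio::task::JoinHandle;

    #[derive(Debug)]
    enum Status { Pending, Running }

    struct Pool {
        statuses: Arc<Mutex<HashMap<String, Status>>>,
        handles: Mutex<HashMap<String, JoinHandle<()>>>,
    }

    impl Pool {
        // Returns immediately: the entry is registered as Pending and the slow
        // setup (worktree creation in the real code) runs in the background.
        fn start(&self, key: &str) {
            self.statuses
                .lock()
                .unwrap()
                .insert(key.to_string(), Status::Pending);
            let statuses = Arc::clone(&self.statuses);
            let k = key.to_string();
            let handle = tokio::spawn(async move {
                tokio::time::sleep(Duration::from_millis(10)).await; // stand-in for slow work
                statuses.lock().unwrap().insert(k, Status::Running);
            });
            // Keeping the handle lets a later cancel() abort the background task.
            self.handles.lock().unwrap().insert(key.to_string(), handle);
        }

        fn cancel(&self, key: &str) {
            if let Some(h) = self.handles.lock().unwrap().remove(key) {
                h.abort();
            }
        }
    }

    #[tokio::main]
    async fn main() {
        let pool = Pool {
            statuses: Arc::new(Mutex::new(HashMap::new())),
            handles: Mutex::new(HashMap::new()),
        };
        pool.start("story-42:coder");
        println!("{:?}", pool.statuses.lock().unwrap().get("story-42:coder")); // Pending
        tokio::time::sleep(Duration::from_millis(20)).await;
        println!("{:?}", pool.statuses.lock().unwrap().get("story-42:coder")); // Running
        pool.cancel("story-42:coder");
    }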
agents/pool/start/spawn.rs (new file, +361)
@@ -0,0 +1,361 @@
//! Background async work spawned by `AgentPool::start_agent`.
//!
//! `start_agent` returns immediately after registering the agent as `Pending`;
//! this module runs the slow worktree creation, agent process launch, and
//! event streaming in the background (story 157).

use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};

use portable_pty::ChildKiller;
use tokio::sync::broadcast;

use crate::agent_log::AgentLogWriter;
use crate::config::ProjectConfig;
use crate::io::watcher::WatcherEvent;
use crate::slog_error;

use super::super::super::{
    AgentEvent, AgentStatus, PipelineStage, agent_config_stage, pipeline_stage,
};
use super::super::super::merge::MergeJob;
use super::super::AgentPool;
use super::super::super::runtime::{
    AgentRuntime, ClaudeCodeRuntime, GeminiRuntime, OpenAiRuntime, RuntimeContext,
};
use super::super::types::StoryAgent;

/// Run the background worktree-creation + agent-launch flow.
///
/// Caller (`AgentPool::start_agent`) wraps this in `tokio::spawn` and stores
/// the resulting handle on the Pending entry so cancellation works.
#[allow(clippy::too_many_arguments)]
pub(super) async fn run_agent_spawn(
    project_root: PathBuf,
    config: ProjectConfig,
    resume_context: Option<String>,
    session_id_to_resume: Option<String>,
    story_id: String,
    agent_name: String,
    tx: broadcast::Sender<AgentEvent>,
    agents: Arc<Mutex<HashMap<String, StoryAgent>>>,
    key: String,
    event_log: Arc<Mutex<Vec<AgentEvent>>>,
    port: u16,
    log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
    child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
    watcher_tx: broadcast::Sender<WatcherEvent>,
    merge_jobs: Arc<Mutex<HashMap<String, MergeJob>>>,
    inactivity_timeout_secs: u64,
) {
    // Re-bind to the legacy `_clone` / `_owned` names so the body below remains
    // a verbatim copy of the original closure (story 157).
    let project_root_clone = project_root;
    let config_clone = config;
    let resume_context_owned = resume_context;
    let session_id_to_resume_owned = session_id_to_resume;
    let sid = story_id;
    let aname = agent_name;
    let tx_clone = tx;
    let agents_ref = agents;
    let key_clone = key;
    let log_clone = event_log;
    let port_for_task = port;
    let log_writer_clone = log_writer;
    let child_killers_clone = child_killers;
    let watcher_tx_clone = watcher_tx;
    let merge_jobs_clone = merge_jobs;
    let _ = inactivity_timeout_secs; // no re-bind needed; used under its original name below

    // Step 1: create the worktree (slow — git checkout, pnpm install, etc.)
    let wt_info = match crate::worktree::create_worktree(
        &project_root_clone,
        &sid,
        &config_clone,
        port_for_task,
    )
    .await
    {
        Ok(wt) => wt,
        Err(e) => {
            let error_msg = format!("Failed to create worktree: {e}");
            slog_error!("[agents] {error_msg}");
            let event = AgentEvent::Error {
                story_id: sid.clone(),
                agent_name: aname.clone(),
                message: error_msg,
            };
            if let Ok(mut log) = log_clone.lock() {
                log.push(event.clone());
            }
            let _ = tx_clone.send(event);
            if let Ok(mut agents) = agents_ref.lock()
                && let Some(agent) = agents.get_mut(&key_clone)
            {
                agent.status = AgentStatus::Failed;
            }
            AgentPool::notify_agent_state_changed(&watcher_tx_clone);
            return;
        }
    };

    // Step 2: store worktree info and render agent command/args/prompt.
    let wt_path_str = wt_info.path.to_string_lossy().to_string();
    {
        if let Ok(mut agents) = agents_ref.lock()
            && let Some(agent) = agents.get_mut(&key_clone)
        {
            agent.worktree_info = Some(wt_info.clone());
        }
    }
    let (command, args, mut prompt) = match config_clone.render_agent_args(
        &wt_path_str,
        &sid,
        Some(&aname),
        Some(&wt_info.base_branch),
    ) {
        Ok(result) => result,
        Err(e) => {
            let error_msg = format!("Failed to render agent args: {e}");
            slog_error!("[agents] {error_msg}");
            let event = AgentEvent::Error {
                story_id: sid.clone(),
                agent_name: aname.clone(),
                message: error_msg,
            };
            if let Ok(mut log) = log_clone.lock() {
                log.push(event.clone());
            }
            let _ = tx_clone.send(event);
            if let Ok(mut agents) = agents_ref.lock()
                && let Some(agent) = agents.get_mut(&key_clone)
            {
                agent.status = AgentStatus::Failed;
            }
            AgentPool::notify_agent_state_changed(&watcher_tx_clone);
            return;
        }
    };

    // Append project-local prompt content (.huskies/AGENT.md) to the
    // baked-in prompt so every agent role sees project-specific guidance
    // without any config changes. The file is read fresh each spawn;
    // if absent or empty, the prompt is unchanged and no warning is logged.
    if let Some(local) =
        crate::agents::local_prompt::read_project_local_prompt(&project_root_clone)
    {
        prompt.push_str("\n\n");
        prompt.push_str(&local);
    }

    // Build the effective prompt and determine resume session.
    //
    // When resuming a previous session, discard the full rendered prompt
    // (which would re-read CLAUDE.md and README) and send only the gate
    // failure context as a new message. On a fresh start, append the
    // failure context to the original prompt as before.
    let effective_prompt = match &session_id_to_resume_owned {
        Some(_) => resume_context_owned.unwrap_or_default(),
        None => {
            if let Some(ctx) = resume_context_owned {
                prompt.push_str(&ctx);
            }
            prompt
        }
    };

    // Step 3: transition to Running now that the worktree is ready.
    {
        if let Ok(mut agents) = agents_ref.lock()
            && let Some(agent) = agents.get_mut(&key_clone)
        {
            agent.status = AgentStatus::Running;
        }
    }
    let _ = tx_clone.send(AgentEvent::Status {
        story_id: sid.clone(),
        agent_name: aname.clone(),
        status: "running".to_string(),
    });
    AgentPool::notify_agent_state_changed(&watcher_tx_clone);

    // Step 4: launch the agent process via the configured runtime.
    let runtime_name = config_clone
        .find_agent(&aname)
        .and_then(|a| a.runtime.as_deref())
        .unwrap_or("claude-code");
    let run_result = match runtime_name {
        "claude-code" => {
            let runtime = ClaudeCodeRuntime::new(
                child_killers_clone.clone(),
                watcher_tx_clone.clone(),
            );
            let ctx = RuntimeContext {
                story_id: sid.clone(),
                agent_name: aname.clone(),
                command,
                args,
                prompt: effective_prompt,
                cwd: wt_path_str,
                inactivity_timeout_secs,
                mcp_port: port_for_task,
                session_id_to_resume: session_id_to_resume_owned.clone(),
            };
            runtime
                .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
                .await
        }
        "gemini" => {
            let runtime = GeminiRuntime::new();
            let ctx = RuntimeContext {
                story_id: sid.clone(),
                agent_name: aname.clone(),
                command,
                args,
                prompt: effective_prompt,
                cwd: wt_path_str,
                inactivity_timeout_secs,
                mcp_port: port_for_task,
                session_id_to_resume: session_id_to_resume_owned.clone(),
            };
            runtime
                .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
                .await
        }
        "openai" => {
            let runtime = OpenAiRuntime::new();
            let ctx = RuntimeContext {
                story_id: sid.clone(),
                agent_name: aname.clone(),
                command,
                args,
                prompt: effective_prompt,
                cwd: wt_path_str,
                inactivity_timeout_secs,
                mcp_port: port_for_task,
                session_id_to_resume: session_id_to_resume_owned,
            };
            runtime
                .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
                .await
        }
        other => Err(format!(
            "Unknown agent runtime '{other}'; check the 'runtime' field in project.toml. \
             Supported: 'claude-code', 'gemini', 'openai'"
        )),
    };

    match run_result {
        Ok(result) => {
            // Persist token usage if the agent reported it.
            if let Some(ref usage) = result.token_usage
                && let Ok(agents) = agents_ref.lock()
                && let Some(agent) = agents.get(&key_clone)
                && let Some(ref pr) = agent.project_root
            {
                let model = config_clone
                    .find_agent(&aname)
                    .and_then(|a| a.model.clone());
                let record = crate::agents::token_usage::build_record(
                    &sid,
                    &aname,
                    model,
                    usage.clone(),
                );
                if let Err(e) = crate::agents::token_usage::append_record(pr, &record) {
                    slog_error!(
                        "[agents] Failed to persist token usage for \
                         {sid}:{aname}: {e}"
                    );
                }
            }

            // Mergemaster agents have their own completion path via
            // start_merge_agent_work / run_merge_pipeline and must NOT go
            // through server-owned gates. When a mergemaster exits early
            // (e.g. rate-limited before calling start_merge_agent_work) the
            // feature-branch worktree compiles fine and post-merge tests on
            // master pass (nothing changed), which would wrongly advance the
            // story to 5_done/ without any squash merge having occurred.
            // Instead: just remove the agent from the pool and let
            // auto-assign restart a new mergemaster for the story.
            let stage = config_clone
                .find_agent(&aname)
                .map(agent_config_stage)
                .unwrap_or_else(|| pipeline_stage(&aname));
            if stage == PipelineStage::Mergemaster {
                let (tx_done, done_session_id) = {
                    let mut lock = match agents_ref.lock() {
                        Ok(a) => a,
                        Err(_) => return,
                    };
                    if let Some(agent) = lock.remove(&key_clone) {
                        (agent.tx, agent.session_id.or(result.session_id))
                    } else {
                        (tx_clone.clone(), result.session_id)
                    }
                };
                // Clear any stale Running merge job so the next mergemaster
                // can call start_merge_agent_work without hitting "Merge
                // already in progress" (bug 498).
                if let Ok(mut jobs) = merge_jobs_clone.lock()
                    && let Some(job) = jobs.get(&sid)
                    && matches!(job.status, crate::agents::merge::MergeJobStatus::Running)
                {
                    jobs.remove(&sid);
                }
                let _ = tx_done.send(AgentEvent::Done {
                    story_id: sid.clone(),
                    agent_name: aname.clone(),
                    session_id: done_session_id,
                });
                AgentPool::notify_agent_state_changed(&watcher_tx_clone);
                // Send a WorkItem event so the auto-assign watcher loop
                // re-dispatches a new mergemaster if the story still needs
                // merging. This avoids an async call to start_agent inside
                // a tokio::spawn (which would require Send).
                let _ = watcher_tx_clone.send(crate::io::watcher::WatcherEvent::WorkItem {
                    stage: "4_merge".to_string(),
                    item_id: sid.clone(),
                    action: "reassign".to_string(),
                    commit_msg: String::new(),
                    from_stage: None,
                });
            } else {
                // Server-owned completion: run acceptance gates automatically
                // when the agent process exits normally.
                super::super::pipeline::run_server_owned_completion(
                    &agents_ref,
                    port_for_task,
                    &sid,
                    &aname,
                    result.session_id,
                    watcher_tx_clone.clone(),
                )
                .await;
                AgentPool::notify_agent_state_changed(&watcher_tx_clone);
            }
        }
        Err(e) => {
            slog_error!("[agents] Agent process error for {aname} on {sid}: {e}");
            let event = AgentEvent::Error {
                story_id: sid.clone(),
                agent_name: aname.clone(),
                message: e,
            };
            if let Ok(mut log) = log_clone.lock() {
                log.push(event.clone());
            }
            let _ = tx_clone.send(event);
            if let Ok(mut agents) = agents_ref.lock()
                && let Some(agent) = agents.get_mut(&key_clone)
            {
                agent.status = AgentStatus::Failed;
            }
            AgentPool::notify_agent_state_changed(&watcher_tx_clone);
        }
    }
}
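
An aside on the Step 4 dispatch: the three runtime arms above differ only in
how the runtime value is constructed. A generic sketch of string-keyed dispatch
through a trait object is below; the Runtime trait and impls are invented
stand-ins, not the crate's AgentRuntime API, and the real code builds a
per-arm RuntimeContext partly because ClaudeCodeRuntime::new takes extra
arguments:

    // Illustrative only: collapse per-runtime match arms into one launch path.
    trait Runtime {
        fn launch(&self, prompt: &str) -> Result<String, String>;
    }

    struct ClaudeCode;
    struct Gemini;

    impl Runtime for ClaudeCode {
        fn launch(&self, prompt: &str) -> Result<String, String> {
            Ok(format!("claude-code ran: {prompt}"))
        }
    }

    impl Runtime for Gemini {
        fn launch(&self, prompt: &str) -> Result<String, String> {
            Ok(format!("gemini ran: {prompt}"))
        }
    }

    // The only per-runtime code is construction; the launch call is shared.
    fn select_runtime(name: &str) -> Result<Box<dyn Runtime>, String> {
        match name {
            "claude-code" => Ok(Box::new(ClaudeCode)),
            "gemini" => Ok(Box::new(Gemini)),
            other => Err(format!("Unknown agent runtime '{other}'")),
        }
    }

    fn main() {
        let runtime = select_runtime("claude-code").expect("known runtime");
        println!("{}", runtime.launch("hello").unwrap());
    }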
agents/pool/start/validation.rs (new file, +69)
@@ -0,0 +1,69 @@
//! Pre-lock validation helpers for `AgentPool::start_agent`.

use std::path::Path;

use crate::config::ProjectConfig;

use super::super::super::{PipelineStage, agent_config_stage, pipeline_stage};
use super::super::worktree::find_active_story_stage;

/// Validate that an explicit `agent_name` is allowed to attach to `story_id`'s
/// current pipeline stage.
///
/// Prevents wrong-stage assignments like a mergemaster on a coding-stage story
/// (bug 312). Returns `Ok(())` if the agent has no specific stage (e.g.
/// supervisor) or the story is not in an active stage; `Err` with a descriptive
/// message on a stage mismatch.
pub(super) fn validate_agent_stage(
    config: &ProjectConfig,
    project_root: &Path,
    story_id: &str,
    agent_name: Option<&str>,
) -> Result<(), String> {
    let Some(name) = agent_name else {
        return Ok(());
    };
    let agent_stage = config
        .find_agent(name)
        .map(agent_config_stage)
        .unwrap_or_else(|| pipeline_stage(name));
    if agent_stage == PipelineStage::Other {
        return Ok(());
    }
    let Some(story_stage_dir) = find_active_story_stage(project_root, story_id) else {
        return Ok(());
    };
    let expected_stage = match story_stage_dir {
        "2_current" => PipelineStage::Coder,
        "3_qa" => PipelineStage::Qa,
        "4_merge" => PipelineStage::Mergemaster,
        _ => PipelineStage::Other,
    };
    if expected_stage != PipelineStage::Other && expected_stage != agent_stage {
        return Err(format!(
            "Agent '{name}' (stage: {agent_stage:?}) cannot be assigned to \
             story '{story_id}' in {story_stage_dir}/ (requires stage: {expected_stage:?})"
        ));
    }
    Ok(())
}

/// Read the preferred `agent:` field from the story's front matter.
///
/// When `agent_name` is `None` (caller is auto-selecting), this lets
/// `start_agent` honour an explicit `agent: coder-opus` written by the
/// `assign` command (bug 379). Returns `None` when an explicit `agent_name`
/// was already supplied or when the story has no front-matter preference.
pub(super) fn read_front_matter_agent(
    story_id: &str,
    agent_name: Option<&str>,
) -> Option<String> {
    if agent_name.is_some() {
        return None;
    }
    crate::db::read_content(story_id).and_then(|contents| {
        crate::io::story_metadata::parse_front_matter(&contents)
            .ok()?
            .agent
    })
}