refactor: split agents/pool/start.rs into mod.rs + validation.rs + spawn.rs
The 1630-line start.rs is split into a sub-module directory:

- validation.rs: validate_agent_stage + read_front_matter_agent helpers (69 lines)
- spawn.rs: run_agent_spawn — the background async work that was inlined as a tokio::spawn closure body inside start_agent (359 lines)
- mod.rs: AgentPool::start_agent orchestrator + tests (1062 lines)

Stage validation and front-matter agent reading are pre-lock pure helpers that extract naturally. The spawn closure body becomes a free async fn that takes the previously-cloned values as parameters; they are rebound to the original _clone / _owned names at the top of the body, so the actual work code is a verbatim copy. No behaviour change. All 23 start tests pass; the full suite is green.
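A minimal, self-contained sketch of that extraction pattern (hypothetical names, not this crate's API; the real function is spawn::run_agent_spawn in the diff below):

    // Sketch only: a tokio::spawn closure body hoisted into a free async fn.
    // The values that used to be cloned for the closure become parameters and
    // are rebound to the old names, so the moved body can stay a verbatim copy.
    use std::sync::Arc;

    async fn run_background_work(project_root: Arc<String>, story_id: String) {
        // Re-bind to the names the original closure body used.
        let project_root_clone = project_root;
        let sid = story_id;
        // ...original closure body, unchanged, still using the *_clone names...
        println!("working on {sid} under {project_root_clone}");
    }

    fn start(project_root: &Arc<String>, story_id: &str) -> tokio::task::JoinHandle<()> {
        // Before: tokio::spawn(async move { /* inline body capturing the clones */ })
        // After:  the clones are passed as arguments to the extracted fn.
        tokio::spawn(run_background_work(Arc::clone(project_root), story_id.to_string()))
    }

    #[tokio::main]
    async fn main() {
        let root = Arc::new("/tmp/project".to_string());
        start(&root, "story-42").await.unwrap();
    }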
@@ -16,6 +16,12 @@ use super::types::{PendingGuard, StoryAgent, composite_key};
use super::worktree::find_active_story_stage;
use super::{AgentPool, auto_assign};

mod spawn;
mod validation;

use validation::{read_front_matter_agent, validate_agent_stage};

impl AgentPool {
/// Start an agent for a story: load config, create worktree, spawn agent.
///
@@ -71,45 +77,12 @@ impl AgentPool {
}

// Validate that the agent's configured stage matches the story's
// pipeline stage. This prevents any caller (auto-assign, MCP tool,
// pipeline advance, supervisor) from starting a wrong-stage agent on
// a story — e.g. mergemaster on a coding-stage story (bug 312).
if let Some(name) = agent_name {
let agent_stage = config
.find_agent(name)
.map(agent_config_stage)
.unwrap_or_else(|| pipeline_stage(name));
if agent_stage != PipelineStage::Other
&& let Some(story_stage_dir) = find_active_story_stage(project_root, story_id)
{
let expected_stage = match story_stage_dir {
"2_current" => PipelineStage::Coder,
"3_qa" => PipelineStage::Qa,
"4_merge" => PipelineStage::Mergemaster,
_ => PipelineStage::Other,
};
if expected_stage != PipelineStage::Other && expected_stage != agent_stage {
return Err(format!(
"Agent '{name}' (stage: {agent_stage:?}) cannot be assigned to \
story '{story_id}' in {story_stage_dir}/ (requires stage: {expected_stage:?})"
));
}
}
}
// pipeline stage. (See validation::validate_agent_stage.)
validate_agent_stage(&config, project_root, story_id, agent_name)?;

// Read the preferred agent from the story's front matter before acquiring
// the lock. When no explicit agent_name is given, this lets start_agent
// honour `agent: coder-opus` written by the `assign` command — mirroring
// the auto_assign path (bug 379).
let front_matter_agent: Option<String> = if agent_name.is_none() {
crate::db::read_content(story_id).and_then(|contents| {
crate::io::story_metadata::parse_front_matter(&contents)
.ok()?
.agent
})
} else {
None
};
// the lock. (See validation::read_front_matter_agent.)
let front_matter_agent: Option<String> = read_front_matter_agent(story_id, agent_name);

// Atomically resolve agent name, check availability, and register as
// Pending. When `agent_name` is `None` the first idle coder is
@@ -320,317 +293,28 @@ impl AgentPool {
.unwrap_or(300);

// Clone all values needed inside the background spawn.
let project_root_clone = project_root.to_path_buf();
let config_clone = config.clone();
let resume_context_owned = resume_context.map(str::to_string);
let session_id_to_resume_owned = session_id_to_resume;
let sid = story_id.to_string();
let aname = resolved_name.clone();
let tx_clone = tx.clone();
let agents_ref = self.agents.clone();
let key_clone = key.clone();
let log_clone = event_log.clone();
let port_for_task = self.port;
let log_writer_clone = log_writer.clone();
let child_killers_clone = self.child_killers.clone();
let watcher_tx_clone = self.watcher_tx.clone();
let merge_jobs_clone = Arc::clone(&self.merge_jobs);

// Spawn the background task. Worktree creation and agent launch happen here
// so `start_agent` returns immediately after registering the agent as
// Pending — non-blocking by design (story 157).
let handle = tokio::spawn(async move {
// Step 1: create the worktree (slow — git checkout, pnpm install, etc.)
let wt_info = match crate::worktree::create_worktree(
&project_root_clone,
&sid,
&config_clone,
port_for_task,
)
.await
{
Ok(wt) => wt,
Err(e) => {
let error_msg = format!("Failed to create worktree: {e}");
slog_error!("[agents] {error_msg}");
let event = AgentEvent::Error {
story_id: sid.clone(),
agent_name: aname.clone(),
message: error_msg,
};
if let Ok(mut log) = log_clone.lock() {
log.push(event.clone());
}
let _ = tx_clone.send(event);
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.status = AgentStatus::Failed;
}
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
return;
}
};
let handle = tokio::spawn(spawn::run_agent_spawn(
project_root.to_path_buf(),
config.clone(),
resume_context.map(str::to_string),
session_id_to_resume,
story_id.to_string(),
resolved_name.clone(),
tx.clone(),
self.agents.clone(),
key.clone(),
event_log.clone(),
self.port,
log_writer.clone(),
self.child_killers.clone(),
self.watcher_tx.clone(),
Arc::clone(&self.merge_jobs),
inactivity_timeout_secs,
));

// Step 2: store worktree info and render agent command/args/prompt.
let wt_path_str = wt_info.path.to_string_lossy().to_string();
{
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.worktree_info = Some(wt_info.clone());
}
}

let (command, args, mut prompt) = match config_clone.render_agent_args(
&wt_path_str,
&sid,
Some(&aname),
Some(&wt_info.base_branch),
) {
Ok(result) => result,
Err(e) => {
let error_msg = format!("Failed to render agent args: {e}");
slog_error!("[agents] {error_msg}");
let event = AgentEvent::Error {
story_id: sid.clone(),
agent_name: aname.clone(),
message: error_msg,
};
if let Ok(mut log) = log_clone.lock() {
log.push(event.clone());
}
let _ = tx_clone.send(event);
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.status = AgentStatus::Failed;
}
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
return;
}
};

// Append project-local prompt content (.huskies/AGENT.md) to the
// baked-in prompt so every agent role sees project-specific guidance
// without any config changes. The file is read fresh each spawn;
// if absent or empty, the prompt is unchanged and no warning is logged.
if let Some(local) =
crate::agents::local_prompt::read_project_local_prompt(&project_root_clone)
{
prompt.push_str("\n\n");
prompt.push_str(&local);
}

// Build the effective prompt and determine resume session.
//
// When resuming a previous session, discard the full rendered prompt
// (which would re-read CLAUDE.md and README) and send only the gate
// failure context as a new message. On a fresh start, append the
// failure context to the original prompt as before.
let effective_prompt = match &session_id_to_resume_owned {
Some(_) => resume_context_owned.unwrap_or_default(),
None => {
if let Some(ctx) = resume_context_owned {
prompt.push_str(&ctx);
}
prompt
}
};

// Step 3: transition to Running now that the worktree is ready.
{
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.status = AgentStatus::Running;
}
}
let _ = tx_clone.send(AgentEvent::Status {
story_id: sid.clone(),
agent_name: aname.clone(),
status: "running".to_string(),
});
AgentPool::notify_agent_state_changed(&watcher_tx_clone);

// Step 4: launch the agent process via the configured runtime.
let runtime_name = config_clone
.find_agent(&aname)
.and_then(|a| a.runtime.as_deref())
.unwrap_or("claude-code");

let run_result = match runtime_name {
"claude-code" => {
let runtime = ClaudeCodeRuntime::new(
child_killers_clone.clone(),
watcher_tx_clone.clone(),
);
let ctx = RuntimeContext {
story_id: sid.clone(),
agent_name: aname.clone(),
command,
args,
prompt: effective_prompt,
cwd: wt_path_str,
inactivity_timeout_secs,
mcp_port: port_for_task,
session_id_to_resume: session_id_to_resume_owned.clone(),
};
runtime
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
.await
}
"gemini" => {
let runtime = GeminiRuntime::new();
let ctx = RuntimeContext {
story_id: sid.clone(),
agent_name: aname.clone(),
command,
args,
prompt: effective_prompt,
cwd: wt_path_str,
inactivity_timeout_secs,
mcp_port: port_for_task,
session_id_to_resume: session_id_to_resume_owned.clone(),
};
runtime
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
.await
}
"openai" => {
let runtime = OpenAiRuntime::new();
let ctx = RuntimeContext {
story_id: sid.clone(),
agent_name: aname.clone(),
command,
args,
prompt: effective_prompt,
cwd: wt_path_str,
inactivity_timeout_secs,
mcp_port: port_for_task,
session_id_to_resume: session_id_to_resume_owned,
};
runtime
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
.await
}
other => Err(format!(
"Unknown agent runtime '{other}'; check the 'runtime' field in project.toml. \
Supported: 'claude-code', 'gemini', 'openai'"
)),
};

match run_result {
Ok(result) => {
// Persist token usage if the agent reported it.
if let Some(ref usage) = result.token_usage
&& let Ok(agents) = agents_ref.lock()
&& let Some(agent) = agents.get(&key_clone)
&& let Some(ref pr) = agent.project_root
{
let model = config_clone
.find_agent(&aname)
.and_then(|a| a.model.clone());
let record = crate::agents::token_usage::build_record(
&sid,
&aname,
model,
usage.clone(),
);
if let Err(e) = crate::agents::token_usage::append_record(pr, &record) {
slog_error!(
"[agents] Failed to persist token usage for \
{sid}:{aname}: {e}"
);
}
}

// Mergemaster agents have their own completion path via
// start_merge_agent_work / run_merge_pipeline and must NOT go
// through server-owned gates. When a mergemaster exits early
// (e.g. rate-limited before calling start_merge_agent_work) the
// feature-branch worktree compiles fine and post-merge tests on
// master pass (nothing changed), which would wrongly advance the
// story to 5_done/ without any squash merge having occurred.
// Instead: just remove the agent from the pool and let
// auto-assign restart a new mergemaster for the story.
let stage = config_clone
.find_agent(&aname)
.map(agent_config_stage)
.unwrap_or_else(|| pipeline_stage(&aname));
if stage == PipelineStage::Mergemaster {
let (tx_done, done_session_id) = {
let mut lock = match agents_ref.lock() {
Ok(a) => a,
Err(_) => return,
};
if let Some(agent) = lock.remove(&key_clone) {
(agent.tx, agent.session_id.or(result.session_id))
} else {
(tx_clone.clone(), result.session_id)
}
};
// Clear any stale Running merge job so the next mergemaster
// can call start_merge_agent_work without hitting "Merge
// already in progress" (bug 498).
if let Ok(mut jobs) = merge_jobs_clone.lock()
&& let Some(job) = jobs.get(&sid)
&& matches!(job.status, crate::agents::merge::MergeJobStatus::Running)
{
jobs.remove(&sid);
}
let _ = tx_done.send(AgentEvent::Done {
story_id: sid.clone(),
agent_name: aname.clone(),
session_id: done_session_id,
});
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
// Send a WorkItem event so the auto-assign watcher loop
// re-dispatches a new mergemaster if the story still needs
// merging. This avoids an async call to start_agent inside
// a tokio::spawn (which would require Send).
let _ = watcher_tx_clone.send(crate::io::watcher::WatcherEvent::WorkItem {
stage: "4_merge".to_string(),
item_id: sid.clone(),
action: "reassign".to_string(),
commit_msg: String::new(),
from_stage: None,
});
} else {
// Server-owned completion: run acceptance gates automatically
// when the agent process exits normally.
super::pipeline::run_server_owned_completion(
&agents_ref,
port_for_task,
&sid,
&aname,
result.session_id,
watcher_tx_clone.clone(),
)
.await;
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
}
}
Err(e) => {
slog_error!("[agents] Agent process error for {aname} on {sid}: {e}");
let event = AgentEvent::Error {
story_id: sid.clone(),
agent_name: aname.clone(),
message: e,
};
if let Ok(mut log) = log_clone.lock() {
log.push(event.clone());
}
let _ = tx_clone.send(event);
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.status = AgentStatus::Failed;
}
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
}
}
});

// Store the task handle while the agent is still Pending.
{
@@ -0,0 +1,361 @@
//! Background async work spawned by `AgentPool::start_agent`.
//!
//! `start_agent` returns immediately after registering the agent as `Pending`;
//! this module runs the slow worktree creation, agent process launch, and
//! event streaming in the background (story 157).

use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};

use portable_pty::ChildKiller;
use tokio::sync::broadcast;

use crate::agent_log::AgentLogWriter;
use crate::config::ProjectConfig;
use crate::io::watcher::WatcherEvent;
use crate::slog_error;

use super::super::super::{
AgentEvent, AgentStatus, PipelineStage, agent_config_stage, pipeline_stage,
};
use super::super::super::merge::MergeJob;
use super::super::AgentPool;
use super::super::super::runtime::{
AgentRuntime, ClaudeCodeRuntime, GeminiRuntime, OpenAiRuntime, RuntimeContext,
};
use super::super::types::StoryAgent;

/// Run the background worktree-creation + agent-launch flow.
///
/// Caller (`AgentPool::start_agent`) wraps this in `tokio::spawn` and stores
/// the resulting handle on the Pending entry so cancellation works.
#[allow(clippy::too_many_arguments)]
pub(super) async fn run_agent_spawn(
project_root: PathBuf,
config: ProjectConfig,
resume_context: Option<String>,
session_id_to_resume: Option<String>,
story_id: String,
agent_name: String,
tx: broadcast::Sender<AgentEvent>,
agents: Arc<Mutex<HashMap<String, StoryAgent>>>,
key: String,
event_log: Arc<Mutex<Vec<AgentEvent>>>,
port: u16,
log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
watcher_tx: broadcast::Sender<WatcherEvent>,
merge_jobs: Arc<Mutex<HashMap<String, MergeJob>>>,
inactivity_timeout_secs: u64,
) {
// Re-bind to the legacy `_clone` / `_owned` names so the body below remains
// a verbatim copy of the original closure (story 157).
let project_root_clone = project_root;
let config_clone = config;
let resume_context_owned = resume_context;
let session_id_to_resume_owned = session_id_to_resume;
let sid = story_id;
let aname = agent_name;
let tx_clone = tx;
let agents_ref = agents;
let key_clone = key;
let log_clone = event_log;
let port_for_task = port;
let log_writer_clone = log_writer;
let child_killers_clone = child_killers;
let watcher_tx_clone = watcher_tx;
let merge_jobs_clone = merge_jobs;
let _ = inactivity_timeout_secs; // currently unused inside the closure body

// Step 1: create the worktree (slow — git checkout, pnpm install, etc.)
let wt_info = match crate::worktree::create_worktree(
&project_root_clone,
&sid,
&config_clone,
port_for_task,
)
.await
{
Ok(wt) => wt,
Err(e) => {
let error_msg = format!("Failed to create worktree: {e}");
slog_error!("[agents] {error_msg}");
let event = AgentEvent::Error {
story_id: sid.clone(),
agent_name: aname.clone(),
message: error_msg,
};
if let Ok(mut log) = log_clone.lock() {
log.push(event.clone());
}
let _ = tx_clone.send(event);
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.status = AgentStatus::Failed;
}
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
return;
}
};

// Step 2: store worktree info and render agent command/args/prompt.
let wt_path_str = wt_info.path.to_string_lossy().to_string();
{
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.worktree_info = Some(wt_info.clone());
}
}

let (command, args, mut prompt) = match config_clone.render_agent_args(
&wt_path_str,
&sid,
Some(&aname),
Some(&wt_info.base_branch),
) {
Ok(result) => result,
Err(e) => {
let error_msg = format!("Failed to render agent args: {e}");
slog_error!("[agents] {error_msg}");
let event = AgentEvent::Error {
story_id: sid.clone(),
agent_name: aname.clone(),
message: error_msg,
};
if let Ok(mut log) = log_clone.lock() {
log.push(event.clone());
}
let _ = tx_clone.send(event);
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.status = AgentStatus::Failed;
}
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
return;
}
};

// Append project-local prompt content (.huskies/AGENT.md) to the
// baked-in prompt so every agent role sees project-specific guidance
// without any config changes. The file is read fresh each spawn;
// if absent or empty, the prompt is unchanged and no warning is logged.
if let Some(local) =
crate::agents::local_prompt::read_project_local_prompt(&project_root_clone)
{
prompt.push_str("\n\n");
prompt.push_str(&local);
}

// Build the effective prompt and determine resume session.
//
// When resuming a previous session, discard the full rendered prompt
// (which would re-read CLAUDE.md and README) and send only the gate
// failure context as a new message. On a fresh start, append the
// failure context to the original prompt as before.
let effective_prompt = match &session_id_to_resume_owned {
Some(_) => resume_context_owned.unwrap_or_default(),
None => {
if let Some(ctx) = resume_context_owned {
prompt.push_str(&ctx);
}
prompt
}
};

// Step 3: transition to Running now that the worktree is ready.
{
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.status = AgentStatus::Running;
}
}
let _ = tx_clone.send(AgentEvent::Status {
story_id: sid.clone(),
agent_name: aname.clone(),
status: "running".to_string(),
});
AgentPool::notify_agent_state_changed(&watcher_tx_clone);

// Step 4: launch the agent process via the configured runtime.
let runtime_name = config_clone
.find_agent(&aname)
.and_then(|a| a.runtime.as_deref())
.unwrap_or("claude-code");

let run_result = match runtime_name {
"claude-code" => {
let runtime = ClaudeCodeRuntime::new(
child_killers_clone.clone(),
watcher_tx_clone.clone(),
);
let ctx = RuntimeContext {
story_id: sid.clone(),
agent_name: aname.clone(),
command,
args,
prompt: effective_prompt,
cwd: wt_path_str,
inactivity_timeout_secs,
mcp_port: port_for_task,
session_id_to_resume: session_id_to_resume_owned.clone(),
};
runtime
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
.await
}
"gemini" => {
let runtime = GeminiRuntime::new();
let ctx = RuntimeContext {
story_id: sid.clone(),
agent_name: aname.clone(),
command,
args,
prompt: effective_prompt,
cwd: wt_path_str,
inactivity_timeout_secs,
mcp_port: port_for_task,
session_id_to_resume: session_id_to_resume_owned.clone(),
};
runtime
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
.await
}
"openai" => {
let runtime = OpenAiRuntime::new();
let ctx = RuntimeContext {
story_id: sid.clone(),
agent_name: aname.clone(),
command,
args,
prompt: effective_prompt,
cwd: wt_path_str,
inactivity_timeout_secs,
mcp_port: port_for_task,
session_id_to_resume: session_id_to_resume_owned,
};
runtime
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
.await
}
other => Err(format!(
"Unknown agent runtime '{other}'; check the 'runtime' field in project.toml. \
Supported: 'claude-code', 'gemini', 'openai'"
)),
};

match run_result {
Ok(result) => {
// Persist token usage if the agent reported it.
if let Some(ref usage) = result.token_usage
&& let Ok(agents) = agents_ref.lock()
&& let Some(agent) = agents.get(&key_clone)
&& let Some(ref pr) = agent.project_root
{
let model = config_clone
.find_agent(&aname)
.and_then(|a| a.model.clone());
let record = crate::agents::token_usage::build_record(
&sid,
&aname,
model,
usage.clone(),
);
if let Err(e) = crate::agents::token_usage::append_record(pr, &record) {
slog_error!(
"[agents] Failed to persist token usage for \
{sid}:{aname}: {e}"
);
}
}

// Mergemaster agents have their own completion path via
// start_merge_agent_work / run_merge_pipeline and must NOT go
// through server-owned gates. When a mergemaster exits early
// (e.g. rate-limited before calling start_merge_agent_work) the
// feature-branch worktree compiles fine and post-merge tests on
// master pass (nothing changed), which would wrongly advance the
// story to 5_done/ without any squash merge having occurred.
// Instead: just remove the agent from the pool and let
// auto-assign restart a new mergemaster for the story.
let stage = config_clone
.find_agent(&aname)
.map(agent_config_stage)
.unwrap_or_else(|| pipeline_stage(&aname));
if stage == PipelineStage::Mergemaster {
let (tx_done, done_session_id) = {
let mut lock = match agents_ref.lock() {
Ok(a) => a,
Err(_) => return,
};
if let Some(agent) = lock.remove(&key_clone) {
(agent.tx, agent.session_id.or(result.session_id))
} else {
(tx_clone.clone(), result.session_id)
}
};
// Clear any stale Running merge job so the next mergemaster
// can call start_merge_agent_work without hitting "Merge
// already in progress" (bug 498).
if let Ok(mut jobs) = merge_jobs_clone.lock()
&& let Some(job) = jobs.get(&sid)
&& matches!(job.status, crate::agents::merge::MergeJobStatus::Running)
{
jobs.remove(&sid);
}
let _ = tx_done.send(AgentEvent::Done {
story_id: sid.clone(),
agent_name: aname.clone(),
session_id: done_session_id,
});
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
// Send a WorkItem event so the auto-assign watcher loop
// re-dispatches a new mergemaster if the story still needs
// merging. This avoids an async call to start_agent inside
// a tokio::spawn (which would require Send).
let _ = watcher_tx_clone.send(crate::io::watcher::WatcherEvent::WorkItem {
stage: "4_merge".to_string(),
item_id: sid.clone(),
action: "reassign".to_string(),
commit_msg: String::new(),
from_stage: None,
});
} else {
// Server-owned completion: run acceptance gates automatically
// when the agent process exits normally.
super::super::pipeline::run_server_owned_completion(
&agents_ref,
port_for_task,
&sid,
&aname,
result.session_id,
watcher_tx_clone.clone(),
)
.await;
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
}
}
Err(e) => {
slog_error!("[agents] Agent process error for {aname} on {sid}: {e}");
let event = AgentEvent::Error {
story_id: sid.clone(),
agent_name: aname.clone(),
message: e,
};
if let Ok(mut log) = log_clone.lock() {
log.push(event.clone());
}
let _ = tx_clone.send(event);
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.status = AgentStatus::Failed;
}
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
}
}
}
@@ -0,0 +1,69 @@
//! Pre-lock validation helpers for `AgentPool::start_agent`.

use std::path::Path;

use crate::config::ProjectConfig;

use super::super::super::{PipelineStage, agent_config_stage, pipeline_stage};
use super::super::worktree::find_active_story_stage;

/// Validate that an explicit `agent_name` is allowed to attach to `story_id`'s
/// current pipeline stage.
///
/// Prevents wrong-stage assignments like a mergemaster on a coding-stage story
/// (bug 312). Returns `Ok(())` if the agent has no specific stage (e.g.
/// supervisor) or the story is not in an active stage; `Err` with a descriptive
/// message on a stage mismatch.
pub(super) fn validate_agent_stage(
config: &ProjectConfig,
project_root: &Path,
story_id: &str,
agent_name: Option<&str>,
) -> Result<(), String> {
let Some(name) = agent_name else {
return Ok(());
};
let agent_stage = config
.find_agent(name)
.map(agent_config_stage)
.unwrap_or_else(|| pipeline_stage(name));
if agent_stage == PipelineStage::Other {
return Ok(());
}
let Some(story_stage_dir) = find_active_story_stage(project_root, story_id) else {
return Ok(());
};
let expected_stage = match story_stage_dir {
"2_current" => PipelineStage::Coder,
"3_qa" => PipelineStage::Qa,
"4_merge" => PipelineStage::Mergemaster,
_ => PipelineStage::Other,
};
if expected_stage != PipelineStage::Other && expected_stage != agent_stage {
return Err(format!(
"Agent '{name}' (stage: {agent_stage:?}) cannot be assigned to \
story '{story_id}' in {story_stage_dir}/ (requires stage: {expected_stage:?})"
));
}
Ok(())
}

/// Read the preferred `agent:` field from the story's front matter.
///
/// When `agent_name` is `None` (caller is auto-selecting), this lets
/// `start_agent` honour an explicit `agent: coder-opus` written by the
/// `assign` command (bug 379). Returns `None` when an explicit agent_name
/// was already supplied or when the story has no front-matter preference.
pub(super) fn read_front_matter_agent(
story_id: &str,
agent_name: Option<&str>,
) -> Option<String> {
if agent_name.is_some() {
return None;
}
crate::db::read_content(story_id).and_then(|contents| {
crate::io::story_metadata::parse_front_matter(&contents)
.ok()?
.agent
})
}