//! Agent start — spawns a new agent process in a worktree for a given story.

use crate::agent_log::AgentLogWriter;
use crate::config::ProjectConfig;
use crate::slog_error;
use std::path::Path;
use std::sync::{Arc, Mutex};
use tokio::sync::broadcast;

use super::super::runtime::{
    AgentRuntime, ClaudeCodeRuntime, GeminiRuntime, OpenAiRuntime, RuntimeContext,
};
use super::super::{
    AgentEvent, AgentInfo, AgentStatus, PipelineStage, agent_config_stage, pipeline_stage,
};
use super::types::{PendingGuard, StoryAgent, composite_key};
use super::worktree::find_active_story_stage;
use super::{AgentPool, auto_assign};

impl AgentPool {
    /// Start an agent for a story: load config, create worktree, spawn agent.
    ///
    /// When `agent_name` is `None`, automatically selects the first idle coder
    /// agent (story 190). If all coders are busy the call fails with an error
    /// indicating the story will be picked up when one becomes available.
    ///
    /// If `resume_context` is provided and `session_id_to_resume` is `None`,
    /// the context is appended to the rendered prompt so the agent can pick up
    /// from a previous failed attempt.
    ///
    /// If `session_id_to_resume` is provided, the agent is launched with
    /// `--resume <session_id>` instead of `-p <prompt>`. Only
    /// `resume_context` (if any) is sent as the new message. This lets
    /// the agent re-enter the previous conversation without re-reading
    /// CLAUDE.md and README, satisfying story 543.
    pub async fn start_agent(
        &self,
        project_root: &Path,
        story_id: &str,
        agent_name: Option<&str>,
        resume_context: Option<&str>,
        session_id_to_resume: Option<String>,
    ) -> Result<AgentInfo, String> {
        let config = ProjectConfig::load(project_root)?;

        // Validate explicit agent name early (no lock needed).
        if let Some(name) = agent_name {
            config
                .find_agent(name)
                .ok_or_else(|| format!("No agent named '{name}' in config"))?;
        }

        // Create name-independent shared resources before the lock so they are
        // ready for the atomic check-and-insert (story 132).
        let (tx, _) = broadcast::channel::<AgentEvent>(1024);
        let event_log: Arc<Mutex<Vec<AgentEvent>>> = Arc::new(Mutex::new(Vec::new()));
        let log_session_id = uuid::Uuid::new_v4().to_string();

        // Move story from backlog/ to current/ before checking agent
        // availability so that auto_assign_available_work can pick it up even
        // when all coders are currently busy (story 203). Only do this for
        // Coder-stage agents — QA and Mergemaster must attach to the story
        // at its existing stage (3_qa or 4_merge) and must NOT be demoted
        // back to 2_current/ on attach (bug 502). When `agent_name` is None
        // we are auto-selecting an idle coder, so still move.
        let starting_a_coder = agent_name
            .and_then(|n| config.find_agent(n).map(agent_config_stage))
            .map(|s| s == PipelineStage::Coder)
            .unwrap_or(true);
        if starting_a_coder {
            crate::agents::lifecycle::move_story_to_current(project_root, story_id)?;
        }
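        // For illustration (story id and agent names hypothetical):
        // `start_agent(root, "42_story", None, ..)` auto-selects an idle coder
        // and first moves the story into work/2_current/, while
        // `start_agent(root, "42_story", Some("qa"), ..)` attaches wherever
        // the story already sits (3_qa/ or 4_merge/) without moving it.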
        // Validate that the agent's configured stage matches the story's
        // pipeline stage. This prevents any caller (auto-assign, MCP tool,
        // pipeline advance, supervisor) from starting a wrong-stage agent on
        // a story — e.g. mergemaster on a coding-stage story (bug 312).
        if let Some(name) = agent_name {
            let agent_stage = config
                .find_agent(name)
                .map(agent_config_stage)
                .unwrap_or_else(|| pipeline_stage(name));
            if agent_stage != PipelineStage::Other
                && let Some(story_stage_dir) = find_active_story_stage(project_root, story_id)
            {
                let expected_stage = match story_stage_dir {
                    "2_current" => PipelineStage::Coder,
                    "3_qa" => PipelineStage::Qa,
                    "4_merge" => PipelineStage::Mergemaster,
                    _ => PipelineStage::Other,
                };
                if expected_stage != PipelineStage::Other && expected_stage != agent_stage {
                    return Err(format!(
                        "Agent '{name}' (stage: {agent_stage:?}) cannot be assigned to \
                         story '{story_id}' in {story_stage_dir}/ (requires stage: {expected_stage:?})"
                    ));
                }
            }
        }

        // Read the preferred agent from the story's front matter before acquiring
        // the lock. When no explicit agent_name is given, this lets start_agent
        // honour `agent: coder-opus` written by the `assign` command — mirroring
        // the auto_assign path (bug 379).
        let front_matter_agent: Option<String> = if agent_name.is_none() {
            crate::db::read_content(story_id).and_then(|contents| {
                crate::io::story_metadata::parse_front_matter(&contents)
                    .ok()?
                    .agent
            })
        } else {
            None
        };

        // Atomically resolve agent name, check availability, and register as
        // Pending. When `agent_name` is `None` the first idle coder is
        // selected inside the lock so no TOCTOU race can occur between the
        // availability check and the Pending insert (story 132, story 190).
        //
        // The `PendingGuard` ensures that if any step below fails the entry is
        // removed from the pool so it does not permanently block auto-assign
        // (bug 118).
        let resolved_name: String;
        let key: String;
        {
            let mut agents = self.agents.lock().map_err(|e| e.to_string())?;
            resolved_name = match agent_name {
                Some(name) => name.to_string(),
                None => {
                    // Honour the `agent:` field in the story's front matter so that
                    // `start 368` after `assign 368 opus` picks the right agent
                    // (bug 379). Mirrors the auto_assign selection logic.
                    if let Some(ref pref) = front_matter_agent {
                        let stage_matches = config
                            .find_agent(pref)
                            .map(|cfg| agent_config_stage(cfg) == PipelineStage::Coder)
                            .unwrap_or(false);
                        if stage_matches {
                            if auto_assign::is_agent_free(&agents, pref) {
                                pref.clone()
                            } else {
                                return Err(format!(
                                    "Preferred agent '{pref}' from story front matter is busy; \
                                     story '{story_id}' has been queued in work/2_current/ and will \
                                     be auto-assigned when it becomes available"
                                ));
                            }
                        } else {
                            // Stage mismatch — fall back to any free coder.
                            auto_assign::find_free_agent_for_stage(
                                &config,
                                &agents,
                                &PipelineStage::Coder,
                            )
                            .map(|s| s.to_string())
                            .ok_or_else(|| {
                                if config
                                    .agent
                                    .iter()
                                    .any(|a| agent_config_stage(a) == PipelineStage::Coder)
                                {
                                    format!(
                                        "All coder agents are busy; story '{story_id}' has been \
                                         queued in work/2_current/ and will be auto-assigned when \
                                         one becomes available"
                                    )
                                } else {
                                    "No coder agent configured. Specify an agent_name explicitly."
                                        .to_string()
                                }
                            })?
                        }
                    } else {
                        auto_assign::find_free_agent_for_stage(
                            &config,
                            &agents,
                            &PipelineStage::Coder,
                        )
                        .map(|s| s.to_string())
                        .ok_or_else(|| {
                            if config
                                .agent
                                .iter()
                                .any(|a| agent_config_stage(a) == PipelineStage::Coder)
                            {
                                format!(
                                    "All coder agents are busy; story '{story_id}' has been \
                                     queued in work/2_current/ and will be auto-assigned when \
                                     one becomes available"
                                )
                            } else {
                                "No coder agent configured. Specify an agent_name explicitly."
                                    .to_string()
                            }
                        })?
                    }
                }
            };
            key = composite_key(story_id, &resolved_name);
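            // `composite_key` evidently produces "story_id:agent_name"; the
            // `rsplit_once(':')` calls below rely on that shape to recover the
            // story id from a pool key.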
            // Check for duplicate assignment (same story + same agent already active).
            if let Some(agent) = agents.get(&key)
                && (agent.status == AgentStatus::Running || agent.status == AgentStatus::Pending)
            {
                return Err(format!(
                    "Agent '{resolved_name}' for story '{story_id}' is already {}",
                    agent.status
                ));
            }

            // Enforce single-stage concurrency: reject if there is already a
            // Running/Pending agent at the same pipeline stage for this story.
            // This prevents two coders (or two QA/mergemaster agents) from
            // corrupting each other's work in the same worktree.
            // Applies to both explicit and auto-selected agents; the Other
            // stage (supervisors, unknown agents) is exempt.
            let resolved_stage = config
                .find_agent(&resolved_name)
                .map(agent_config_stage)
                .unwrap_or_else(|| pipeline_stage(&resolved_name));
            if resolved_stage != PipelineStage::Other
                && let Some(conflicting_name) = agents.iter().find_map(|(k, a)| {
                    let k_story = k.rsplit_once(':').map(|(s, _)| s).unwrap_or(k);
                    if k_story == story_id
                        && a.agent_name != resolved_name
                        && matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
                    {
                        let a_stage = config
                            .find_agent(&a.agent_name)
                            .map(agent_config_stage)
                            .unwrap_or_else(|| pipeline_stage(&a.agent_name));
                        if a_stage == resolved_stage {
                            Some(a.agent_name.clone())
                        } else {
                            None
                        }
                    } else {
                        None
                    }
                })
            {
                return Err(format!(
                    "Cannot start '{resolved_name}' on story '{story_id}': \
                     '{conflicting_name}' is already active at the same pipeline stage"
                ));
            }

            // Enforce single-instance concurrency for explicitly-named agents:
            // if this agent is already running on any other story, reject.
            // Auto-selected agents are already guaranteed idle by
            // find_free_agent_for_stage, so this check is only needed for
            // explicit requests.
            if agent_name.is_some()
                && let Some(busy_story) = agents.iter().find_map(|(k, a)| {
                    if a.agent_name == resolved_name
                        && matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
                    {
                        Some(
                            k.rsplit_once(':')
                                .map(|(sid, _)| sid)
                                .unwrap_or(k)
                                .to_string(),
                        )
                    } else {
                        None
                    }
                })
            {
                return Err(format!(
                    "Agent '{resolved_name}' is already running on story '{busy_story}'; \
                     story '{story_id}' will be picked up when the agent becomes available"
                ));
            }

            agents.insert(
                key.clone(),
                StoryAgent {
                    agent_name: resolved_name.clone(),
                    status: AgentStatus::Pending,
                    worktree_info: None,
                    session_id: None,
                    tx: tx.clone(),
                    task_handle: None,
                    event_log: event_log.clone(),
                    completion: None,
                    project_root: Some(project_root.to_path_buf()),
                    log_session_id: Some(log_session_id.clone()),
                    merge_failure_reported: false,
                    throttled: false,
                },
            );
        }
        let mut pending_guard = PendingGuard::new(self.agents.clone(), key.clone());

        // Create persistent log writer (needs resolved_name, so must be after
        // the atomic resolution above).
        let log_writer =
            match AgentLogWriter::new(project_root, story_id, &resolved_name, &log_session_id) {
                Ok(w) => Some(Arc::new(Mutex::new(w))),
                Err(e) => {
                    eprintln!(
                        "[agents] Failed to create log writer for {story_id}:{resolved_name}: {e}"
                    );
                    None
                }
            };

        // Notify WebSocket clients that a new agent is pending.
        Self::notify_agent_state_changed(&self.watcher_tx);
        let _ = tx.send(AgentEvent::Status {
            story_id: story_id.to_string(),
            agent_name: resolved_name.clone(),
            status: "pending".to_string(),
        });

        // Extract inactivity timeout from the agent config before cloning config.
        let inactivity_timeout_secs = config
            .find_agent(&resolved_name)
            .map(|a| a.inactivity_timeout_secs)
            .unwrap_or(300);
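        // `tokio::spawn` requires a `'static` future, so the task below cannot
        // borrow from this stack frame; every captured value is cloned into an
        // owned copy first.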
        // Clone all values needed inside the background spawn.
        let project_root_clone = project_root.to_path_buf();
        let config_clone = config.clone();
        let resume_context_owned = resume_context.map(str::to_string);
        let session_id_to_resume_owned = session_id_to_resume;
        let sid = story_id.to_string();
        let aname = resolved_name.clone();
        let tx_clone = tx.clone();
        let agents_ref = self.agents.clone();
        let key_clone = key.clone();
        let log_clone = event_log.clone();
        let port_for_task = self.port;
        let log_writer_clone = log_writer.clone();
        let child_killers_clone = self.child_killers.clone();
        let watcher_tx_clone = self.watcher_tx.clone();
        let merge_jobs_clone = Arc::clone(&self.merge_jobs);

        // Spawn the background task. Worktree creation and agent launch happen here
        // so `start_agent` returns immediately after registering the agent as
        // Pending — non-blocking by design (story 157).
        let handle = tokio::spawn(async move {
            // Step 1: create the worktree (slow — git checkout, pnpm install, etc.)
            let wt_info = match crate::worktree::create_worktree(
                &project_root_clone,
                &sid,
                &config_clone,
                port_for_task,
            )
            .await
            {
                Ok(wt) => wt,
                Err(e) => {
                    let error_msg = format!("Failed to create worktree: {e}");
                    slog_error!("[agents] {error_msg}");
                    let event = AgentEvent::Error {
                        story_id: sid.clone(),
                        agent_name: aname.clone(),
                        message: error_msg,
                    };
                    if let Ok(mut log) = log_clone.lock() {
                        log.push(event.clone());
                    }
                    let _ = tx_clone.send(event);
                    if let Ok(mut agents) = agents_ref.lock()
                        && let Some(agent) = agents.get_mut(&key_clone)
                    {
                        agent.status = AgentStatus::Failed;
                    }
                    AgentPool::notify_agent_state_changed(&watcher_tx_clone);
                    return;
                }
            };

            // Step 2: store worktree info and render agent command/args/prompt.
            let wt_path_str = wt_info.path.to_string_lossy().to_string();
            {
                if let Ok(mut agents) = agents_ref.lock()
                    && let Some(agent) = agents.get_mut(&key_clone)
                {
                    agent.worktree_info = Some(wt_info.clone());
                }
            }
            let (command, args, mut prompt) = match config_clone.render_agent_args(
                &wt_path_str,
                &sid,
                Some(&aname),
                Some(&wt_info.base_branch),
            ) {
                Ok(result) => result,
                Err(e) => {
                    let error_msg = format!("Failed to render agent args: {e}");
                    slog_error!("[agents] {error_msg}");
                    let event = AgentEvent::Error {
                        story_id: sid.clone(),
                        agent_name: aname.clone(),
                        message: error_msg,
                    };
                    if let Ok(mut log) = log_clone.lock() {
                        log.push(event.clone());
                    }
                    let _ = tx_clone.send(event);
                    if let Ok(mut agents) = agents_ref.lock()
                        && let Some(agent) = agents.get_mut(&key_clone)
                    {
                        agent.status = AgentStatus::Failed;
                    }
                    AgentPool::notify_agent_state_changed(&watcher_tx_clone);
                    return;
                }
            };

            // Build the effective prompt and determine resume session.
            //
            // When resuming a previous session, discard the full rendered prompt
            // (which would re-read CLAUDE.md and README) and send only the gate
            // failure context as a new message. On a fresh start, append the
            // failure context to the original prompt as before.
            let effective_prompt = match &session_id_to_resume_owned {
                Some(_) => resume_context_owned.unwrap_or_default(),
                None => {
                    if let Some(ctx) = resume_context_owned {
                        prompt.push_str(&ctx);
                    }
                    prompt
                }
            };

            // Step 3: transition to Running now that the worktree is ready.
            {
                if let Ok(mut agents) = agents_ref.lock()
                    && let Some(agent) = agents.get_mut(&key_clone)
                {
                    agent.status = AgentStatus::Running;
                }
            }
            let _ = tx_clone.send(AgentEvent::Status {
                story_id: sid.clone(),
                agent_name: aname.clone(),
                status: "running".to_string(),
            });
            AgentPool::notify_agent_state_changed(&watcher_tx_clone);
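            // A hypothetical project.toml entry selecting a non-default
            // runtime for the dispatch in Step 4 below (the `runtime` field
            // falls back to "claude-code" when omitted):
            //
            //   [[agent]]
            //   name = "coder-1"
            //   stage = "coder"
            //   runtime = "gemini"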
            // Step 4: launch the agent process via the configured runtime.
            let runtime_name = config_clone
                .find_agent(&aname)
                .and_then(|a| a.runtime.as_deref())
                .unwrap_or("claude-code");
            let run_result = match runtime_name {
                "claude-code" => {
                    let runtime = ClaudeCodeRuntime::new(
                        child_killers_clone.clone(),
                        watcher_tx_clone.clone(),
                    );
                    let ctx = RuntimeContext {
                        story_id: sid.clone(),
                        agent_name: aname.clone(),
                        command,
                        args,
                        prompt: effective_prompt,
                        cwd: wt_path_str,
                        inactivity_timeout_secs,
                        mcp_port: port_for_task,
                        session_id_to_resume: session_id_to_resume_owned.clone(),
                    };
                    runtime
                        .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
                        .await
                }
                "gemini" => {
                    let runtime = GeminiRuntime::new();
                    let ctx = RuntimeContext {
                        story_id: sid.clone(),
                        agent_name: aname.clone(),
                        command,
                        args,
                        prompt: effective_prompt,
                        cwd: wt_path_str,
                        inactivity_timeout_secs,
                        mcp_port: port_for_task,
                        session_id_to_resume: session_id_to_resume_owned.clone(),
                    };
                    runtime
                        .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
                        .await
                }
                "openai" => {
                    let runtime = OpenAiRuntime::new();
                    let ctx = RuntimeContext {
                        story_id: sid.clone(),
                        agent_name: aname.clone(),
                        command,
                        args,
                        prompt: effective_prompt,
                        cwd: wt_path_str,
                        inactivity_timeout_secs,
                        mcp_port: port_for_task,
                        session_id_to_resume: session_id_to_resume_owned,
                    };
                    runtime
                        .start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
                        .await
                }
                other => Err(format!(
                    "Unknown agent runtime '{other}'; check the 'runtime' field in project.toml. \
                     Supported: 'claude-code', 'gemini', 'openai'"
                )),
            };

            match run_result {
                Ok(result) => {
                    // Persist token usage if the agent reported it.
                    if let Some(ref usage) = result.token_usage
                        && let Ok(agents) = agents_ref.lock()
                        && let Some(agent) = agents.get(&key_clone)
                        && let Some(ref pr) = agent.project_root
                    {
                        let model = config_clone
                            .find_agent(&aname)
                            .and_then(|a| a.model.clone());
                        let record = crate::agents::token_usage::build_record(
                            &sid,
                            &aname,
                            model,
                            usage.clone(),
                        );
                        if let Err(e) = crate::agents::token_usage::append_record(pr, &record) {
                            slog_error!(
                                "[agents] Failed to persist token usage for \
                                 {sid}:{aname}: {e}"
                            );
                        }
                    }

                    // Mergemaster agents have their own completion path via
                    // start_merge_agent_work / run_merge_pipeline and must NOT go
                    // through server-owned gates. When a mergemaster exits early
                    // (e.g. rate-limited before calling start_merge_agent_work) the
                    // feature-branch worktree compiles fine and post-merge tests on
                    // master pass (nothing changed), which would wrongly advance the
                    // story to 5_done/ without any squash merge having occurred.
                    // Instead: just remove the agent from the pool and let
                    // auto-assign restart a new mergemaster for the story.
                    let stage = config_clone
                        .find_agent(&aname)
                        .map(agent_config_stage)
                        .unwrap_or_else(|| pipeline_stage(&aname));
                    if stage == PipelineStage::Mergemaster {
                        let (tx_done, done_session_id) = {
                            let mut lock = match agents_ref.lock() {
                                Ok(a) => a,
                                Err(_) => return,
                            };
                            if let Some(agent) = lock.remove(&key_clone) {
                                (agent.tx, agent.session_id.or(result.session_id))
                            } else {
                                (tx_clone.clone(), result.session_id)
                            }
                        };
                        // Clear any stale Running merge job so the next mergemaster
                        // can call start_merge_agent_work without hitting "Merge
                        // already in progress" (bug 498).
                        if let Ok(mut jobs) = merge_jobs_clone.lock()
                            && let Some(job) = jobs.get(&sid)
                            && matches!(job.status, crate::agents::merge::MergeJobStatus::Running)
                        {
                            jobs.remove(&sid);
                        }

                        let _ = tx_done.send(AgentEvent::Done {
                            story_id: sid.clone(),
                            agent_name: aname.clone(),
                            session_id: done_session_id,
                        });
                        AgentPool::notify_agent_state_changed(&watcher_tx_clone);

                        // Send a WorkItem event so the auto-assign watcher loop
                        // re-dispatches a new mergemaster if the story still needs
                        // merging. This avoids an async call to start_agent inside
                        // a tokio::spawn (which would require Send).
                        let _ = watcher_tx_clone.send(crate::io::watcher::WatcherEvent::WorkItem {
                            stage: "4_merge".to_string(),
                            item_id: sid.clone(),
                            action: "reassign".to_string(),
                            commit_msg: String::new(),
                            from_stage: None,
                        });
                    } else {
                        // Server-owned completion: run acceptance gates automatically
                        // when the agent process exits normally.
                        super::pipeline::run_server_owned_completion(
                            &agents_ref,
                            port_for_task,
                            &sid,
                            &aname,
                            result.session_id,
                            watcher_tx_clone.clone(),
                        )
                        .await;
                        AgentPool::notify_agent_state_changed(&watcher_tx_clone);
                    }
                }
                Err(e) => {
                    slog_error!("[agents] Agent process error for {aname} on {sid}: {e}");
                    let event = AgentEvent::Error {
                        story_id: sid.clone(),
                        agent_name: aname.clone(),
                        message: e,
                    };
                    if let Ok(mut log) = log_clone.lock() {
                        log.push(event.clone());
                    }
                    let _ = tx_clone.send(event);
                    if let Ok(mut agents) = agents_ref.lock()
                        && let Some(agent) = agents.get_mut(&key_clone)
                    {
                        agent.status = AgentStatus::Failed;
                    }
                    AgentPool::notify_agent_state_changed(&watcher_tx_clone);
                }
            }
        });

        // Store the task handle while the agent is still Pending.
        {
            let mut agents = self.agents.lock().map_err(|e| e.to_string())?;
            if let Some(agent) = agents.get_mut(&key) {
                agent.task_handle = Some(handle);
            }
        }
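        // (Presumably the stored handle lets stop/cleanup paths abort or await
        // the background task later; nothing in this function uses it again.)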
        // Agent successfully spawned — prevent the guard from removing the entry.
        pending_guard.disarm();

        Ok(AgentInfo {
            story_id: story_id.to_string(),
            agent_name: resolved_name,
            status: AgentStatus::Pending,
            session_id: None,
            worktree_path: None,
            base_branch: None,
            completion: None,
            log_session_id: Some(log_session_id),
            throttled: false,
        })
    }
}

#[cfg(test)]
mod tests {
    use super::super::AgentPool;
    use crate::agents::{AgentEvent, AgentStatus};

    #[tokio::test]
    async fn start_agent_auto_selects_second_coder_when_first_busy() {
        let tmp = tempfile::tempdir().unwrap();
        let sk = tmp.path().join(".huskies");
        std::fs::create_dir_all(&sk).unwrap();
        std::fs::write(
            sk.join("project.toml"),
            r#"
[[agent]]
name = "supervisor"
stage = "other"

[[agent]]
name = "coder-1"
stage = "coder"

[[agent]]
name = "coder-2"
stage = "coder"
"#,
        )
        .unwrap();

        let pool = AgentPool::new_test(3001);
        pool.inject_test_agent("other-story", "coder-1", AgentStatus::Running);

        let result = pool
            .start_agent(tmp.path(), "42_my_story", None, None, None)
            .await;

        match result {
            Ok(info) => {
                assert_eq!(info.agent_name, "coder-2");
            }
            Err(err) => {
                assert!(
                    !err.contains("All coder agents are busy"),
                    "should have selected coder-2 but got: {err}"
                );
                assert!(
                    !err.contains("No coder agent configured"),
                    "should not fail on agent selection, got: {err}"
                );
            }
        }
    }

    #[tokio::test]
    async fn start_agent_returns_busy_when_all_coders_occupied() {
        let tmp = tempfile::tempdir().unwrap();
        let sk = tmp.path().join(".huskies");
        std::fs::create_dir_all(&sk).unwrap();
        std::fs::write(
            sk.join("project.toml"),
            r#"
[[agent]]
name = "coder-1"
stage = "coder"

[[agent]]
name = "coder-2"
stage = "coder"
"#,
        )
        .unwrap();

        let pool = AgentPool::new_test(3001);
        pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running);
        pool.inject_test_agent("story-2", "coder-2", AgentStatus::Pending);

        let result = pool
            .start_agent(tmp.path(), "story-3", None, None, None)
            .await;

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(
            err.contains("All coder agents are busy"),
            "expected busy error, got: {err}"
        );
    }

    #[tokio::test]
    async fn start_agent_moves_story_to_current_when_coders_busy() {
        let tmp = tempfile::tempdir().unwrap();
        let sk = tmp.path().join(".huskies");
        let backlog = sk.join("work/1_backlog");
        std::fs::create_dir_all(&backlog).unwrap();
        std::fs::write(
            sk.join("project.toml"),
            r#"
[[agent]]
name = "coder-1"
stage = "coder"
"#,
        )
        .unwrap();
        let story_content = "---\nname: Story 3\n---\n";
        std::fs::write(backlog.join("story-3.md"), story_content).unwrap();
        crate::db::ensure_content_store();
        crate::db::write_content("story-3", story_content);

        let pool = AgentPool::new_test(3001);
        pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running);

        let result = pool
            .start_agent(tmp.path(), "story-3", None, None, None)
            .await;

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(
            err.contains("All coder agents are busy"),
            "expected busy error, got: {err}"
        );
        assert!(
            err.contains("queued in work/2_current/"),
            "expected story-to-current message, got: {err}"
        );
        // The lifecycle function updates the content store (not the filesystem),
        // so verify the move via the DB.
        let content = crate::db::read_content("story-3")
            .expect("story-3 should be in content store after move to current");
        assert!(
            content.contains("name: Story 3"),
            "story-3 content should be preserved after move"
        );
    }

    #[tokio::test]
    async fn start_agent_story_already_in_current_is_noop() {
        let tmp = tempfile::tempdir().unwrap();
        let sk = tmp.path().join(".huskies");
        let current = sk.join("work/2_current");
        std::fs::create_dir_all(&current).unwrap();
        std::fs::write(
            sk.join("project.toml"),
            "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n",
        )
        .unwrap();
        std::fs::write(current.join("story-5.md"), "---\nname: Story 5\n---\n").unwrap();

        let pool = AgentPool::new_test(3001);
        let result = pool
            .start_agent(tmp.path(), "story-5", None, None, None)
            .await;

        match result {
            Ok(_) => {}
            Err(e) => {
                assert!(
                    !e.contains("Failed to move"),
                    "should not fail on idempotent move, got: {e}"
                );
            }
        }
    }

    #[tokio::test]
    async fn start_agent_explicit_name_unchanged_when_busy() {
        let tmp = tempfile::tempdir().unwrap();
        let sk = tmp.path().join(".huskies");
        std::fs::create_dir_all(&sk).unwrap();
        std::fs::write(
            sk.join("project.toml"),
            r#"
[[agent]]
name = "coder-1"
stage = "coder"

[[agent]]
name = "coder-2"
stage = "coder"
"#,
        )
        .unwrap();

        let pool = AgentPool::new_test(3001);
        pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running);

        let result = pool
            .start_agent(tmp.path(), "story-2", Some("coder-1"), None, None)
            .await;

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(
            err.contains("coder-1") && err.contains("already running"),
            "expected explicit busy error, got: {err}"
        );
    }

    // ── start_agent single-instance concurrency tests ─────────────────────────

    #[tokio::test]
    async fn start_agent_rejects_when_same_agent_already_running_on_another_story() {
        use std::fs;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        let sk_dir = root.join(".huskies");
        fs::create_dir_all(&sk_dir).unwrap();
        fs::write(sk_dir.join("project.toml"), "[[agent]]\nname = \"qa\"\n").unwrap();

        let pool = AgentPool::new_test(3001);
        pool.inject_test_agent("story-a", "qa", AgentStatus::Running);

        let result = pool
            .start_agent(root, "story-b", Some("qa"), None, None)
            .await;

        assert!(
            result.is_err(),
            "start_agent should fail when qa is already running on another story"
        );
        let err = result.unwrap_err();
        assert!(
            err.contains("already running") || err.contains("becomes available"),
            "error message should explain why: got '{err}'"
        );
    }

    #[tokio::test]
    async fn start_agent_allows_new_story_when_previous_run_is_completed() {
        use std::fs;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        let sk_dir = root.join(".huskies");
        fs::create_dir_all(&sk_dir).unwrap();
        fs::write(sk_dir.join("project.toml"), "[[agent]]\nname = \"qa\"\n").unwrap();

        let pool = AgentPool::new_test(3001);
        pool.inject_test_agent("story-a", "qa", AgentStatus::Completed);

        let result = pool
            .start_agent(root, "story-b", Some("qa"), None, None)
            .await;

        if let Err(ref e) = result {
            assert!(
                !e.contains("already running") && !e.contains("becomes available"),
                "completed agent must not trigger the concurrency guard: got '{e}'"
            );
        }
    }

    // ── bug 118: pending entry cleanup on start_agent failure ────────────────

    #[tokio::test]
    async fn start_agent_cleans_up_pending_entry_on_failure() {
        use std::fs;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        let sk_dir = root.join(".huskies");
        fs::create_dir_all(&sk_dir).unwrap();
        fs::write(
            sk_dir.join("project.toml"),
            "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n",
        )
        .unwrap();
        let upcoming = root.join(".huskies/work/1_backlog");
        fs::create_dir_all(&upcoming).unwrap();
        fs::write(upcoming.join("50_story_test.md"), "---\nname: Test\n---\n").unwrap();

        let pool = AgentPool::new_test(3099);
        let result = pool
            .start_agent(root, "50_story_test", Some("coder-1"), None, None)
            .await;
        assert!(
            result.is_ok(),
            "start_agent should return Ok(Pending) immediately: {:?}",
            result.err()
        );
        assert_eq!(
            result.unwrap().status,
            AgentStatus::Pending,
            "initial status must be Pending"
        );

        let final_info = pool
            .wait_for_agent("50_story_test", "coder-1", 5000)
            .await
            .expect("wait_for_agent should not time out");
        assert_eq!(
            final_info.status,
            AgentStatus::Failed,
            "agent must transition to Failed after worktree creation error"
        );

        let agents = pool.agents.lock().unwrap();
        let failed_entry = agents
            .values()
            .find(|a| a.agent_name == "coder-1" && a.status == AgentStatus::Failed);
        assert!(
            failed_entry.is_some(),
            "agent pool must retain a Failed entry so the UI can show the error state"
        );
        drop(agents);

        let events = pool
            .drain_events("50_story_test", "coder-1")
            .expect("drain_events should succeed");
        let has_error_event = events.iter().any(|e| matches!(e, AgentEvent::Error { .. }));
        assert!(
            has_error_event,
            "event_log must contain AgentEvent::Error after worktree creation fails"
        );
    }

    #[tokio::test]
    async fn start_agent_guard_does_not_remove_running_entry() {
        use std::fs;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        let sk_dir = root.join(".huskies");
        fs::create_dir_all(&sk_dir).unwrap();
        fs::write(sk_dir.join("project.toml"), "[[agent]]\nname = \"qa\"\n").unwrap();

        let pool = AgentPool::new_test(3099);
        pool.inject_test_agent("story-x", "qa", AgentStatus::Running);

        let result = pool
            .start_agent(root, "story-y", Some("qa"), None, None)
            .await;

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(
            err.contains("already running") || err.contains("becomes available"),
            "running entry must survive: got '{err}'"
        );
    }

    // ── TOCTOU race-condition regression tests (story 132) ───────────────────

    #[tokio::test]
    async fn toctou_pending_entry_blocks_same_agent_on_different_story() {
        use std::fs;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        let sk_dir = root.join(".huskies");
        fs::create_dir_all(&sk_dir).unwrap();
        fs::write(
            sk_dir.join("project.toml"),
            "[[agent]]\nname = \"coder-1\"\n",
        )
        .unwrap();

        let pool = AgentPool::new_test(3099);
        pool.inject_test_agent("86_story_foo", "coder-1", AgentStatus::Pending);

        let result = pool
            .start_agent(root, "130_story_bar", Some("coder-1"), None, None)
            .await;

        assert!(result.is_err(), "second start_agent must be rejected");
        let err = result.unwrap_err();
        assert!(
            err.contains("already running") || err.contains("becomes available"),
            "expected concurrency-rejection message, got: '{err}'"
        );
    }

    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn toctou_concurrent_start_agent_same_agent_exactly_one_concurrency_rejection() {
        use std::fs;
        use std::sync::Arc;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path().to_path_buf();
        let sk_dir = root.join(".huskies");
        fs::create_dir_all(sk_dir.join("work/1_backlog")).unwrap();
        fs::write(
            root.join(".huskies/project.toml"),
            "[[agent]]\nname = \"coder-1\"\n",
        )
        .unwrap();
        fs::write(
            root.join(".huskies/work/1_backlog/86_story_foo.md"),
            "---\nname: Foo\n---\n",
        )
        .unwrap();
        fs::write(
            root.join(".huskies/work/1_backlog/130_story_bar.md"),
            "---\nname: Bar\n---\n",
        )
        .unwrap();

        let pool = Arc::new(AgentPool::new_test(3099));

        let pool1 = pool.clone();
        let root1 = root.clone();
        let t1 = tokio::spawn(async move {
            pool1
                .start_agent(&root1, "86_story_foo", Some("coder-1"), None, None)
                .await
        });
        let pool2 = pool.clone();
        let root2 = root.clone();
        let t2 = tokio::spawn(async move {
            pool2
                .start_agent(&root2, "130_story_bar", Some("coder-1"), None, None)
                .await
        });

        let (r1, r2) = tokio::join!(t1, t2);
        let r1 = r1.unwrap();
        let r2 = r2.unwrap();

        let concurrency_rejections = [&r1, &r2]
            .iter()
            .filter(|r| {
                r.as_ref().is_err_and(|e| {
                    e.contains("already running") || e.contains("becomes available")
                })
            })
            .count();
        assert_eq!(
            concurrency_rejections, 1,
            "exactly one call must be rejected by the concurrency check; \
             got r1={r1:?} r2={r2:?}"
        );
    }

    // ── story-230: prevent duplicate stage agents on same story ───────────────

    #[tokio::test]
    async fn start_agent_rejects_second_coder_stage_on_same_story() {
        use std::fs;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        let sk_dir = root.join(".huskies");
        fs::create_dir_all(&sk_dir).unwrap();
        fs::write(
            sk_dir.join("project.toml"),
            "[[agent]]\nname = \"coder-1\"\n\n[[agent]]\nname = \"coder-2\"\n",
        )
        .unwrap();

        let pool = AgentPool::new_test(3099);
        pool.inject_test_agent("42_story_foo", "coder-1", AgentStatus::Running);

        let result = pool
            .start_agent(root, "42_story_foo", Some("coder-2"), None, None)
            .await;

        assert!(
            result.is_err(),
            "second coder on same story must be rejected"
        );
        let err = result.unwrap_err();
        assert!(
            err.contains("same pipeline stage"),
            "error must mention same pipeline stage, got: '{err}'"
        );
        assert!(
            err.contains("coder-1") && err.contains("coder-2"),
            "error must name both agents, got: '{err}'"
        );
    }

    #[tokio::test]
    async fn start_agent_rejects_second_qa_stage_on_same_story() {
        use std::fs;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        let sk_dir = root.join(".huskies");
        fs::create_dir_all(&sk_dir).unwrap();
        fs::write(
            sk_dir.join("project.toml"),
            "[[agent]]\nname = \"qa-1\"\nstage = \"qa\"\n\n\
             [[agent]]\nname = \"qa-2\"\nstage = \"qa\"\n",
        )
        .unwrap();

        let pool = AgentPool::new_test(3099);
        pool.inject_test_agent("55_story_bar", "qa-1", AgentStatus::Running);

        let result = pool
            .start_agent(root, "55_story_bar", Some("qa-2"), None, None)
            .await;

        assert!(result.is_err(), "second qa on same story must be rejected");
        let err = result.unwrap_err();
        assert!(
            err.contains("same pipeline stage"),
            "error must mention same pipeline stage, got: '{err}'"
        );
    }

    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn start_agent_concurrent_two_coders_same_story_exactly_one_stage_rejection() {
        use std::fs;
        use std::sync::Arc;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path().to_path_buf();
        let sk_dir = root.join(".huskies");
        fs::create_dir_all(sk_dir.join("work/2_current")).unwrap();
        fs::write(
            root.join(".huskies/project.toml"),
            "[[agent]]\nname = \"coder-1\"\n\n[[agent]]\nname = \"coder-2\"\n",
        )
        .unwrap();
        fs::write(
            root.join(".huskies/work/2_current/42_story_foo.md"),
            "---\nname: Foo\n---\n",
        )
        .unwrap();

        let pool = Arc::new(AgentPool::new_test(3099));

        let pool1 = pool.clone();
        let root1 = root.clone();
        let t1 = tokio::spawn(async move {
            pool1
                .start_agent(&root1, "42_story_foo", Some("coder-1"), None, None)
                .await
        });
        let pool2 = pool.clone();
        let root2 = root.clone();
        let t2 = tokio::spawn(async move {
            pool2
                .start_agent(&root2, "42_story_foo", Some("coder-2"), None, None)
                .await
        });

        let (r1, r2) = tokio::join!(t1, t2);
        let r1 = r1.unwrap();
        let r2 = r2.unwrap();

        let stage_rejections = [&r1, &r2]
            .iter()
            .filter(|r| r.as_ref().is_err_and(|e| e.contains("same pipeline stage")))
            .count();
        assert_eq!(
            stage_rejections, 1,
            "exactly one call must be rejected by the stage-conflict check; \
             got r1={r1:?} r2={r2:?}"
        );
    }

    #[tokio::test]
    async fn start_agent_two_coders_different_stories_not_blocked_by_stage_check() {
        use std::fs;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        let sk_dir = root.join(".huskies");
        fs::create_dir_all(sk_dir.join("work/1_backlog")).unwrap();
        fs::write(
            root.join(".huskies/project.toml"),
            "[[agent]]\nname = \"coder-1\"\n\n[[agent]]\nname = \"coder-2\"\n",
        )
        .unwrap();
        fs::write(
            root.join(".huskies/work/1_backlog/99_story_baz.md"),
            "---\nname: Baz\n---\n",
        )
        .unwrap();

        let pool = AgentPool::new_test(3099);
        pool.inject_test_agent("42_story_foo", "coder-1", AgentStatus::Running);

        let result = pool
            .start_agent(root, "99_story_baz", Some("coder-2"), None, None)
            .await;

        if let Err(ref e) = result {
            assert!(
                !e.contains("same pipeline stage"),
                "stage-conflict guard must not fire for agents on different stories; \
                 got: '{e}'"
            );
        }
    }

    // ── bug 312: stage-pipeline mismatch guard in start_agent ──────────────

    #[tokio::test]
    async fn start_agent_rejects_mergemaster_on_coding_stage_story() {
        use std::fs;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        let sk_dir = root.join(".huskies");
        fs::create_dir_all(sk_dir.join("work/2_current")).unwrap();
        fs::write(
            sk_dir.join("project.toml"),
            "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n\n\
             [[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n",
        )
        .unwrap();
        crate::db::ensure_content_store();
        crate::db::write_item_with_content("310_story_foo", "2_current", "---\nname: Foo\n---\n");

        let pool = AgentPool::new_test(3099);
        let result = pool
            .start_agent(root, "310_story_foo", Some("mergemaster"), None, None)
            .await;

        assert!(
            result.is_err(),
            "mergemaster must not be assigned to a story in 2_current/"
        );
        let err = result.unwrap_err();
        assert!(
            err.contains("stage") && err.contains("2_current"),
            "error must mention stage mismatch, got: '{err}'"
        );
    }

    #[tokio::test]
    async fn start_agent_rejects_coder_on_qa_stage_story() {
        use std::fs;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        let sk_dir = root.join(".huskies");
        fs::create_dir_all(&sk_dir).unwrap();
        fs::write(
            sk_dir.join("project.toml"),
            "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n\n\
             [[agent]]\nname = \"qa\"\nstage = \"qa\"\n",
        )
        .unwrap();
        crate::db::ensure_content_store();
        crate::db::write_item_with_content(
            "8842_story_qa_guard",
            "3_qa",
            "---\nname: QA Guard\n---\n",
        );

        let pool = AgentPool::new_test(3099);
        let result = pool
            .start_agent(root, "8842_story_qa_guard", Some("coder-1"), None, None)
            .await;

        assert!(
            result.is_err(),
            "coder must not be assigned to a story in 3_qa/"
        );
        let err = result.unwrap_err();
        assert!(
            err.contains("stage") && err.contains("3_qa"),
            "error must mention stage mismatch, got: '{err}'"
        );
    }

    #[tokio::test]
    async fn start_agent_rejects_qa_on_merge_stage_story() {
        use std::fs;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        let sk_dir = root.join(".huskies");
        fs::create_dir_all(&sk_dir).unwrap();
        fs::write(
            sk_dir.join("project.toml"),
            "[[agent]]\nname = \"qa\"\nstage = \"qa\"\n\n\
             [[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n",
        )
        .unwrap();
        crate::db::ensure_content_store();
        crate::db::write_item_with_content("55_story_baz", "4_merge", "---\nname: Baz\n---\n");

        let pool = AgentPool::new_test(3099);
        let result = pool
            .start_agent(root, "55_story_baz", Some("qa"), None, None)
            .await;

        assert!(
            result.is_err(),
            "qa must not be assigned to a story in 4_merge/"
        );
        let err = result.unwrap_err();
        assert!(
            err.contains("stage") && err.contains("4_merge"),
            "error must mention stage mismatch, got: '{err}'"
        );
    }

    #[tokio::test]
    async fn start_agent_allows_supervisor_on_any_stage() {
        use std::fs;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        let sk_dir = root.join(".huskies");
        fs::create_dir_all(sk_dir.join("work/2_current")).unwrap();
        fs::write(
            sk_dir.join("project.toml"),
            "[[agent]]\nname = \"supervisor\"\nstage = \"other\"\n",
        )
        .unwrap();
        fs::write(
            sk_dir.join("work/2_current/77_story_sup.md"),
            "---\nname: Sup\n---\n",
        )
        .unwrap();

        let pool = AgentPool::new_test(3099);
        let result = pool
            .start_agent(root, "77_story_sup", Some("supervisor"), None, None)
            .await;

        match result {
            Ok(_) => {}
            Err(e) => {
                assert!(
                    !e.contains("stage:") || !e.contains("cannot be assigned"),
                    "supervisor should not be rejected for stage mismatch, got: '{e}'"
                );
            }
        }
    }

    #[tokio::test]
    async fn start_agent_allows_correct_stage_agent() {
        use std::fs;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        let sk_dir = root.join(".huskies");
        fs::create_dir_all(sk_dir.join("work/4_merge")).unwrap();
        fs::write(
            sk_dir.join("project.toml"),
            "[[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n",
        )
        .unwrap();
        fs::write(
            sk_dir.join("work/4_merge/88_story_ok.md"),
            "---\nname: OK\n---\n",
        )
        .unwrap();

        let pool = AgentPool::new_test(3099);
        let result = pool
            .start_agent(root, "88_story_ok", Some("mergemaster"), None, None)
            .await;

        match result {
            Ok(_) => {}
            Err(e) => {
                assert!(
                    !e.contains("cannot be assigned"),
                    "mergemaster on 4_merge/ story should not fail stage check, got: '{e}'"
                );
            }
        }
    }

    /// Bug 502: when start_agent is called for a non-Coder agent (mergemaster
    /// or qa) on a story that's in 4_merge/, the unconditional
    /// move_story_to_current at the top of start_agent must NOT fire — even
    /// when a stale split-brain shadow of the story exists in 1_backlog/.
    ///
    /// Pre-fix behaviour: move_story_to_current would find the 1_backlog
    /// shadow and move it to 2_current/. find_active_story_stage would then
    /// report 2_current/, the stage check would expect a Coder-stage agent,
    /// and mergemaster would be rejected — leaving the story in 2_current/
    /// to be picked up by the next auto-assign tick as a coder. Infinite loop.
    /// Observed live on 2026-04-09 against story 478.
    #[tokio::test]
    async fn start_agent_does_not_demote_merge_stage_story_with_backlog_shadow() {
        use std::fs;
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        let sk_dir = root.join(".huskies");
        fs::create_dir_all(sk_dir.join("work/1_backlog")).unwrap();
        fs::create_dir_all(sk_dir.join("work/4_merge")).unwrap();
        fs::write(
            sk_dir.join("project.toml"),
            "[[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n",
        )
        .unwrap();
        // Real copy in 4_merge/ (where the story actually is per the DB).
        fs::write(
            sk_dir.join("work/4_merge/502_story_split_brain.md"),
            "---\nname: Split Brain\n---\n",
        )
        .unwrap();
        // Stale split-brain shadow in 1_backlog/ (post-491/492 migration
        // artifact — the filesystem shadow that bit us in production).
        fs::write(
            sk_dir.join("work/1_backlog/502_story_split_brain.md"),
            "---\nname: Split Brain\n---\n",
        )
        .unwrap();

        let pool = AgentPool::new_test(3098);
        let result = pool
            .start_agent(
                root,
                "502_story_split_brain",
                Some("mergemaster"),
                None,
                None,
            )
            .await;

        // Stage check must not reject mergemaster.
        if let Err(ref e) = result {
            assert!(
                !e.contains("cannot be assigned"),
                "mergemaster on 4_merge/ story must not fail stage check even \
                 when a 1_backlog shadow exists, got: '{e}'"
            );
        }

        // Critical: the story must still be in 4_merge/ after the call.
        // Before the fix, line 53 of start.rs would have demoted it to
        // 2_current/ via move_story_to_current finding the 1_backlog shadow.
        assert!(
            sk_dir
                .join("work/4_merge/502_story_split_brain.md")
                .exists(),
            "story must still be in 4_merge/ after start_agent(mergemaster, ...)"
        );
        assert!(
            !sk_dir
                .join("work/2_current/502_story_split_brain.md")
                .exists(),
            "story must NOT have been demoted to 2_current/ — that's bug 502"
        );
    }

    // ── front matter agent preference (bug 379) ──────────────────────────────

    #[tokio::test]
    async fn start_agent_honours_front_matter_agent_when_idle() {
        let tmp = tempfile::tempdir().unwrap();
        let sk = tmp.path().join(".huskies");
        let backlog = sk.join("work/1_backlog");
        std::fs::create_dir_all(&backlog).unwrap();
        std::fs::write(
            sk.join("project.toml"),
            r#"
[[agent]]
name = "coder-sonnet"
stage = "coder"

[[agent]]
name = "coder-opus"
stage = "coder"
"#,
        )
        .unwrap();
        // Story file with agent preference in front matter.
        std::fs::write(
            backlog.join("368_story_test.md"),
            "---\nname: Test Story\nagent: coder-opus\n---\n# Story 368\n",
        )
        .unwrap();

        let pool = AgentPool::new_test(3010);
        // coder-sonnet is busy so without front matter the auto-selection
        // would skip coder-opus and try something else.
        pool.inject_test_agent("other-story", "coder-sonnet", AgentStatus::Running);

        let result = pool
            .start_agent(tmp.path(), "368_story_test", None, None, None)
            .await;

        match result {
            Ok(info) => {
                assert_eq!(
                    info.agent_name, "coder-opus",
                    "should pick the front-matter preferred agent"
                );
            }
            Err(err) => {
                // Allowed to fail for infrastructure reasons (no git repo),
                // but NOT due to agent selection ignoring the preference.
                assert!(
                    !err.contains("All coder agents are busy"),
                    "should not report busy when coder-opus is idle: {err}"
                );
                assert!(
                    !err.contains("coder-sonnet"),
                    "should not have picked coder-sonnet: {err}"
                );
            }
        }
    }

    #[tokio::test]
    async fn start_agent_returns_error_when_front_matter_agent_busy() {
        let tmp = tempfile::tempdir().unwrap();
        let sk = tmp.path().join(".huskies");
        let backlog = sk.join("work/1_backlog");
        let current = sk.join("work/2_current");
        std::fs::create_dir_all(&backlog).unwrap();
        std::fs::create_dir_all(&current).unwrap();
        std::fs::write(
            sk.join("project.toml"),
            r#"
[[agent]]
name = "coder-sonnet"
stage = "coder"

[[agent]]
name = "coder-opus"
stage = "coder"
"#,
        )
        .unwrap();
        let story_content = "---\nname: Test Story\nagent: coder-opus\n---\n# Story 368\n";
        std::fs::write(backlog.join("368_story_test.md"), story_content).unwrap();
        // Also write to the filesystem current dir and content store so that
        // start_agent reads the correct front matter even when another test has
        // left a stale entry for "368_story_test" in the global CRDT.
        std::fs::write(current.join("368_story_test.md"), story_content).unwrap();
        crate::db::ensure_content_store();
        crate::db::write_content("368_story_test", story_content);

        let pool = AgentPool::new_test(3011);
        // Preferred agent is busy — should NOT fall back to coder-sonnet.
        pool.inject_test_agent("other-story", "coder-opus", AgentStatus::Running);

        let result = pool
            .start_agent(tmp.path(), "368_story_test", None, None, None)
            .await;

        assert!(
            result.is_err(),
            "expected error when preferred agent is busy"
        );
        let err = result.unwrap_err();
        assert!(
            err.contains("coder-opus"),
            "error should mention the preferred agent: {err}"
        );
        assert!(
            err.contains("busy") || err.contains("queued"),
            "error should say agent is busy or story is queued: {err}"
        );
    }
}