diff --git a/server/src/agents/pool/start/mod.rs b/server/src/agents/pool/start/mod.rs index 238081ef..314f59cf 100644 --- a/server/src/agents/pool/start/mod.rs +++ b/server/src/agents/pool/start/mod.rs @@ -365,972 +365,6 @@ impl AgentPool { } #[cfg(test)] -mod tests { - use super::super::AgentPool; - use crate::agents::{AgentEvent, AgentStatus}; - - #[tokio::test] - async fn start_agent_auto_selects_second_coder_when_first_busy() { - let tmp = tempfile::tempdir().unwrap(); - let sk = tmp.path().join(".huskies"); - std::fs::create_dir_all(&sk).unwrap(); - std::fs::write( - sk.join("project.toml"), - r#" -[[agent]] -name = "supervisor" -stage = "other" - -[[agent]] -name = "coder-1" -stage = "coder" - -[[agent]] -name = "coder-2" -stage = "coder" -"#, - ) - .unwrap(); - - let pool = AgentPool::new_test(3001); - pool.inject_test_agent("other-story", "coder-1", AgentStatus::Running); - - let result = pool - .start_agent(tmp.path(), "42_my_story", None, None, None) - .await; - match result { - Ok(info) => { - assert_eq!(info.agent_name, "coder-2"); - } - Err(err) => { - assert!( - !err.contains("All coder agents are busy"), - "should have selected coder-2 but got: {err}" - ); - assert!( - !err.contains("No coder agent configured"), - "should not fail on agent selection, got: {err}" - ); - } - } - } - - #[tokio::test] - async fn start_agent_returns_busy_when_all_coders_occupied() { - let tmp = tempfile::tempdir().unwrap(); - let sk = tmp.path().join(".huskies"); - std::fs::create_dir_all(&sk).unwrap(); - std::fs::write( - sk.join("project.toml"), - r#" -[[agent]] -name = "coder-1" -stage = "coder" - -[[agent]] -name = "coder-2" -stage = "coder" -"#, - ) - .unwrap(); - - let pool = AgentPool::new_test(3001); - pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running); - pool.inject_test_agent("story-2", "coder-2", AgentStatus::Pending); - - let result = pool - .start_agent(tmp.path(), "story-3", None, None, None) - .await; - assert!(result.is_err()); - let err = 
result.unwrap_err(); - assert!( - err.contains("All coder agents are busy"), - "expected busy error, got: {err}" - ); - } - - #[tokio::test] - async fn start_agent_moves_story_to_current_when_coders_busy() { - let tmp = tempfile::tempdir().unwrap(); - let sk = tmp.path().join(".huskies"); - let backlog = sk.join("work/1_backlog"); - std::fs::create_dir_all(&backlog).unwrap(); - std::fs::write( - sk.join("project.toml"), - r#" -[[agent]] -name = "coder-1" -stage = "coder" -"#, - ) - .unwrap(); - let story_content = "---\nname: Story 3\n---\n"; - std::fs::write(backlog.join("story-3.md"), story_content).unwrap(); - crate::db::ensure_content_store(); - crate::db::write_content("story-3", story_content); - - let pool = AgentPool::new_test(3001); - pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running); - - let result = pool - .start_agent(tmp.path(), "story-3", None, None, None) - .await; - - assert!(result.is_err()); - let err = result.unwrap_err(); - assert!( - err.contains("All coder agents are busy"), - "expected busy error, got: {err}" - ); - assert!( - err.contains("queued in work/2_current/"), - "expected story-to-current message, got: {err}" - ); - - // The lifecycle function updates the content store (not the filesystem), - // so verify the move via the DB. 
- - let content = crate::db::read_content("story-3") - .expect("story-3 should be in content store after move to current"); - assert!( - content.contains("name: Story 3"), - "story-3 content should be preserved after move" - ); - } - - #[tokio::test] - async fn start_agent_story_already_in_current_is_noop() { - let tmp = tempfile::tempdir().unwrap(); - let sk = tmp.path().join(".huskies"); - let current = sk.join("work/2_current"); - std::fs::create_dir_all(&current).unwrap(); - std::fs::write( - sk.join("project.toml"), - "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n", - ) - .unwrap(); - std::fs::write(current.join("story-5.md"), "---\nname: Story 5\n---\n").unwrap(); - - let pool = AgentPool::new_test(3001); - - let result = pool - .start_agent(tmp.path(), "story-5", None, None, None) - .await; - match result { - Ok(_) => {} - Err(e) => { - assert!( - !e.contains("Failed to move"), - "should not fail on idempotent move, got: {e}" - ); - } - } - } - - #[tokio::test] - async fn start_agent_explicit_name_unchanged_when_busy() { - let tmp = tempfile::tempdir().unwrap(); - let sk = tmp.path().join(".huskies"); - std::fs::create_dir_all(&sk).unwrap(); - std::fs::write( - sk.join("project.toml"), - r#" -[[agent]] -name = "coder-1" -stage = "coder" - -[[agent]] -name = "coder-2" -stage = "coder" -"#, - ) - .unwrap(); - - let pool = AgentPool::new_test(3001); - pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running); - - let result = pool - .start_agent(tmp.path(), "story-2", Some("coder-1"), None, None) - .await; - assert!(result.is_err()); - let err = result.unwrap_err(); - assert!( - err.contains("coder-1") && err.contains("already running"), - "expected explicit busy error, got: {err}" - ); - } - - // ── start_agent single-instance concurrency tests ───────────────────────── - - #[tokio::test] - async fn start_agent_rejects_when_same_agent_already_running_on_another_story() { - use std::fs; - - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path(); 
- - let sk_dir = root.join(".huskies"); - fs::create_dir_all(&sk_dir).unwrap(); - fs::write(sk_dir.join("project.toml"), "[[agent]]\nname = \"qa\"\n").unwrap(); - - let pool = AgentPool::new_test(3001); - pool.inject_test_agent("story-a", "qa", AgentStatus::Running); - - let result = pool - .start_agent(root, "story-b", Some("qa"), None, None) - .await; - - assert!( - result.is_err(), - "start_agent should fail when qa is already running on another story" - ); - let err = result.unwrap_err(); - assert!( - err.contains("already running") || err.contains("becomes available"), - "error message should explain why: got '{err}'" - ); - } - - #[tokio::test] - async fn start_agent_allows_new_story_when_previous_run_is_completed() { - use std::fs; - - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path(); - - let sk_dir = root.join(".huskies"); - fs::create_dir_all(&sk_dir).unwrap(); - fs::write(sk_dir.join("project.toml"), "[[agent]]\nname = \"qa\"\n").unwrap(); - - let pool = AgentPool::new_test(3001); - pool.inject_test_agent("story-a", "qa", AgentStatus::Completed); - - let result = pool - .start_agent(root, "story-b", Some("qa"), None, None) - .await; - - if let Err(ref e) = result { - assert!( - !e.contains("already running") && !e.contains("becomes available"), - "completed agent must not trigger the concurrency guard: got '{e}'" - ); - } - } - - // ── bug 118: pending entry cleanup on start_agent failure ──────────────── - - #[tokio::test] - async fn start_agent_cleans_up_pending_entry_on_failure() { - use std::fs; - - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path(); - - let sk_dir = root.join(".huskies"); - fs::create_dir_all(&sk_dir).unwrap(); - fs::write( - sk_dir.join("project.toml"), - "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n", - ) - .unwrap(); - - let upcoming = root.join(".huskies/work/1_backlog"); - fs::create_dir_all(&upcoming).unwrap(); - fs::write(upcoming.join("50_story_test.md"), "---\nname: Test\n---\n").unwrap(); - 
- let pool = AgentPool::new_test(3099); - - let result = pool - .start_agent(root, "50_story_test", Some("coder-1"), None, None) - .await; - - assert!( - result.is_ok(), - "start_agent should return Ok(Pending) immediately: {:?}", - result.err() - ); - assert_eq!( - result.unwrap().status, - AgentStatus::Pending, - "initial status must be Pending" - ); - - let final_info = pool - .wait_for_agent("50_story_test", "coder-1", 5000) - .await - .expect("wait_for_agent should not time out"); - assert_eq!( - final_info.status, - AgentStatus::Failed, - "agent must transition to Failed after worktree creation error" - ); - - let agents = pool.agents.lock().unwrap(); - let failed_entry = agents - .values() - .find(|a| a.agent_name == "coder-1" && a.status == AgentStatus::Failed); - assert!( - failed_entry.is_some(), - "agent pool must retain a Failed entry so the UI can show the error state" - ); - drop(agents); - - let events = pool - .drain_events("50_story_test", "coder-1") - .expect("drain_events should succeed"); - let has_error_event = events.iter().any(|e| matches!(e, AgentEvent::Error { .. 
})); - assert!( - has_error_event, - "event_log must contain AgentEvent::Error after worktree creation fails" - ); - } - - #[tokio::test] - async fn start_agent_guard_does_not_remove_running_entry() { - use std::fs; - - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path(); - - let sk_dir = root.join(".huskies"); - fs::create_dir_all(&sk_dir).unwrap(); - fs::write(sk_dir.join("project.toml"), "[[agent]]\nname = \"qa\"\n").unwrap(); - - let pool = AgentPool::new_test(3099); - pool.inject_test_agent("story-x", "qa", AgentStatus::Running); - - let result = pool - .start_agent(root, "story-y", Some("qa"), None, None) - .await; - - assert!(result.is_err()); - let err = result.unwrap_err(); - assert!( - err.contains("already running") || err.contains("becomes available"), - "running entry must survive: got '{err}'" - ); - } - - // ── TOCTOU race-condition regression tests (story 132) ─────────────────── - - #[tokio::test] - async fn toctou_pending_entry_blocks_same_agent_on_different_story() { - use std::fs; - - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path(); - - let sk_dir = root.join(".huskies"); - fs::create_dir_all(&sk_dir).unwrap(); - fs::write( - sk_dir.join("project.toml"), - "[[agent]]\nname = \"coder-1\"\n", - ) - .unwrap(); - - let pool = AgentPool::new_test(3099); - pool.inject_test_agent("86_story_foo", "coder-1", AgentStatus::Pending); - - let result = pool - .start_agent(root, "130_story_bar", Some("coder-1"), None, None) - .await; - - assert!(result.is_err(), "second start_agent must be rejected"); - let err = result.unwrap_err(); - assert!( - err.contains("already running") || err.contains("becomes available"), - "expected concurrency-rejection message, got: '{err}'" - ); - } - - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] - async fn toctou_concurrent_start_agent_same_agent_exactly_one_concurrency_rejection() { - use std::fs; - use std::sync::Arc; - - let tmp = tempfile::tempdir().unwrap(); - let root = 
tmp.path().to_path_buf(); - - let sk_dir = root.join(".huskies"); - fs::create_dir_all(sk_dir.join("work/1_backlog")).unwrap(); - fs::write( - root.join(".huskies/project.toml"), - "[[agent]]\nname = \"coder-1\"\n", - ) - .unwrap(); - fs::write( - root.join(".huskies/work/1_backlog/86_story_foo.md"), - "---\nname: Foo\n---\n", - ) - .unwrap(); - fs::write( - root.join(".huskies/work/1_backlog/130_story_bar.md"), - "---\nname: Bar\n---\n", - ) - .unwrap(); - - let pool = Arc::new(AgentPool::new_test(3099)); - - let pool1 = pool.clone(); - let root1 = root.clone(); - let t1 = tokio::spawn(async move { - pool1 - .start_agent(&root1, "86_story_foo", Some("coder-1"), None, None) - .await - }); - - let pool2 = pool.clone(); - let root2 = root.clone(); - let t2 = tokio::spawn(async move { - pool2 - .start_agent(&root2, "130_story_bar", Some("coder-1"), None, None) - .await - }); - - let (r1, r2) = tokio::join!(t1, t2); - let r1 = r1.unwrap(); - let r2 = r2.unwrap(); - - let concurrency_rejections = [&r1, &r2] - .iter() - .filter(|r| { - r.as_ref().is_err_and(|e| { - e.contains("already running") || e.contains("becomes available") - }) - }) - .count(); - - assert_eq!( - concurrency_rejections, 1, - "exactly one call must be rejected by the concurrency check; \ - got r1={r1:?} r2={r2:?}" - ); - } - - // ── story-230: prevent duplicate stage agents on same story ─────────────── - - #[tokio::test] - async fn start_agent_rejects_second_coder_stage_on_same_story() { - use std::fs; - - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path(); - - let sk_dir = root.join(".huskies"); - fs::create_dir_all(&sk_dir).unwrap(); - fs::write( - sk_dir.join("project.toml"), - "[[agent]]\nname = \"coder-1\"\n\n[[agent]]\nname = \"coder-2\"\n", - ) - .unwrap(); - - let pool = AgentPool::new_test(3099); - pool.inject_test_agent("42_story_foo", "coder-1", AgentStatus::Running); - - let result = pool - .start_agent(root, "42_story_foo", Some("coder-2"), None, None) - .await; - - 
assert!( - result.is_err(), - "second coder on same story must be rejected" - ); - let err = result.unwrap_err(); - assert!( - err.contains("same pipeline stage"), - "error must mention same pipeline stage, got: '{err}'" - ); - assert!( - err.contains("coder-1") && err.contains("coder-2"), - "error must name both agents, got: '{err}'" - ); - } - - #[tokio::test] - async fn start_agent_rejects_second_qa_stage_on_same_story() { - use std::fs; - - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path(); - - let sk_dir = root.join(".huskies"); - fs::create_dir_all(&sk_dir).unwrap(); - fs::write( - sk_dir.join("project.toml"), - "[[agent]]\nname = \"qa-1\"\nstage = \"qa\"\n\n\ - [[agent]]\nname = \"qa-2\"\nstage = \"qa\"\n", - ) - .unwrap(); - - let pool = AgentPool::new_test(3099); - pool.inject_test_agent("55_story_bar", "qa-1", AgentStatus::Running); - - let result = pool - .start_agent(root, "55_story_bar", Some("qa-2"), None, None) - .await; - - assert!(result.is_err(), "second qa on same story must be rejected"); - let err = result.unwrap_err(); - assert!( - err.contains("same pipeline stage"), - "error must mention same pipeline stage, got: '{err}'" - ); - } - - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] - async fn start_agent_concurrent_two_coders_same_story_exactly_one_stage_rejection() { - use std::fs; - use std::sync::Arc; - - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path().to_path_buf(); - - let sk_dir = root.join(".huskies"); - fs::create_dir_all(sk_dir.join("work/2_current")).unwrap(); - fs::write( - root.join(".huskies/project.toml"), - "[[agent]]\nname = \"coder-1\"\n\n[[agent]]\nname = \"coder-2\"\n", - ) - .unwrap(); - fs::write( - root.join(".huskies/work/2_current/42_story_foo.md"), - "---\nname: Foo\n---\n", - ) - .unwrap(); - - let pool = Arc::new(AgentPool::new_test(3099)); - - let pool1 = pool.clone(); - let root1 = root.clone(); - let t1 = tokio::spawn(async move { - pool1 - .start_agent(&root1, 
"42_story_foo", Some("coder-1"), None, None) - .await - }); - - let pool2 = pool.clone(); - let root2 = root.clone(); - let t2 = tokio::spawn(async move { - pool2 - .start_agent(&root2, "42_story_foo", Some("coder-2"), None, None) - .await - }); - - let (r1, r2) = tokio::join!(t1, t2); - let r1 = r1.unwrap(); - let r2 = r2.unwrap(); - - let stage_rejections = [&r1, &r2] - .iter() - .filter(|r| r.as_ref().is_err_and(|e| e.contains("same pipeline stage"))) - .count(); - - assert_eq!( - stage_rejections, 1, - "exactly one call must be rejected by the stage-conflict check; \ - got r1={r1:?} r2={r2:?}" - ); - } - - #[tokio::test] - async fn start_agent_two_coders_different_stories_not_blocked_by_stage_check() { - use std::fs; - - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path(); - - let sk_dir = root.join(".huskies"); - fs::create_dir_all(sk_dir.join("work/1_backlog")).unwrap(); - fs::write( - root.join(".huskies/project.toml"), - "[[agent]]\nname = \"coder-1\"\n\n[[agent]]\nname = \"coder-2\"\n", - ) - .unwrap(); - fs::write( - root.join(".huskies/work/1_backlog/99_story_baz.md"), - "---\nname: Baz\n---\n", - ) - .unwrap(); - - let pool = AgentPool::new_test(3099); - pool.inject_test_agent("42_story_foo", "coder-1", AgentStatus::Running); - - let result = pool - .start_agent(root, "99_story_baz", Some("coder-2"), None, None) - .await; - - if let Err(ref e) = result { - assert!( - !e.contains("same pipeline stage"), - "stage-conflict guard must not fire for agents on different stories; \ - got: '{e}'" - ); - } - } - - // ── bug 312: stage-pipeline mismatch guard in start_agent ────────────── - - #[tokio::test] - async fn start_agent_rejects_mergemaster_on_coding_stage_story() { - use std::fs; - - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path(); - - let sk_dir = root.join(".huskies"); - fs::create_dir_all(sk_dir.join("work/2_current")).unwrap(); - fs::write( - sk_dir.join("project.toml"), - "[[agent]]\nname = \"coder-1\"\nstage = 
\"coder\"\n\n\ - [[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n", - ) - .unwrap(); - crate::db::ensure_content_store(); - crate::db::write_item_with_content("310_story_foo", "2_current", "---\nname: Foo\n---\n"); - - let pool = AgentPool::new_test(3099); - let result = pool - .start_agent(root, "310_story_foo", Some("mergemaster"), None, None) - .await; - - assert!( - result.is_err(), - "mergemaster must not be assigned to a story in 2_current/" - ); - let err = result.unwrap_err(); - assert!( - err.contains("stage") && err.contains("2_current"), - "error must mention stage mismatch, got: '{err}'" - ); - } - - #[tokio::test] - async fn start_agent_rejects_coder_on_qa_stage_story() { - use std::fs; - - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path(); - - let sk_dir = root.join(".huskies"); - fs::create_dir_all(&sk_dir).unwrap(); - fs::write( - sk_dir.join("project.toml"), - "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n\n\ - [[agent]]\nname = \"qa\"\nstage = \"qa\"\n", - ) - .unwrap(); - crate::db::ensure_content_store(); - crate::db::write_item_with_content( - "8842_story_qa_guard", - "3_qa", - "---\nname: QA Guard\n---\n", - ); - - let pool = AgentPool::new_test(3099); - let result = pool - .start_agent(root, "8842_story_qa_guard", Some("coder-1"), None, None) - .await; - - assert!( - result.is_err(), - "coder must not be assigned to a story in 3_qa/" - ); - let err = result.unwrap_err(); - assert!( - err.contains("stage") && err.contains("3_qa"), - "error must mention stage mismatch, got: '{err}'" - ); - } - - #[tokio::test] - async fn start_agent_rejects_qa_on_merge_stage_story() { - use std::fs; - - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path(); - - let sk_dir = root.join(".huskies"); - fs::create_dir_all(&sk_dir).unwrap(); - fs::write( - sk_dir.join("project.toml"), - "[[agent]]\nname = \"qa\"\nstage = \"qa\"\n\n\ - [[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n", - ) - .unwrap(); - 
crate::db::ensure_content_store(); - crate::db::write_item_with_content("55_story_baz", "4_merge", "---\nname: Baz\n---\n"); - - let pool = AgentPool::new_test(3099); - let result = pool - .start_agent(root, "55_story_baz", Some("qa"), None, None) - .await; - - assert!( - result.is_err(), - "qa must not be assigned to a story in 4_merge/" - ); - let err = result.unwrap_err(); - assert!( - err.contains("stage") && err.contains("4_merge"), - "error must mention stage mismatch, got: '{err}'" - ); - } - - #[tokio::test] - async fn start_agent_allows_supervisor_on_any_stage() { - use std::fs; - - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path(); - - let sk_dir = root.join(".huskies"); - fs::create_dir_all(sk_dir.join("work/2_current")).unwrap(); - fs::write( - sk_dir.join("project.toml"), - "[[agent]]\nname = \"supervisor\"\nstage = \"other\"\n", - ) - .unwrap(); - fs::write( - sk_dir.join("work/2_current/77_story_sup.md"), - "---\nname: Sup\n---\n", - ) - .unwrap(); - - let pool = AgentPool::new_test(3099); - let result = pool - .start_agent(root, "77_story_sup", Some("supervisor"), None, None) - .await; - - match result { - Ok(_) => {} - Err(e) => { - assert!( - !e.contains("stage:") || !e.contains("cannot be assigned"), - "supervisor should not be rejected for stage mismatch, got: '{e}'" - ); - } - } - } - - #[tokio::test] - async fn start_agent_allows_correct_stage_agent() { - use std::fs; - - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path(); - - let sk_dir = root.join(".huskies"); - fs::create_dir_all(sk_dir.join("work/4_merge")).unwrap(); - fs::write( - sk_dir.join("project.toml"), - "[[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n", - ) - .unwrap(); - fs::write( - sk_dir.join("work/4_merge/88_story_ok.md"), - "---\nname: OK\n---\n", - ) - .unwrap(); - - let pool = AgentPool::new_test(3099); - let result = pool - .start_agent(root, "88_story_ok", Some("mergemaster"), None, None) - .await; - - match result { - Ok(_) => {} 
- Err(e) => { - assert!( - !e.contains("cannot be assigned"), - "mergemaster on 4_merge/ story should not fail stage check, got: '{e}'" - ); - } - } - } - - /// Bug 502: when start_agent is called for a non-Coder agent (mergemaster - /// or qa) on a story that's in 4_merge/, the unconditional - /// move_story_to_current at the top of start_agent must NOT fire — even - /// when a stale split-brain shadow of the story exists in 1_backlog/. - /// - /// Pre-fix behaviour: move_story_to_current would find the 1_backlog - /// shadow and move it to 2_current/. find_active_story_stage would then - /// report 2_current/, the stage check would expect a Coder-stage agent, - /// and mergemaster would be rejected — leaving the story in 2_current/ - /// to be picked up by the next auto-assign tick as a coder. Infinite loop. - /// Observed live on 2026-04-09 against story 478. - #[tokio::test] - async fn start_agent_does_not_demote_merge_stage_story_with_backlog_shadow() { - use std::fs; - - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path(); - - let sk_dir = root.join(".huskies"); - fs::create_dir_all(sk_dir.join("work/1_backlog")).unwrap(); - fs::create_dir_all(sk_dir.join("work/4_merge")).unwrap(); - fs::write( - sk_dir.join("project.toml"), - "[[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n", - ) - .unwrap(); - // Real copy in 4_merge/ (where the story actually is per the DB). - fs::write( - sk_dir.join("work/4_merge/502_story_split_brain.md"), - "---\nname: Split Brain\n---\n", - ) - .unwrap(); - // Stale split-brain shadow in 1_backlog/ (post-491/492 migration - // artifact — the filesystem shadow that bit us in production). 
- fs::write( - sk_dir.join("work/1_backlog/502_story_split_brain.md"), - "---\nname: Split Brain\n---\n", - ) - .unwrap(); - - let pool = AgentPool::new_test(3098); - let result = pool - .start_agent( - root, - "502_story_split_brain", - Some("mergemaster"), - None, - None, - ) - .await; - - // Stage check must not reject mergemaster. - if let Err(ref e) = result { - assert!( - !e.contains("cannot be assigned"), - "mergemaster on 4_merge/ story must not fail stage check even \ - when a 1_backlog shadow exists, got: '{e}'" - ); - } - - // Critical: the story must still be in 4_merge/ after the call. - // Before the fix, line 53 of start.rs would have demoted it to - // 2_current/ via move_story_to_current finding the 1_backlog shadow. - assert!( - sk_dir - .join("work/4_merge/502_story_split_brain.md") - .exists(), - "story must still be in 4_merge/ after start_agent(mergemaster, ...)" - ); - assert!( - !sk_dir - .join("work/2_current/502_story_split_brain.md") - .exists(), - "story must NOT have been demoted to 2_current/ — that's bug 502" - ); - } - - // ── front matter agent preference (bug 379) ────────────────────────────── - - #[tokio::test] - async fn start_agent_honours_front_matter_agent_when_idle() { - let tmp = tempfile::tempdir().unwrap(); - let sk = tmp.path().join(".huskies"); - let backlog = sk.join("work/1_backlog"); - std::fs::create_dir_all(&backlog).unwrap(); - std::fs::write( - sk.join("project.toml"), - r#" -[[agent]] -name = "coder-sonnet" -stage = "coder" - -[[agent]] -name = "coder-opus" -stage = "coder" -"#, - ) - .unwrap(); - // Story file with agent preference in front matter. - std::fs::write( - backlog.join("368_story_test.md"), - "---\nname: Test Story\nagent: coder-opus\n---\n# Story 368\n", - ) - .unwrap(); - - let pool = AgentPool::new_test(3010); - // coder-sonnet is busy so without front matter the auto-selection - // would skip coder-opus and try something else. 
- pool.inject_test_agent("other-story", "coder-sonnet", AgentStatus::Running); - - let result = pool - .start_agent(tmp.path(), "368_story_test", None, None, None) - .await; - match result { - Ok(info) => { - assert_eq!( - info.agent_name, "coder-opus", - "should pick the front-matter preferred agent" - ); - } - Err(err) => { - // Allowed to fail for infrastructure reasons (no git repo), - // but NOT due to agent selection ignoring the preference. - assert!( - !err.contains("All coder agents are busy"), - "should not report busy when coder-opus is idle: {err}" - ); - assert!( - !err.contains("coder-sonnet"), - "should not have picked coder-sonnet: {err}" - ); - } - } - } - - #[tokio::test] - async fn start_agent_returns_error_when_front_matter_agent_busy() { - let tmp = tempfile::tempdir().unwrap(); - let sk = tmp.path().join(".huskies"); - let backlog = sk.join("work/1_backlog"); - let current = sk.join("work/2_current"); - std::fs::create_dir_all(&backlog).unwrap(); - std::fs::create_dir_all(&current).unwrap(); - std::fs::write( - sk.join("project.toml"), - r#" -[[agent]] -name = "coder-sonnet" -stage = "coder" - -[[agent]] -name = "coder-opus" -stage = "coder" -"#, - ) - .unwrap(); - let story_content = "---\nname: Test Story\nagent: coder-opus\n---\n# Story 368\n"; - std::fs::write(backlog.join("368_story_test.md"), story_content).unwrap(); - // Also write to the filesystem current dir and content store so that - // start_agent reads the correct front matter even when another test has - // left a stale entry for "368_story_test" in the global CRDT. - std::fs::write(current.join("368_story_test.md"), story_content).unwrap(); - crate::db::ensure_content_store(); - crate::db::write_content("368_story_test", story_content); - - let pool = AgentPool::new_test(3011); - // Preferred agent is busy — should NOT fall back to coder-sonnet. 
- pool.inject_test_agent("other-story", "coder-opus", AgentStatus::Running); - - let result = pool - .start_agent(tmp.path(), "368_story_test", None, None, None) - .await; - assert!( - result.is_err(), - "expected error when preferred agent is busy" - ); - let err = result.unwrap_err(); - assert!( - err.contains("coder-opus"), - "error should mention the preferred agent: {err}" - ); - assert!( - err.contains("busy") || err.contains("queued"), - "error should say agent is busy or story is queued: {err}" - ); - } -} +mod tests_concurrency; +#[cfg(test)] +mod tests_selection; diff --git a/server/src/agents/pool/start/tests_concurrency.rs b/server/src/agents/pool/start/tests_concurrency.rs new file mode 100644 index 00000000..9fe21bfb --- /dev/null +++ b/server/src/agents/pool/start/tests_concurrency.rs @@ -0,0 +1,665 @@ +//! Tests for single-instance concurrency, TOCTOU races, duplicate-stage +//! prevention, and pipeline-stage mismatch guards in `AgentPool::start_agent`. + +use super::super::AgentPool; +use crate::agents::{AgentEvent, AgentStatus}; + +// ── start_agent single-instance concurrency tests ───────────────────────── + +#[tokio::test] +async fn start_agent_rejects_when_same_agent_already_running_on_another_story() { + use std::fs; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + let sk_dir = root.join(".huskies"); + fs::create_dir_all(&sk_dir).unwrap(); + fs::write(sk_dir.join("project.toml"), "[[agent]]\nname = \"qa\"\n").unwrap(); + + let pool = AgentPool::new_test(3001); + pool.inject_test_agent("story-a", "qa", AgentStatus::Running); + + let result = pool + .start_agent(root, "story-b", Some("qa"), None, None) + .await; + + assert!( + result.is_err(), + "start_agent should fail when qa is already running on another story" + ); + let err = result.unwrap_err(); + assert!( + err.contains("already running") || err.contains("becomes available"), + "error message should explain why: got '{err}'" + ); +} + +#[tokio::test] +async fn 
start_agent_allows_new_story_when_previous_run_is_completed() { + use std::fs; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + let sk_dir = root.join(".huskies"); + fs::create_dir_all(&sk_dir).unwrap(); + fs::write(sk_dir.join("project.toml"), "[[agent]]\nname = \"qa\"\n").unwrap(); + + let pool = AgentPool::new_test(3001); + pool.inject_test_agent("story-a", "qa", AgentStatus::Completed); + + let result = pool + .start_agent(root, "story-b", Some("qa"), None, None) + .await; + + if let Err(ref e) = result { + assert!( + !e.contains("already running") && !e.contains("becomes available"), + "completed agent must not trigger the concurrency guard: got '{e}'" + ); + } +} + +// ── bug 118: pending entry cleanup on start_agent failure ──────────────── + +#[tokio::test] +async fn start_agent_cleans_up_pending_entry_on_failure() { + use std::fs; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + let sk_dir = root.join(".huskies"); + fs::create_dir_all(&sk_dir).unwrap(); + fs::write( + sk_dir.join("project.toml"), + "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n", + ) + .unwrap(); + + let upcoming = root.join(".huskies/work/1_backlog"); + fs::create_dir_all(&upcoming).unwrap(); + fs::write(upcoming.join("50_story_test.md"), "---\nname: Test\n---\n").unwrap(); + + let pool = AgentPool::new_test(3099); + + let result = pool + .start_agent(root, "50_story_test", Some("coder-1"), None, None) + .await; + + assert!( + result.is_ok(), + "start_agent should return Ok(Pending) immediately: {:?}", + result.err() + ); + assert_eq!( + result.unwrap().status, + AgentStatus::Pending, + "initial status must be Pending" + ); + + let final_info = pool + .wait_for_agent("50_story_test", "coder-1", 5000) + .await + .expect("wait_for_agent should not time out"); + assert_eq!( + final_info.status, + AgentStatus::Failed, + "agent must transition to Failed after worktree creation error" + ); + + let agents = pool.agents.lock().unwrap(); + let 
failed_entry = agents + .values() + .find(|a| a.agent_name == "coder-1" && a.status == AgentStatus::Failed); + assert!( + failed_entry.is_some(), + "agent pool must retain a Failed entry so the UI can show the error state" + ); + drop(agents); + + let events = pool + .drain_events("50_story_test", "coder-1") + .expect("drain_events should succeed"); + let has_error_event = events.iter().any(|e| matches!(e, AgentEvent::Error { .. })); + assert!( + has_error_event, + "event_log must contain AgentEvent::Error after worktree creation fails" + ); +} + +#[tokio::test] +async fn start_agent_guard_does_not_remove_running_entry() { + use std::fs; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + let sk_dir = root.join(".huskies"); + fs::create_dir_all(&sk_dir).unwrap(); + fs::write(sk_dir.join("project.toml"), "[[agent]]\nname = \"qa\"\n").unwrap(); + + let pool = AgentPool::new_test(3099); + pool.inject_test_agent("story-x", "qa", AgentStatus::Running); + + let result = pool + .start_agent(root, "story-y", Some("qa"), None, None) + .await; + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.contains("already running") || err.contains("becomes available"), + "running entry must survive: got '{err}'" + ); +} + +// ── TOCTOU race-condition regression tests (story 132) ─────────────────── + +#[tokio::test] +async fn toctou_pending_entry_blocks_same_agent_on_different_story() { + use std::fs; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + let sk_dir = root.join(".huskies"); + fs::create_dir_all(&sk_dir).unwrap(); + fs::write( + sk_dir.join("project.toml"), + "[[agent]]\nname = \"coder-1\"\n", + ) + .unwrap(); + + let pool = AgentPool::new_test(3099); + pool.inject_test_agent("86_story_foo", "coder-1", AgentStatus::Pending); + + let result = pool + .start_agent(root, "130_story_bar", Some("coder-1"), None, None) + .await; + + assert!(result.is_err(), "second start_agent must be rejected"); + let err = 
result.unwrap_err(); + assert!( + err.contains("already running") || err.contains("becomes available"), + "expected concurrency-rejection message, got: '{err}'" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn toctou_concurrent_start_agent_same_agent_exactly_one_concurrency_rejection() { + use std::fs; + use std::sync::Arc; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path().to_path_buf(); + + let sk_dir = root.join(".huskies"); + fs::create_dir_all(sk_dir.join("work/1_backlog")).unwrap(); + fs::write( + root.join(".huskies/project.toml"), + "[[agent]]\nname = \"coder-1\"\n", + ) + .unwrap(); + fs::write( + root.join(".huskies/work/1_backlog/86_story_foo.md"), + "---\nname: Foo\n---\n", + ) + .unwrap(); + fs::write( + root.join(".huskies/work/1_backlog/130_story_bar.md"), + "---\nname: Bar\n---\n", + ) + .unwrap(); + + let pool = Arc::new(AgentPool::new_test(3099)); + + let pool1 = pool.clone(); + let root1 = root.clone(); + let t1 = tokio::spawn(async move { + pool1 + .start_agent(&root1, "86_story_foo", Some("coder-1"), None, None) + .await + }); + + let pool2 = pool.clone(); + let root2 = root.clone(); + let t2 = tokio::spawn(async move { + pool2 + .start_agent(&root2, "130_story_bar", Some("coder-1"), None, None) + .await + }); + + let (r1, r2) = tokio::join!(t1, t2); + let r1 = r1.unwrap(); + let r2 = r2.unwrap(); + + let concurrency_rejections = [&r1, &r2] + .iter() + .filter(|r| { + r.as_ref() + .is_err_and(|e| e.contains("already running") || e.contains("becomes available")) + }) + .count(); + + assert_eq!( + concurrency_rejections, 1, + "exactly one call must be rejected by the concurrency check; \ + got r1={r1:?} r2={r2:?}" + ); +} + +// ── story-230: prevent duplicate stage agents on same story ─────────────── + +#[tokio::test] +async fn start_agent_rejects_second_coder_stage_on_same_story() { + use std::fs; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + let sk_dir = 
root.join(".huskies"); + fs::create_dir_all(&sk_dir).unwrap(); + fs::write( + sk_dir.join("project.toml"), + "[[agent]]\nname = \"coder-1\"\n\n[[agent]]\nname = \"coder-2\"\n", + ) + .unwrap(); + + let pool = AgentPool::new_test(3099); + pool.inject_test_agent("42_story_foo", "coder-1", AgentStatus::Running); + + let result = pool + .start_agent(root, "42_story_foo", Some("coder-2"), None, None) + .await; + + assert!( + result.is_err(), + "second coder on same story must be rejected" + ); + let err = result.unwrap_err(); + assert!( + err.contains("same pipeline stage"), + "error must mention same pipeline stage, got: '{err}'" + ); + assert!( + err.contains("coder-1") && err.contains("coder-2"), + "error must name both agents, got: '{err}'" + ); +} + +#[tokio::test] +async fn start_agent_rejects_second_qa_stage_on_same_story() { + use std::fs; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + let sk_dir = root.join(".huskies"); + fs::create_dir_all(&sk_dir).unwrap(); + fs::write( + sk_dir.join("project.toml"), + "[[agent]]\nname = \"qa-1\"\nstage = \"qa\"\n\n\ + [[agent]]\nname = \"qa-2\"\nstage = \"qa\"\n", + ) + .unwrap(); + + let pool = AgentPool::new_test(3099); + pool.inject_test_agent("55_story_bar", "qa-1", AgentStatus::Running); + + let result = pool + .start_agent(root, "55_story_bar", Some("qa-2"), None, None) + .await; + + assert!(result.is_err(), "second qa on same story must be rejected"); + let err = result.unwrap_err(); + assert!( + err.contains("same pipeline stage"), + "error must mention same pipeline stage, got: '{err}'" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn start_agent_concurrent_two_coders_same_story_exactly_one_stage_rejection() { + use std::fs; + use std::sync::Arc; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path().to_path_buf(); + + let sk_dir = root.join(".huskies"); + fs::create_dir_all(sk_dir.join("work/2_current")).unwrap(); + fs::write( + 
root.join(".huskies/project.toml"), + "[[agent]]\nname = \"coder-1\"\n\n[[agent]]\nname = \"coder-2\"\n", + ) + .unwrap(); + fs::write( + root.join(".huskies/work/2_current/42_story_foo.md"), + "---\nname: Foo\n---\n", + ) + .unwrap(); + + let pool = Arc::new(AgentPool::new_test(3099)); + + let pool1 = pool.clone(); + let root1 = root.clone(); + let t1 = tokio::spawn(async move { + pool1 + .start_agent(&root1, "42_story_foo", Some("coder-1"), None, None) + .await + }); + + let pool2 = pool.clone(); + let root2 = root.clone(); + let t2 = tokio::spawn(async move { + pool2 + .start_agent(&root2, "42_story_foo", Some("coder-2"), None, None) + .await + }); + + let (r1, r2) = tokio::join!(t1, t2); + let r1 = r1.unwrap(); + let r2 = r2.unwrap(); + + let stage_rejections = [&r1, &r2] + .iter() + .filter(|r| r.as_ref().is_err_and(|e| e.contains("same pipeline stage"))) + .count(); + + assert_eq!( + stage_rejections, 1, + "exactly one call must be rejected by the stage-conflict check; \ + got r1={r1:?} r2={r2:?}" + ); +} + +#[tokio::test] +async fn start_agent_two_coders_different_stories_not_blocked_by_stage_check() { + use std::fs; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + let sk_dir = root.join(".huskies"); + fs::create_dir_all(sk_dir.join("work/1_backlog")).unwrap(); + fs::write( + root.join(".huskies/project.toml"), + "[[agent]]\nname = \"coder-1\"\n\n[[agent]]\nname = \"coder-2\"\n", + ) + .unwrap(); + fs::write( + root.join(".huskies/work/1_backlog/99_story_baz.md"), + "---\nname: Baz\n---\n", + ) + .unwrap(); + + let pool = AgentPool::new_test(3099); + pool.inject_test_agent("42_story_foo", "coder-1", AgentStatus::Running); + + let result = pool + .start_agent(root, "99_story_baz", Some("coder-2"), None, None) + .await; + + if let Err(ref e) = result { + assert!( + !e.contains("same pipeline stage"), + "stage-conflict guard must not fire for agents on different stories; \ + got: '{e}'" + ); + } +} + +// ── bug 312: stage-pipeline 
mismatch guard in start_agent ────────────── + +#[tokio::test] +async fn start_agent_rejects_mergemaster_on_coding_stage_story() { + use std::fs; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + let sk_dir = root.join(".huskies"); + fs::create_dir_all(sk_dir.join("work/2_current")).unwrap(); + fs::write( + sk_dir.join("project.toml"), + "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n\n\ + [[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n", + ) + .unwrap(); + crate::db::ensure_content_store(); + crate::db::write_item_with_content("310_story_foo", "2_current", "---\nname: Foo\n---\n"); + + let pool = AgentPool::new_test(3099); + let result = pool + .start_agent(root, "310_story_foo", Some("mergemaster"), None, None) + .await; + + assert!( + result.is_err(), + "mergemaster must not be assigned to a story in 2_current/" + ); + let err = result.unwrap_err(); + assert!( + err.contains("stage") && err.contains("2_current"), + "error must mention stage mismatch, got: '{err}'" + ); +} + +#[tokio::test] +async fn start_agent_rejects_coder_on_qa_stage_story() { + use std::fs; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + let sk_dir = root.join(".huskies"); + fs::create_dir_all(&sk_dir).unwrap(); + fs::write( + sk_dir.join("project.toml"), + "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n\n\ + [[agent]]\nname = \"qa\"\nstage = \"qa\"\n", + ) + .unwrap(); + crate::db::ensure_content_store(); + crate::db::write_item_with_content("8842_story_qa_guard", "3_qa", "---\nname: QA Guard\n---\n"); + + let pool = AgentPool::new_test(3099); + let result = pool + .start_agent(root, "8842_story_qa_guard", Some("coder-1"), None, None) + .await; + + assert!( + result.is_err(), + "coder must not be assigned to a story in 3_qa/" + ); + let err = result.unwrap_err(); + assert!( + err.contains("stage") && err.contains("3_qa"), + "error must mention stage mismatch, got: '{err}'" + ); +} + +#[tokio::test] +async fn 
start_agent_rejects_qa_on_merge_stage_story() { + use std::fs; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + let sk_dir = root.join(".huskies"); + fs::create_dir_all(&sk_dir).unwrap(); + fs::write( + sk_dir.join("project.toml"), + "[[agent]]\nname = \"qa\"\nstage = \"qa\"\n\n\ + [[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n", + ) + .unwrap(); + crate::db::ensure_content_store(); + crate::db::write_item_with_content("55_story_baz", "4_merge", "---\nname: Baz\n---\n"); + + let pool = AgentPool::new_test(3099); + let result = pool + .start_agent(root, "55_story_baz", Some("qa"), None, None) + .await; + + assert!( + result.is_err(), + "qa must not be assigned to a story in 4_merge/" + ); + let err = result.unwrap_err(); + assert!( + err.contains("stage") && err.contains("4_merge"), + "error must mention stage mismatch, got: '{err}'" + ); +} + +#[tokio::test] +async fn start_agent_allows_supervisor_on_any_stage() { + use std::fs; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + let sk_dir = root.join(".huskies"); + fs::create_dir_all(sk_dir.join("work/2_current")).unwrap(); + fs::write( + sk_dir.join("project.toml"), + "[[agent]]\nname = \"supervisor\"\nstage = \"other\"\n", + ) + .unwrap(); + fs::write( + sk_dir.join("work/2_current/77_story_sup.md"), + "---\nname: Sup\n---\n", + ) + .unwrap(); + + let pool = AgentPool::new_test(3099); + let result = pool + .start_agent(root, "77_story_sup", Some("supervisor"), None, None) + .await; + + match result { + Ok(_) => {} + Err(e) => { + assert!( + !e.contains("stage:") || !e.contains("cannot be assigned"), + "supervisor should not be rejected for stage mismatch, got: '{e}'" + ); + } + } +} + +#[tokio::test] +async fn start_agent_allows_correct_stage_agent() { + use std::fs; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + let sk_dir = root.join(".huskies"); + fs::create_dir_all(sk_dir.join("work/4_merge")).unwrap(); + fs::write( + 
sk_dir.join("project.toml"), + "[[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n", + ) + .unwrap(); + fs::write( + sk_dir.join("work/4_merge/88_story_ok.md"), + "---\nname: OK\n---\n", + ) + .unwrap(); + + let pool = AgentPool::new_test(3099); + let result = pool + .start_agent(root, "88_story_ok", Some("mergemaster"), None, None) + .await; + + match result { + Ok(_) => {} + Err(e) => { + assert!( + !e.contains("cannot be assigned"), + "mergemaster on 4_merge/ story should not fail stage check, got: '{e}'" + ); + } + } +} + +/// Bug 502: when start_agent is called for a non-Coder agent (mergemaster +/// or qa) on a story that's in 4_merge/, the unconditional +/// move_story_to_current at the top of start_agent must NOT fire — even +/// when a stale split-brain shadow of the story exists in 1_backlog/. +/// +/// Pre-fix behaviour: move_story_to_current would find the 1_backlog +/// shadow and move it to 2_current/. find_active_story_stage would then +/// report 2_current/, the stage check would expect a Coder-stage agent, +/// and mergemaster would be rejected — leaving the story in 2_current/ +/// to be picked up by the next auto-assign tick as a coder. Infinite loop. +/// Observed live on 2026-04-09 against story 478. +#[tokio::test] +async fn start_agent_does_not_demote_merge_stage_story_with_backlog_shadow() { + use std::fs; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + let sk_dir = root.join(".huskies"); + fs::create_dir_all(sk_dir.join("work/1_backlog")).unwrap(); + fs::create_dir_all(sk_dir.join("work/4_merge")).unwrap(); + fs::write( + sk_dir.join("project.toml"), + "[[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n", + ) + .unwrap(); + // Real copy in 4_merge/ (where the story actually is per the DB). 
+ fs::write( + sk_dir.join("work/4_merge/502_story_split_brain.md"), + "---\nname: Split Brain\n---\n", + ) + .unwrap(); + // Stale split-brain shadow in 1_backlog/ (post-491/492 migration + // artifact — the filesystem shadow that bit us in production). + fs::write( + sk_dir.join("work/1_backlog/502_story_split_brain.md"), + "---\nname: Split Brain\n---\n", + ) + .unwrap(); + + let pool = AgentPool::new_test(3098); + let result = pool + .start_agent( + root, + "502_story_split_brain", + Some("mergemaster"), + None, + None, + ) + .await; + + // Stage check must not reject mergemaster. + if let Err(ref e) = result { + assert!( + !e.contains("cannot be assigned"), + "mergemaster on 4_merge/ story must not fail stage check even \ + when a 1_backlog shadow exists, got: '{e}'" + ); + } + + // Critical: the story must still be in 4_merge/ after the call. + // Before the fix, line 53 of start.rs would have demoted it to + // 2_current/ via move_story_to_current finding the 1_backlog shadow. + assert!( + sk_dir + .join("work/4_merge/502_story_split_brain.md") + .exists(), + "story must still be in 4_merge/ after start_agent(mergemaster, ...)" + ); + assert!( + !sk_dir + .join("work/2_current/502_story_split_brain.md") + .exists(), + "story must NOT have been demoted to 2_current/ — that's bug 502" + ); +} diff --git a/server/src/agents/pool/start/tests_selection.rs b/server/src/agents/pool/start/tests_selection.rs new file mode 100644 index 00000000..9b75a82f --- /dev/null +++ b/server/src/agents/pool/start/tests_selection.rs @@ -0,0 +1,305 @@ +//! Tests for basic coder selection and front-matter agent preference in +//! `AgentPool::start_agent`. 
+ +use super::super::AgentPool; +use crate::agents::{AgentEvent, AgentStatus}; + +#[tokio::test] +async fn start_agent_auto_selects_second_coder_when_first_busy() { + let tmp = tempfile::tempdir().unwrap(); + let sk = tmp.path().join(".huskies"); + std::fs::create_dir_all(&sk).unwrap(); + std::fs::write( + sk.join("project.toml"), + r#" +[[agent]] +name = "supervisor" +stage = "other" + +[[agent]] +name = "coder-1" +stage = "coder" + +[[agent]] +name = "coder-2" +stage = "coder" +"#, + ) + .unwrap(); + + let pool = AgentPool::new_test(3001); + pool.inject_test_agent("other-story", "coder-1", AgentStatus::Running); + + let result = pool + .start_agent(tmp.path(), "42_my_story", None, None, None) + .await; + match result { + Ok(info) => { + assert_eq!(info.agent_name, "coder-2"); + } + Err(err) => { + assert!( + !err.contains("All coder agents are busy"), + "should have selected coder-2 but got: {err}" + ); + assert!( + !err.contains("No coder agent configured"), + "should not fail on agent selection, got: {err}" + ); + } + } +} + +#[tokio::test] +async fn start_agent_returns_busy_when_all_coders_occupied() { + let tmp = tempfile::tempdir().unwrap(); + let sk = tmp.path().join(".huskies"); + std::fs::create_dir_all(&sk).unwrap(); + std::fs::write( + sk.join("project.toml"), + r#" +[[agent]] +name = "coder-1" +stage = "coder" + +[[agent]] +name = "coder-2" +stage = "coder" +"#, + ) + .unwrap(); + + let pool = AgentPool::new_test(3001); + pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running); + pool.inject_test_agent("story-2", "coder-2", AgentStatus::Pending); + + let result = pool + .start_agent(tmp.path(), "story-3", None, None, None) + .await; + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.contains("All coder agents are busy"), + "expected busy error, got: {err}" + ); +} + +#[tokio::test] +async fn start_agent_moves_story_to_current_when_coders_busy() { + let tmp = tempfile::tempdir().unwrap(); + let sk = 
tmp.path().join(".huskies"); + let backlog = sk.join("work/1_backlog"); + std::fs::create_dir_all(&backlog).unwrap(); + std::fs::write( + sk.join("project.toml"), + r#" +[[agent]] +name = "coder-1" +stage = "coder" +"#, + ) + .unwrap(); + let story_content = "---\nname: Story 3\n---\n"; + std::fs::write(backlog.join("story-3.md"), story_content).unwrap(); + crate::db::ensure_content_store(); + crate::db::write_content("story-3", story_content); + + let pool = AgentPool::new_test(3001); + pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running); + + let result = pool + .start_agent(tmp.path(), "story-3", None, None, None) + .await; + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.contains("All coder agents are busy"), + "expected busy error, got: {err}" + ); + assert!( + err.contains("queued in work/2_current/"), + "expected story-to-current message, got: {err}" + ); + + // The lifecycle function updates the content store (not the filesystem), + // so verify the move via the DB. 
+ let content = crate::db::read_content("story-3") + .expect("story-3 should be in content store after move to current"); + assert!( + content.contains("name: Story 3"), + "story-3 content should be preserved after move" + ); +} + +#[tokio::test] +async fn start_agent_story_already_in_current_is_noop() { + let tmp = tempfile::tempdir().unwrap(); + let sk = tmp.path().join(".huskies"); + let current = sk.join("work/2_current"); + std::fs::create_dir_all(¤t).unwrap(); + std::fs::write( + sk.join("project.toml"), + "[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n", + ) + .unwrap(); + std::fs::write(current.join("story-5.md"), "---\nname: Story 5\n---\n").unwrap(); + + let pool = AgentPool::new_test(3001); + + let result = pool + .start_agent(tmp.path(), "story-5", None, None, None) + .await; + match result { + Ok(_) => {} + Err(e) => { + assert!( + !e.contains("Failed to move"), + "should not fail on idempotent move, got: {e}" + ); + } + } +} + +#[tokio::test] +async fn start_agent_explicit_name_unchanged_when_busy() { + let tmp = tempfile::tempdir().unwrap(); + let sk = tmp.path().join(".huskies"); + std::fs::create_dir_all(&sk).unwrap(); + std::fs::write( + sk.join("project.toml"), + r#" +[[agent]] +name = "coder-1" +stage = "coder" + +[[agent]] +name = "coder-2" +stage = "coder" +"#, + ) + .unwrap(); + + let pool = AgentPool::new_test(3001); + pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running); + + let result = pool + .start_agent(tmp.path(), "story-2", Some("coder-1"), None, None) + .await; + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.contains("coder-1") && err.contains("already running"), + "expected explicit busy error, got: {err}" + ); +} + +// ── front matter agent preference (bug 379) ────────────────────────────── + +#[tokio::test] +async fn start_agent_honours_front_matter_agent_when_idle() { + let tmp = tempfile::tempdir().unwrap(); + let sk = tmp.path().join(".huskies"); + let backlog = 
sk.join("work/1_backlog"); + std::fs::create_dir_all(&backlog).unwrap(); + std::fs::write( + sk.join("project.toml"), + r#" +[[agent]] +name = "coder-sonnet" +stage = "coder" + +[[agent]] +name = "coder-opus" +stage = "coder" +"#, + ) + .unwrap(); + // Story file with agent preference in front matter. + std::fs::write( + backlog.join("368_story_test.md"), + "---\nname: Test Story\nagent: coder-opus\n---\n# Story 368\n", + ) + .unwrap(); + + let pool = AgentPool::new_test(3010); + // coder-sonnet is busy so without front matter the auto-selection + // would skip coder-opus and try something else. + pool.inject_test_agent("other-story", "coder-sonnet", AgentStatus::Running); + + let result = pool + .start_agent(tmp.path(), "368_story_test", None, None, None) + .await; + match result { + Ok(info) => { + assert_eq!( + info.agent_name, "coder-opus", + "should pick the front-matter preferred agent" + ); + } + Err(err) => { + // Allowed to fail for infrastructure reasons (no git repo), + // but NOT due to agent selection ignoring the preference. 
+ assert!( + !err.contains("All coder agents are busy"), + "should not report busy when coder-opus is idle: {err}" + ); + assert!( + !err.contains("coder-sonnet"), + "should not have picked coder-sonnet: {err}" + ); + } + } +} + +#[tokio::test] +async fn start_agent_returns_error_when_front_matter_agent_busy() { + let tmp = tempfile::tempdir().unwrap(); + let sk = tmp.path().join(".huskies"); + let backlog = sk.join("work/1_backlog"); + let current = sk.join("work/2_current"); + std::fs::create_dir_all(&backlog).unwrap(); + std::fs::create_dir_all(¤t).unwrap(); + std::fs::write( + sk.join("project.toml"), + r#" +[[agent]] +name = "coder-sonnet" +stage = "coder" + +[[agent]] +name = "coder-opus" +stage = "coder" +"#, + ) + .unwrap(); + let story_content = "---\nname: Test Story\nagent: coder-opus\n---\n# Story 368\n"; + std::fs::write(backlog.join("368_story_test.md"), story_content).unwrap(); + // Also write to the filesystem current dir and content store so that + // start_agent reads the correct front matter even when another test has + // left a stale entry for "368_story_test" in the global CRDT. + std::fs::write(current.join("368_story_test.md"), story_content).unwrap(); + crate::db::ensure_content_store(); + crate::db::write_content("368_story_test", story_content); + + let pool = AgentPool::new_test(3011); + // Preferred agent is busy — should NOT fall back to coder-sonnet. + pool.inject_test_agent("other-story", "coder-opus", AgentStatus::Running); + + let result = pool + .start_agent(tmp.path(), "368_story_test", None, None, None) + .await; + assert!( + result.is_err(), + "expected error when preferred agent is busy" + ); + let err = result.unwrap_err(); + assert!( + err.contains("coder-opus"), + "error should mention the preferred agent: {err}" + ); + assert!( + err.contains("busy") || err.contains("queued"), + "error should say agent is busy or story is queued: {err}" + ); +}