Merge branch 'feature/story-97_bug_agent_pool_allows_multiple_instances_of_the_same_agent_to_run_concurrently'

This commit is contained in:
Dave
2026-02-23 20:53:54 +00:00

View File

@@ -211,6 +211,29 @@ impl AgentPool {
agent.status
));
}
// Enforce single-instance concurrency: if this agent is already running on
// any other story, reject the request. The story remains in its current pipeline
// directory and `auto_assign_available_work` will pick it up when the agent
// becomes free.
if let Some(busy_story) = agents.iter().find_map(|(k, a)| {
if a.agent_name == resolved_name
&& matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
{
Some(
k.rsplit_once(':')
.map(|(sid, _)| sid)
.unwrap_or(k)
.to_string(),
)
} else {
None
}
}) {
return Err(format!(
"Agent '{resolved_name}' is already running on story '{busy_story}'; \
story '{story_id}' will be picked up when the agent becomes available"
));
}
}
let (tx, _) = broadcast::channel::<AgentEvent>(1024);
@@ -3397,6 +3420,84 @@ name = "qa"
assert_eq!(find_active_story_stage(tmp.path(), "99_nonexistent"), None);
}
// ── start_agent single-instance concurrency tests ─────────────────────────
/// Regression test for bug 97: the agent pool must reject a second concurrent
/// instance of the same agent name even if it would run on a different story.
#[tokio::test]
async fn start_agent_rejects_when_same_agent_already_running_on_another_story() {
use std::fs;
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
// Write a minimal project.toml so ProjectConfig::load can find the "qa" agent.
let sk_dir = root.join(".story_kit");
fs::create_dir_all(&sk_dir).unwrap();
fs::write(
sk_dir.join("project.toml"),
"[[agent]]\nname = \"qa\"\n",
)
.unwrap();
let pool = AgentPool::new(3001);
// Simulate qa already running on story-a.
pool.inject_test_agent("story-a", "qa", AgentStatus::Running);
// Attempt to start qa on story-b — must be rejected.
let result = pool
.start_agent(root, "story-b", Some("qa"), None)
.await;
assert!(
result.is_err(),
"start_agent should fail when qa is already running on another story"
);
let err = result.unwrap_err();
assert!(
err.contains("already running") || err.contains("becomes available"),
"error message should explain why: got '{err}'"
);
}
/// Verify that the concurrency guard does NOT block an agent that is merely
/// Completed (not Running/Pending) — completed agents are free for new work.
#[tokio::test]
async fn start_agent_allows_new_story_when_previous_run_is_completed() {
use std::fs;
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
let sk_dir = root.join(".story_kit");
fs::create_dir_all(&sk_dir).unwrap();
fs::write(
sk_dir.join("project.toml"),
"[[agent]]\nname = \"qa\"\n",
)
.unwrap();
let pool = AgentPool::new(3001);
// Previous run completed — should NOT block a new story.
pool.inject_test_agent("story-a", "qa", AgentStatus::Completed);
// The call will fail eventually (no real worktree / Claude CLI), but it must
// NOT fail at the concurrency check. We detect the difference by inspecting
// the error message: a concurrency rejection says "already running", while a
// later failure (missing story file, missing claude binary, etc.) says something else.
let result = pool
.start_agent(root, "story-b", Some("qa"), None)
.await;
if let Err(ref e) = result {
assert!(
!e.contains("already running") && !e.contains("becomes available"),
"completed agent must not trigger the concurrency guard: got '{e}'"
);
}
// result may be Ok (unlikely in test env) or Err for infra reasons — both fine.
}
// ── worktree_has_committed_work tests ─────────────────────────────────────
#[test]