Merge branch 'feature/story-97_bug_agent_pool_allows_multiple_instances_of_the_same_agent_to_run_concurrently'
This commit is contained in:
@@ -211,6 +211,29 @@ impl AgentPool {
|
|||||||
agent.status
|
agent.status
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
// Enforce single-instance concurrency: if this agent is already running on
|
||||||
|
// any other story, reject the request. The story remains in its current pipeline
|
||||||
|
// directory and `auto_assign_available_work` will pick it up when the agent
|
||||||
|
// becomes free.
|
||||||
|
if let Some(busy_story) = agents.iter().find_map(|(k, a)| {
|
||||||
|
if a.agent_name == resolved_name
|
||||||
|
&& matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
|
||||||
|
{
|
||||||
|
Some(
|
||||||
|
k.rsplit_once(':')
|
||||||
|
.map(|(sid, _)| sid)
|
||||||
|
.unwrap_or(k)
|
||||||
|
.to_string(),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}) {
|
||||||
|
return Err(format!(
|
||||||
|
"Agent '{resolved_name}' is already running on story '{busy_story}'; \
|
||||||
|
story '{story_id}' will be picked up when the agent becomes available"
|
||||||
|
));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let (tx, _) = broadcast::channel::<AgentEvent>(1024);
|
let (tx, _) = broadcast::channel::<AgentEvent>(1024);
|
||||||
@@ -3397,6 +3420,84 @@ name = "qa"
|
|||||||
assert_eq!(find_active_story_stage(tmp.path(), "99_nonexistent"), None);
|
assert_eq!(find_active_story_stage(tmp.path(), "99_nonexistent"), None);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── start_agent single-instance concurrency tests ─────────────────────────
|
||||||
|
|
||||||
|
/// Regression test for bug 97: the agent pool must reject a second concurrent
|
||||||
|
/// instance of the same agent name even if it would run on a different story.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn start_agent_rejects_when_same_agent_already_running_on_another_story() {
|
||||||
|
use std::fs;
|
||||||
|
|
||||||
|
let tmp = tempfile::tempdir().unwrap();
|
||||||
|
let root = tmp.path();
|
||||||
|
|
||||||
|
// Write a minimal project.toml so ProjectConfig::load can find the "qa" agent.
|
||||||
|
let sk_dir = root.join(".story_kit");
|
||||||
|
fs::create_dir_all(&sk_dir).unwrap();
|
||||||
|
fs::write(
|
||||||
|
sk_dir.join("project.toml"),
|
||||||
|
"[[agent]]\nname = \"qa\"\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let pool = AgentPool::new(3001);
|
||||||
|
// Simulate qa already running on story-a.
|
||||||
|
pool.inject_test_agent("story-a", "qa", AgentStatus::Running);
|
||||||
|
|
||||||
|
// Attempt to start qa on story-b — must be rejected.
|
||||||
|
let result = pool
|
||||||
|
.start_agent(root, "story-b", Some("qa"), None)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
result.is_err(),
|
||||||
|
"start_agent should fail when qa is already running on another story"
|
||||||
|
);
|
||||||
|
let err = result.unwrap_err();
|
||||||
|
assert!(
|
||||||
|
err.contains("already running") || err.contains("becomes available"),
|
||||||
|
"error message should explain why: got '{err}'"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Verify that the concurrency guard does NOT block an agent that is merely
|
||||||
|
/// Completed (not Running/Pending) — completed agents are free for new work.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn start_agent_allows_new_story_when_previous_run_is_completed() {
|
||||||
|
use std::fs;
|
||||||
|
|
||||||
|
let tmp = tempfile::tempdir().unwrap();
|
||||||
|
let root = tmp.path();
|
||||||
|
|
||||||
|
let sk_dir = root.join(".story_kit");
|
||||||
|
fs::create_dir_all(&sk_dir).unwrap();
|
||||||
|
fs::write(
|
||||||
|
sk_dir.join("project.toml"),
|
||||||
|
"[[agent]]\nname = \"qa\"\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let pool = AgentPool::new(3001);
|
||||||
|
// Previous run completed — should NOT block a new story.
|
||||||
|
pool.inject_test_agent("story-a", "qa", AgentStatus::Completed);
|
||||||
|
|
||||||
|
// The call will fail eventually (no real worktree / Claude CLI), but it must
|
||||||
|
// NOT fail at the concurrency check. We detect the difference by inspecting
|
||||||
|
// the error message: a concurrency rejection says "already running", while a
|
||||||
|
// later failure (missing story file, missing claude binary, etc.) says something else.
|
||||||
|
let result = pool
|
||||||
|
.start_agent(root, "story-b", Some("qa"), None)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
if let Err(ref e) = result {
|
||||||
|
assert!(
|
||||||
|
!e.contains("already running") && !e.contains("becomes available"),
|
||||||
|
"completed agent must not trigger the concurrency guard: got '{e}'"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// result may be Ok (unlikely in test env) or Err for infra reasons — both fine.
|
||||||
|
}
|
||||||
|
|
||||||
// ── worktree_has_committed_work tests ─────────────────────────────────────
|
// ── worktree_has_committed_work tests ─────────────────────────────────────
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
Reference in New Issue
Block a user