story-kit: merge 190_story_auto_select_available_agent_for_stage_in_start_agent
This commit is contained in:
@@ -323,7 +323,11 @@ impl AgentPool {
|
||||
}
|
||||
|
||||
/// Start an agent for a story: load config, create worktree, spawn agent.
|
||||
/// If `agent_name` is None, defaults to the first configured agent.
|
||||
///
|
||||
/// When `agent_name` is `None`, automatically selects the first idle coder
|
||||
/// agent (story 190). If all coders are busy the call fails with an error
|
||||
/// indicating the story will be picked up when one becomes available.
|
||||
///
|
||||
/// If `resume_context` is provided, it is appended to the rendered prompt
|
||||
/// so the agent can pick up from a previous failed attempt.
|
||||
pub async fn start_agent(
|
||||
@@ -335,59 +339,56 @@ impl AgentPool {
|
||||
) -> Result<AgentInfo, String> {
|
||||
let config = ProjectConfig::load(project_root)?;
|
||||
|
||||
// Resolve agent name from config
|
||||
let resolved_name = match agent_name {
|
||||
Some(name) => {
|
||||
// Validate explicit agent name early (no lock needed).
|
||||
if let Some(name) = agent_name {
|
||||
config
|
||||
.find_agent(name)
|
||||
.ok_or_else(|| format!("No agent named '{name}' in config"))?;
|
||||
name.to_string()
|
||||
}
|
||||
None => config
|
||||
.default_coder_agent()
|
||||
.ok_or_else(|| {
|
||||
"No coder agent configured. Specify an agent_name explicitly.".to_string()
|
||||
})?
|
||||
.name
|
||||
.clone(),
|
||||
};
|
||||
|
||||
let key = composite_key(story_id, &resolved_name);
|
||||
|
||||
// Create shared resources before the atomic check-and-insert so the
|
||||
// agents lock is held continuously from the availability check through
|
||||
// the Pending insert, eliminating the TOCTOU race (story 132).
|
||||
// Create name-independent shared resources before the lock so they are
|
||||
// ready for the atomic check-and-insert (story 132).
|
||||
let (tx, _) = broadcast::channel::<AgentEvent>(1024);
|
||||
|
||||
let event_log: Arc<Mutex<Vec<AgentEvent>>> = Arc::new(Mutex::new(Vec::new()));
|
||||
|
||||
// Generate a unique session ID for the persistent log file.
|
||||
let log_session_id = uuid::Uuid::new_v4().to_string();
|
||||
|
||||
// Create persistent log writer.
|
||||
let log_writer = match AgentLogWriter::new(
|
||||
project_root,
|
||||
story_id,
|
||||
&resolved_name,
|
||||
&log_session_id,
|
||||
) {
|
||||
Ok(w) => Some(Arc::new(Mutex::new(w))),
|
||||
Err(e) => {
|
||||
eprintln!("[agents] Failed to create log writer for {story_id}:{resolved_name}: {e}");
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
// Atomically check availability and register as Pending. The lock is
|
||||
// held continuously from the duplicate check through the HashMap insert
|
||||
// so no concurrent start_agent call can slip through the check before
|
||||
// this insert completes (fixes TOCTOU race, story 132).
|
||||
// Atomically resolve agent name, check availability, and register as
|
||||
// Pending. When `agent_name` is `None` the first idle coder is
|
||||
// selected inside the lock so no TOCTOU race can occur between the
|
||||
// availability check and the Pending insert (story 132, story 190).
|
||||
//
|
||||
// The `PendingGuard` ensures that if any step below fails the entry is
|
||||
// removed from the pool so it does not permanently block auto-assign
|
||||
// (bug 118).
|
||||
let resolved_name: String;
|
||||
let key: String;
|
||||
{
|
||||
let mut agents = self.agents.lock().map_err(|e| e.to_string())?;
|
||||
|
||||
resolved_name = match agent_name {
|
||||
Some(name) => name.to_string(),
|
||||
None => find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder)
|
||||
.map(|s| s.to_string())
|
||||
.ok_or_else(|| {
|
||||
if config
|
||||
.agent
|
||||
.iter()
|
||||
.any(|a| agent_config_stage(a) == PipelineStage::Coder)
|
||||
{
|
||||
format!(
|
||||
"All coder agents are busy; story '{story_id}' will be \
|
||||
picked up when one becomes available"
|
||||
)
|
||||
} else {
|
||||
"No coder agent configured. Specify an agent_name explicitly."
|
||||
.to_string()
|
||||
}
|
||||
})?,
|
||||
};
|
||||
|
||||
key = composite_key(story_id, &resolved_name);
|
||||
|
||||
// Check for duplicate assignment (same story + same agent already active).
|
||||
if let Some(agent) = agents.get(&key)
|
||||
&& (agent.status == AgentStatus::Running || agent.status == AgentStatus::Pending)
|
||||
{
|
||||
@@ -396,11 +397,13 @@ impl AgentPool {
|
||||
agent.status
|
||||
));
|
||||
}
|
||||
// Enforce single-instance concurrency: if this agent is already running on
|
||||
// any other story, reject the request. The story remains in its current pipeline
|
||||
// directory and `auto_assign_available_work` will pick it up when the agent
|
||||
// becomes free.
|
||||
if let Some(busy_story) = agents.iter().find_map(|(k, a)| {
|
||||
// Enforce single-instance concurrency for explicitly-named agents:
|
||||
// if this agent is already running on any other story, reject.
|
||||
// Auto-selected agents are already guaranteed idle by
|
||||
// find_free_agent_for_stage, so this check is only needed for
|
||||
// explicit requests.
|
||||
if agent_name.is_some()
|
||||
&& let Some(busy_story) = agents.iter().find_map(|(k, a)| {
|
||||
if a.agent_name == resolved_name
|
||||
&& matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
|
||||
{
|
||||
@@ -413,7 +416,8 @@ impl AgentPool {
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}) {
|
||||
})
|
||||
{
|
||||
return Err(format!(
|
||||
"Agent '{resolved_name}' is already running on story '{busy_story}'; \
|
||||
story '{story_id}' will be picked up when the agent becomes available"
|
||||
@@ -437,6 +441,23 @@ impl AgentPool {
|
||||
}
|
||||
let mut pending_guard = PendingGuard::new(self.agents.clone(), key.clone());
|
||||
|
||||
// Create persistent log writer (needs resolved_name, so must be after
|
||||
// the atomic resolution above).
|
||||
let log_writer = match AgentLogWriter::new(
|
||||
project_root,
|
||||
story_id,
|
||||
&resolved_name,
|
||||
&log_session_id,
|
||||
) {
|
||||
Ok(w) => Some(Arc::new(Mutex::new(w))),
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"[agents] Failed to create log writer for {story_id}:{resolved_name}: {e}"
|
||||
);
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
// Notify WebSocket clients that a new agent is pending.
|
||||
Self::notify_agent_state_changed(&self.watcher_tx);
|
||||
|
||||
@@ -674,6 +695,28 @@ impl AgentPool {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return the names of configured agents for `stage` that are not currently
|
||||
/// running or pending.
|
||||
pub fn available_agents_for_stage(
|
||||
&self,
|
||||
config: &ProjectConfig,
|
||||
stage: &PipelineStage,
|
||||
) -> Result<Vec<String>, String> {
|
||||
let agents = self.agents.lock().map_err(|e| e.to_string())?;
|
||||
Ok(config
|
||||
.agent
|
||||
.iter()
|
||||
.filter(|cfg| agent_config_stage(cfg) == *stage)
|
||||
.filter(|cfg| {
|
||||
!agents.values().any(|a| {
|
||||
a.agent_name == cfg.name
|
||||
&& matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
|
||||
})
|
||||
})
|
||||
.map(|cfg| cfg.name.clone())
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// List all agents with their status.
|
||||
pub fn list_agents(&self) -> Result<Vec<AgentInfo>, String> {
|
||||
let agents = self.agents.lock().map_err(|e| e.to_string())?;
|
||||
@@ -6299,4 +6342,198 @@ stage = "qa"
|
||||
(bug 173: lozenges must update when agents are assigned during pipeline advance)"
|
||||
);
|
||||
}
|
||||
|
||||
// ── available_agents_for_stage tests (story 190) ──────────────────────────
|
||||
|
||||
fn make_config(toml_str: &str) -> ProjectConfig {
|
||||
ProjectConfig::parse(toml_str).unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn available_agents_for_stage_returns_idle_agents() {
|
||||
let config = make_config(
|
||||
r#"
|
||||
[[agent]]
|
||||
name = "coder-1"
|
||||
stage = "coder"
|
||||
|
||||
[[agent]]
|
||||
name = "coder-2"
|
||||
stage = "coder"
|
||||
|
||||
[[agent]]
|
||||
name = "qa"
|
||||
stage = "qa"
|
||||
"#,
|
||||
);
|
||||
let pool = AgentPool::new_test(3001);
|
||||
// coder-1 is busy on story-1
|
||||
pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running);
|
||||
|
||||
let available = pool
|
||||
.available_agents_for_stage(&config, &PipelineStage::Coder)
|
||||
.unwrap();
|
||||
assert_eq!(available, vec!["coder-2"]);
|
||||
|
||||
let available_qa = pool
|
||||
.available_agents_for_stage(&config, &PipelineStage::Qa)
|
||||
.unwrap();
|
||||
assert_eq!(available_qa, vec!["qa"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn available_agents_for_stage_returns_empty_when_all_busy() {
|
||||
let config = make_config(
|
||||
r#"
|
||||
[[agent]]
|
||||
name = "coder-1"
|
||||
stage = "coder"
|
||||
"#,
|
||||
);
|
||||
let pool = AgentPool::new_test(3001);
|
||||
pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running);
|
||||
|
||||
let available = pool
|
||||
.available_agents_for_stage(&config, &PipelineStage::Coder)
|
||||
.unwrap();
|
||||
assert!(available.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn available_agents_for_stage_ignores_completed_agents() {
|
||||
let config = make_config(
|
||||
r#"
|
||||
[[agent]]
|
||||
name = "coder-1"
|
||||
stage = "coder"
|
||||
"#,
|
||||
);
|
||||
let pool = AgentPool::new_test(3001);
|
||||
// Completed agents should not count as busy.
|
||||
pool.inject_test_agent("story-1", "coder-1", AgentStatus::Completed);
|
||||
|
||||
let available = pool
|
||||
.available_agents_for_stage(&config, &PipelineStage::Coder)
|
||||
.unwrap();
|
||||
assert_eq!(available, vec!["coder-1"]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn start_agent_auto_selects_second_coder_when_first_busy() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let sk = tmp.path().join(".story_kit");
|
||||
std::fs::create_dir_all(&sk).unwrap();
|
||||
std::fs::write(
|
||||
sk.join("project.toml"),
|
||||
r#"
|
||||
[[agent]]
|
||||
name = "supervisor"
|
||||
stage = "other"
|
||||
|
||||
[[agent]]
|
||||
name = "coder-1"
|
||||
stage = "coder"
|
||||
|
||||
[[agent]]
|
||||
name = "coder-2"
|
||||
stage = "coder"
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let pool = AgentPool::new_test(3001);
|
||||
// coder-1 is busy on another story
|
||||
pool.inject_test_agent("other-story", "coder-1", AgentStatus::Running);
|
||||
|
||||
// Call start_agent without agent_name — should pick coder-2
|
||||
let result = pool
|
||||
.start_agent(tmp.path(), "42_my_story", None, None)
|
||||
.await;
|
||||
// Will fail for infrastructure reasons (no git repo), but should NOT
|
||||
// fail with "All coder agents are busy" — that would mean it didn't
|
||||
// try coder-2.
|
||||
match result {
|
||||
Ok(info) => {
|
||||
assert_eq!(info.agent_name, "coder-2");
|
||||
}
|
||||
Err(err) => {
|
||||
assert!(
|
||||
!err.contains("All coder agents are busy"),
|
||||
"should have selected coder-2 but got: {err}"
|
||||
);
|
||||
assert!(
|
||||
!err.contains("No coder agent configured"),
|
||||
"should not fail on agent selection, got: {err}"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn start_agent_returns_busy_when_all_coders_occupied() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let sk = tmp.path().join(".story_kit");
|
||||
std::fs::create_dir_all(&sk).unwrap();
|
||||
std::fs::write(
|
||||
sk.join("project.toml"),
|
||||
r#"
|
||||
[[agent]]
|
||||
name = "coder-1"
|
||||
stage = "coder"
|
||||
|
||||
[[agent]]
|
||||
name = "coder-2"
|
||||
stage = "coder"
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let pool = AgentPool::new_test(3001);
|
||||
pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running);
|
||||
pool.inject_test_agent("story-2", "coder-2", AgentStatus::Pending);
|
||||
|
||||
let result = pool
|
||||
.start_agent(tmp.path(), "story-3", None, None)
|
||||
.await;
|
||||
assert!(result.is_err());
|
||||
let err = result.unwrap_err();
|
||||
assert!(
|
||||
err.contains("All coder agents are busy"),
|
||||
"expected busy error, got: {err}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn start_agent_explicit_name_unchanged_when_busy() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let sk = tmp.path().join(".story_kit");
|
||||
std::fs::create_dir_all(&sk).unwrap();
|
||||
std::fs::write(
|
||||
sk.join("project.toml"),
|
||||
r#"
|
||||
[[agent]]
|
||||
name = "coder-1"
|
||||
stage = "coder"
|
||||
|
||||
[[agent]]
|
||||
name = "coder-2"
|
||||
stage = "coder"
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let pool = AgentPool::new_test(3001);
|
||||
pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running);
|
||||
|
||||
// Explicit request for coder-1 (busy) should fail even though coder-2 is free.
|
||||
let result = pool
|
||||
.start_agent(tmp.path(), "story-2", Some("coder-1"), None)
|
||||
.await;
|
||||
assert!(result.is_err());
|
||||
let err = result.unwrap_err();
|
||||
assert!(
|
||||
err.contains("coder-1") && err.contains("already running"),
|
||||
"expected explicit busy error, got: {err}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -57,20 +57,6 @@ pub struct AgentConfig {
|
||||
pub inactivity_timeout_secs: u64,
|
||||
}
|
||||
|
||||
impl AgentConfig {
|
||||
/// Returns true if this agent is a coder.
|
||||
///
|
||||
/// Prefers the explicit `stage` field; falls back to checking whether the
|
||||
/// agent name starts with `"coder"` for backwards compatibility.
|
||||
pub fn is_coder(&self) -> bool {
|
||||
match self.stage.as_deref() {
|
||||
Some("coder") => true,
|
||||
Some(_) => false,
|
||||
None => self.name.starts_with("coder"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn default_path() -> String {
|
||||
".".to_string()
|
||||
}
|
||||
@@ -203,15 +189,6 @@ impl ProjectConfig {
|
||||
self.agent.first()
|
||||
}
|
||||
|
||||
/// Get the first coder agent config.
|
||||
///
|
||||
/// Prefers the explicit `stage = "coder"` field over the legacy name-based
|
||||
/// heuristic (names starting with "coder"). Returns `None` if no coder
|
||||
/// agent is configured.
|
||||
pub fn default_coder_agent(&self) -> Option<&AgentConfig> {
|
||||
self.agent.iter().find(|a| a.is_coder())
|
||||
}
|
||||
|
||||
/// Render template variables in agent args and prompt for the given agent.
|
||||
/// If `agent_name` is None, uses the first (default) agent.
|
||||
pub fn render_agent_args(
|
||||
@@ -503,89 +480,6 @@ name = "second"
|
||||
assert!(config.find_agent("missing").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn default_coder_agent_skips_supervisor() {
|
||||
let toml_str = r#"
|
||||
[[agent]]
|
||||
name = "supervisor"
|
||||
stage = "other"
|
||||
|
||||
[[agent]]
|
||||
name = "coder-1"
|
||||
stage = "coder"
|
||||
"#;
|
||||
let config = ProjectConfig::parse(toml_str).unwrap();
|
||||
assert_eq!(config.default_coder_agent().unwrap().name, "coder-1");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn default_coder_agent_uses_name_heuristic_when_no_stage() {
|
||||
let toml_str = r#"
|
||||
[[agent]]
|
||||
name = "supervisor"
|
||||
|
||||
[[agent]]
|
||||
name = "coder-1"
|
||||
"#;
|
||||
let config = ProjectConfig::parse(toml_str).unwrap();
|
||||
assert_eq!(config.default_coder_agent().unwrap().name, "coder-1");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn default_coder_agent_returns_none_when_no_coders() {
|
||||
let toml_str = r#"
|
||||
[[agent]]
|
||||
name = "supervisor"
|
||||
stage = "other"
|
||||
"#;
|
||||
let config = ProjectConfig::parse(toml_str).unwrap();
|
||||
assert!(config.default_coder_agent().is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn is_coder_explicit_stage() {
|
||||
let mut agent = AgentConfig {
|
||||
name: "my-agent".to_string(),
|
||||
role: String::new(),
|
||||
command: "claude".to_string(),
|
||||
args: vec![],
|
||||
prompt: String::new(),
|
||||
model: None,
|
||||
allowed_tools: None,
|
||||
max_turns: None,
|
||||
max_budget_usd: None,
|
||||
system_prompt: None,
|
||||
stage: Some("coder".to_string()),
|
||||
inactivity_timeout_secs: 300,
|
||||
};
|
||||
assert!(agent.is_coder());
|
||||
agent.stage = Some("other".to_string());
|
||||
assert!(!agent.is_coder());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn is_coder_name_heuristic() {
|
||||
let make = |name: &str| AgentConfig {
|
||||
name: name.to_string(),
|
||||
role: String::new(),
|
||||
command: "claude".to_string(),
|
||||
args: vec![],
|
||||
prompt: String::new(),
|
||||
model: None,
|
||||
allowed_tools: None,
|
||||
max_turns: None,
|
||||
max_budget_usd: None,
|
||||
system_prompt: None,
|
||||
stage: None,
|
||||
inactivity_timeout_secs: 300,
|
||||
};
|
||||
assert!(make("coder-1").is_coder());
|
||||
assert!(make("coder-opus").is_coder());
|
||||
assert!(!make("supervisor").is_coder());
|
||||
assert!(!make("qa").is_coder());
|
||||
assert!(!make("mergemaster").is_coder());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_project_toml_from_file() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::agents::{close_bug_to_archive, move_story_to_archived, move_story_to_merge, move_story_to_qa};
|
||||
use crate::agents::{close_bug_to_archive, move_story_to_archived, move_story_to_merge, move_story_to_qa, PipelineStage};
|
||||
use crate::config::ProjectConfig;
|
||||
use crate::log_buffer;
|
||||
use crate::slog_warn;
|
||||
@@ -1269,6 +1269,22 @@ fn get_agent_output_from_log(
|
||||
fn tool_get_agent_config(ctx: &AppContext) -> Result<String, String> {
|
||||
let project_root = ctx.agents.get_project_root(&ctx.state)?;
|
||||
let config = ProjectConfig::load(&project_root)?;
|
||||
|
||||
// Collect available (idle) agent names across all stages so the caller can
|
||||
// see at a glance which agents are free to start (story 190).
|
||||
let mut available_names: std::collections::HashSet<String> =
|
||||
std::collections::HashSet::new();
|
||||
for stage in &[
|
||||
PipelineStage::Coder,
|
||||
PipelineStage::Qa,
|
||||
PipelineStage::Mergemaster,
|
||||
PipelineStage::Other,
|
||||
] {
|
||||
if let Ok(names) = ctx.agents.available_agents_for_stage(&config, stage) {
|
||||
available_names.extend(names);
|
||||
}
|
||||
}
|
||||
|
||||
serde_json::to_string_pretty(&json!(config
|
||||
.agent
|
||||
.iter()
|
||||
@@ -1279,6 +1295,7 @@ fn tool_get_agent_config(ctx: &AppContext) -> Result<String, String> {
|
||||
"allowed_tools": a.allowed_tools,
|
||||
"max_turns": a.max_turns,
|
||||
"max_budget_usd": a.max_budget_usd,
|
||||
"available": available_names.contains(&a.name),
|
||||
}))
|
||||
.collect::<Vec<_>>()))
|
||||
.map_err(|e| format!("Serialization error: {e}"))
|
||||
|
||||
Reference in New Issue
Block a user