story-kit: merge 190_story_auto_select_available_agent_for_stage_in_start_agent

This commit is contained in:
Dave
2026-02-25 16:17:38 +00:00
parent 8c338a6c63
commit 411653ab15
3 changed files with 316 additions and 168 deletions

View File

@@ -323,7 +323,11 @@ impl AgentPool {
}
/// Start an agent for a story: load config, create worktree, spawn agent.
/// If `agent_name` is None, defaults to the first configured agent.
///
/// When `agent_name` is `None`, automatically selects the first idle coder
/// agent (story 190). If all coders are busy the call fails with an error
/// indicating the story will be picked up when one becomes available.
///
/// If `resume_context` is provided, it is appended to the rendered prompt
/// so the agent can pick up from a previous failed attempt.
pub async fn start_agent(
@@ -335,59 +339,56 @@ impl AgentPool {
) -> Result<AgentInfo, String> {
let config = ProjectConfig::load(project_root)?;
// Resolve agent name from config
let resolved_name = match agent_name {
Some(name) => {
config
.find_agent(name)
.ok_or_else(|| format!("No agent named '{name}' in config"))?;
name.to_string()
}
None => config
.default_coder_agent()
.ok_or_else(|| {
"No coder agent configured. Specify an agent_name explicitly.".to_string()
})?
.name
.clone(),
};
// Validate explicit agent name early (no lock needed).
if let Some(name) = agent_name {
config
.find_agent(name)
.ok_or_else(|| format!("No agent named '{name}' in config"))?;
}
let key = composite_key(story_id, &resolved_name);
// Create shared resources before the atomic check-and-insert so the
// agents lock is held continuously from the availability check through
// the Pending insert, eliminating the TOCTOU race (story 132).
// Create name-independent shared resources before the lock so they are
// ready for the atomic check-and-insert (story 132).
let (tx, _) = broadcast::channel::<AgentEvent>(1024);
let event_log: Arc<Mutex<Vec<AgentEvent>>> = Arc::new(Mutex::new(Vec::new()));
// Generate a unique session ID for the persistent log file.
let log_session_id = uuid::Uuid::new_v4().to_string();
// Create persistent log writer.
let log_writer = match AgentLogWriter::new(
project_root,
story_id,
&resolved_name,
&log_session_id,
) {
Ok(w) => Some(Arc::new(Mutex::new(w))),
Err(e) => {
eprintln!("[agents] Failed to create log writer for {story_id}:{resolved_name}: {e}");
None
}
};
// Atomically check availability and register as Pending. The lock is
// held continuously from the duplicate check through the HashMap insert
// so no concurrent start_agent call can slip through the check before
// this insert completes (fixes TOCTOU race, story 132).
// Atomically resolve agent name, check availability, and register as
// Pending. When `agent_name` is `None` the first idle coder is
// selected inside the lock so no TOCTOU race can occur between the
// availability check and the Pending insert (story 132, story 190).
//
// The `PendingGuard` ensures that if any step below fails the entry is
// removed from the pool so it does not permanently block auto-assign
// (bug 118).
let resolved_name: String;
let key: String;
{
let mut agents = self.agents.lock().map_err(|e| e.to_string())?;
resolved_name = match agent_name {
Some(name) => name.to_string(),
None => find_free_agent_for_stage(&config, &agents, &PipelineStage::Coder)
.map(|s| s.to_string())
.ok_or_else(|| {
if config
.agent
.iter()
.any(|a| agent_config_stage(a) == PipelineStage::Coder)
{
format!(
"All coder agents are busy; story '{story_id}' will be \
picked up when one becomes available"
)
} else {
"No coder agent configured. Specify an agent_name explicitly."
.to_string()
}
})?,
};
key = composite_key(story_id, &resolved_name);
// Check for duplicate assignment (same story + same agent already active).
if let Some(agent) = agents.get(&key)
&& (agent.status == AgentStatus::Running || agent.status == AgentStatus::Pending)
{
@@ -396,24 +397,27 @@ impl AgentPool {
agent.status
));
}
// Enforce single-instance concurrency: if this agent is already running on
// any other story, reject the request. The story remains in its current pipeline
// directory and `auto_assign_available_work` will pick it up when the agent
// becomes free.
if let Some(busy_story) = agents.iter().find_map(|(k, a)| {
if a.agent_name == resolved_name
&& matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
{
Some(
k.rsplit_once(':')
.map(|(sid, _)| sid)
.unwrap_or(k)
.to_string(),
)
} else {
None
}
}) {
// Enforce single-instance concurrency for explicitly-named agents:
// if this agent is already running on any other story, reject.
// Auto-selected agents are already guaranteed idle by
// find_free_agent_for_stage, so this check is only needed for
// explicit requests.
if agent_name.is_some()
&& let Some(busy_story) = agents.iter().find_map(|(k, a)| {
if a.agent_name == resolved_name
&& matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
{
Some(
k.rsplit_once(':')
.map(|(sid, _)| sid)
.unwrap_or(k)
.to_string(),
)
} else {
None
}
})
{
return Err(format!(
"Agent '{resolved_name}' is already running on story '{busy_story}'; \
story '{story_id}' will be picked up when the agent becomes available"
@@ -437,6 +441,23 @@ impl AgentPool {
}
let mut pending_guard = PendingGuard::new(self.agents.clone(), key.clone());
// Create persistent log writer (needs resolved_name, so must be after
// the atomic resolution above).
let log_writer = match AgentLogWriter::new(
project_root,
story_id,
&resolved_name,
&log_session_id,
) {
Ok(w) => Some(Arc::new(Mutex::new(w))),
Err(e) => {
eprintln!(
"[agents] Failed to create log writer for {story_id}:{resolved_name}: {e}"
);
None
}
};
// Notify WebSocket clients that a new agent is pending.
Self::notify_agent_state_changed(&self.watcher_tx);
@@ -674,6 +695,28 @@ impl AgentPool {
Ok(())
}
/// Names of agents configured for `stage` that currently have no active
/// (Running or Pending) entry in the pool — i.e. agents free to take work.
///
/// Returns an error only if the internal agents lock is poisoned.
pub fn available_agents_for_stage(
    &self,
    config: &ProjectConfig,
    stage: &PipelineStage,
) -> Result<Vec<String>, String> {
    let agents = self.agents.lock().map_err(|e| e.to_string())?;
    // An agent counts as busy when any pool entry bearing its name is
    // still Running or Pending; Completed/Failed entries are history.
    let is_busy = |name: &str| {
        agents.values().any(|info| {
            info.agent_name == name
                && matches!(info.status, AgentStatus::Running | AgentStatus::Pending)
        })
    };
    let mut free = Vec::new();
    for cfg in &config.agent {
        if agent_config_stage(cfg) == *stage && !is_busy(&cfg.name) {
            free.push(cfg.name.clone());
        }
    }
    Ok(free)
}
/// List all agents with their status.
pub fn list_agents(&self) -> Result<Vec<AgentInfo>, String> {
let agents = self.agents.lock().map_err(|e| e.to_string())?;
@@ -6299,4 +6342,198 @@ stage = "qa"
(bug 173: lozenges must update when agents are assigned during pipeline advance)"
);
}
// ── available_agents_for_stage tests (story 190) ──────────────────────────
/// Test helper: parse a TOML snippet into a `ProjectConfig`, panicking on
/// invalid input so broken test fixtures surface immediately.
fn make_config(toml_str: &str) -> ProjectConfig {
    ProjectConfig::parse(toml_str).unwrap()
}
#[test]
fn available_agents_for_stage_returns_idle_agents() {
    let config = make_config(
        r#"
[[agent]]
name = "coder-1"
stage = "coder"
[[agent]]
name = "coder-2"
stage = "coder"
[[agent]]
name = "qa"
stage = "qa"
"#,
    );
    let pool = AgentPool::new_test(3001);
    // coder-1 is busy on story-1; coder-2 and qa remain idle.
    pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running);
    assert_eq!(
        pool.available_agents_for_stage(&config, &PipelineStage::Coder)
            .unwrap(),
        vec!["coder-2"]
    );
    assert_eq!(
        pool.available_agents_for_stage(&config, &PipelineStage::Qa)
            .unwrap(),
        vec!["qa"]
    );
}
#[test]
fn available_agents_for_stage_returns_empty_when_all_busy() {
    let config = make_config(
        r#"
[[agent]]
name = "coder-1"
stage = "coder"
"#,
    );
    let pool = AgentPool::new_test(3001);
    // The only configured coder is running, so nothing is free.
    pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running);
    assert!(pool
        .available_agents_for_stage(&config, &PipelineStage::Coder)
        .unwrap()
        .is_empty());
}
#[test]
fn available_agents_for_stage_ignores_completed_agents() {
    let config = make_config(
        r#"
[[agent]]
name = "coder-1"
stage = "coder"
"#,
    );
    let pool = AgentPool::new_test(3001);
    // A Completed pool entry is history, not an active assignment, so the
    // agent should still be reported as available.
    pool.inject_test_agent("story-1", "coder-1", AgentStatus::Completed);
    assert_eq!(
        pool.available_agents_for_stage(&config, &PipelineStage::Coder)
            .unwrap(),
        vec!["coder-1"]
    );
}
#[tokio::test]
async fn start_agent_auto_selects_second_coder_when_first_busy() {
    let tmp = tempfile::tempdir().unwrap();
    let sk = tmp.path().join(".story_kit");
    std::fs::create_dir_all(&sk).unwrap();
    std::fs::write(
        sk.join("project.toml"),
        r#"
[[agent]]
name = "supervisor"
stage = "other"
[[agent]]
name = "coder-1"
stage = "coder"
[[agent]]
name = "coder-2"
stage = "coder"
"#,
    )
    .unwrap();
    let pool = AgentPool::new_test(3001);
    // Occupy coder-1 so the auto-selection logic must skip past it.
    pool.inject_test_agent("other-story", "coder-1", AgentStatus::Running);
    // No agent_name supplied: the pool should settle on coder-2.
    match pool.start_agent(tmp.path(), "42_my_story", None, None).await {
        Ok(info) => assert_eq!(info.agent_name, "coder-2"),
        // Infrastructure failures (the tempdir is not a git repo) are
        // acceptable here; selection errors would mean coder-2 was
        // never even tried.
        Err(err) => {
            assert!(
                !err.contains("All coder agents are busy"),
                "should have selected coder-2 but got: {err}"
            );
            assert!(
                !err.contains("No coder agent configured"),
                "should not fail on agent selection, got: {err}"
            );
        }
    }
}
#[tokio::test]
async fn start_agent_returns_busy_when_all_coders_occupied() {
    let tmp = tempfile::tempdir().unwrap();
    let sk = tmp.path().join(".story_kit");
    std::fs::create_dir_all(&sk).unwrap();
    std::fs::write(
        sk.join("project.toml"),
        r#"
[[agent]]
name = "coder-1"
stage = "coder"
[[agent]]
name = "coder-2"
stage = "coder"
"#,
    )
    .unwrap();
    let pool = AgentPool::new_test(3001);
    // Both coders are active — Running and Pending both count as busy.
    pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running);
    pool.inject_test_agent("story-2", "coder-2", AgentStatus::Pending);
    let err = pool
        .start_agent(tmp.path(), "story-3", None, None)
        .await
        .expect_err("start_agent should fail when every coder is occupied");
    assert!(
        err.contains("All coder agents are busy"),
        "expected busy error, got: {err}"
    );
}
#[tokio::test]
async fn start_agent_explicit_name_unchanged_when_busy() {
    let tmp = tempfile::tempdir().unwrap();
    let sk = tmp.path().join(".story_kit");
    std::fs::create_dir_all(&sk).unwrap();
    std::fs::write(
        sk.join("project.toml"),
        r#"
[[agent]]
name = "coder-1"
stage = "coder"
[[agent]]
name = "coder-2"
stage = "coder"
"#,
    )
    .unwrap();
    let pool = AgentPool::new_test(3001);
    pool.inject_test_agent("story-1", "coder-1", AgentStatus::Running);
    // Naming the busy coder explicitly must fail even though coder-2 is
    // idle: auto-selection must not override an explicit choice.
    let err = pool
        .start_agent(tmp.path(), "story-2", Some("coder-1"), None)
        .await
        .expect_err("explicit request for a busy agent should fail");
    assert!(
        err.contains("coder-1") && err.contains("already running"),
        "expected explicit busy error, got: {err}"
    );
}
}

View File

@@ -57,20 +57,6 @@ pub struct AgentConfig {
pub inactivity_timeout_secs: u64,
}
impl AgentConfig {
/// Whether this agent fills the coder role.
///
/// An explicit `stage` field wins outright; when no stage is configured,
/// fall back to the legacy convention of names beginning with `"coder"`.
pub fn is_coder(&self) -> bool {
    match self.stage.as_deref() {
        Some(stage) => stage == "coder",
        None => self.name.starts_with("coder"),
    }
}
}
fn default_path() -> String {
".".to_string()
}
@@ -203,15 +189,6 @@ impl ProjectConfig {
self.agent.first()
}
/// First configured agent that qualifies as a coder.
///
/// Resolution honours the explicit `stage = "coder"` field first, falling
/// back to the name-prefix heuristic via [`AgentConfig::is_coder`].
/// Returns `None` when the config contains no coder at all.
pub fn default_coder_agent(&self) -> Option<&AgentConfig> {
    self.agent.iter().find(|cfg| cfg.is_coder())
}
/// Render template variables in agent args and prompt for the given agent.
/// If `agent_name` is None, uses the first (default) agent.
pub fn render_agent_args(
@@ -503,89 +480,6 @@ name = "second"
assert!(config.find_agent("missing").is_none());
}
#[test]
fn default_coder_agent_skips_supervisor() {
    let config = ProjectConfig::parse(
        r#"
[[agent]]
name = "supervisor"
stage = "other"
[[agent]]
name = "coder-1"
stage = "coder"
"#,
    )
    .unwrap();
    // The supervisor's explicit non-coder stage excludes it from selection.
    assert_eq!(config.default_coder_agent().unwrap().name, "coder-1");
}
#[test]
fn default_coder_agent_uses_name_heuristic_when_no_stage() {
    let config = ProjectConfig::parse(
        r#"
[[agent]]
name = "supervisor"
[[agent]]
name = "coder-1"
"#,
    )
    .unwrap();
    // Without stage fields, the "coder" name prefix decides.
    assert_eq!(config.default_coder_agent().unwrap().name, "coder-1");
}
#[test]
fn default_coder_agent_returns_none_when_no_coders() {
    let config = ProjectConfig::parse(
        r#"
[[agent]]
name = "supervisor"
stage = "other"
"#,
    )
    .unwrap();
    // No configured agent qualifies as a coder.
    assert!(config.default_coder_agent().is_none());
}
#[test]
fn is_coder_explicit_stage() {
    // Build an agent whose stage is set explicitly; every other field is
    // inert for classification purposes.
    let with_stage = |stage: &str| AgentConfig {
        name: "my-agent".to_string(),
        role: String::new(),
        command: "claude".to_string(),
        args: vec![],
        prompt: String::new(),
        model: None,
        allowed_tools: None,
        max_turns: None,
        max_budget_usd: None,
        system_prompt: None,
        stage: Some(stage.to_string()),
        inactivity_timeout_secs: 300,
    };
    assert!(with_stage("coder").is_coder());
    assert!(!with_stage("other").is_coder());
}
#[test]
fn is_coder_name_heuristic() {
    // With no explicit stage, classification falls back to the name prefix.
    let agent_named = |name: &str| AgentConfig {
        name: name.to_string(),
        role: String::new(),
        command: "claude".to_string(),
        args: vec![],
        prompt: String::new(),
        model: None,
        allowed_tools: None,
        max_turns: None,
        max_budget_usd: None,
        system_prompt: None,
        stage: None,
        inactivity_timeout_secs: 300,
    };
    for coder in ["coder-1", "coder-opus"] {
        assert!(agent_named(coder).is_coder());
    }
    for non_coder in ["supervisor", "qa", "mergemaster"] {
        assert!(!agent_named(non_coder).is_coder());
    }
}
#[test]
fn parse_project_toml_from_file() {
let tmp = tempfile::tempdir().unwrap();

View File

@@ -1,4 +1,4 @@
use crate::agents::{close_bug_to_archive, move_story_to_archived, move_story_to_merge, move_story_to_qa};
use crate::agents::{close_bug_to_archive, move_story_to_archived, move_story_to_merge, move_story_to_qa, PipelineStage};
use crate::config::ProjectConfig;
use crate::log_buffer;
use crate::slog_warn;
@@ -1269,6 +1269,22 @@ fn get_agent_output_from_log(
fn tool_get_agent_config(ctx: &AppContext) -> Result<String, String> {
let project_root = ctx.agents.get_project_root(&ctx.state)?;
let config = ProjectConfig::load(&project_root)?;
// Collect available (idle) agent names across all stages so the caller can
// see at a glance which agents are free to start (story 190).
let mut available_names: std::collections::HashSet<String> =
std::collections::HashSet::new();
for stage in &[
PipelineStage::Coder,
PipelineStage::Qa,
PipelineStage::Mergemaster,
PipelineStage::Other,
] {
if let Ok(names) = ctx.agents.available_agents_for_stage(&config, stage) {
available_names.extend(names);
}
}
serde_json::to_string_pretty(&json!(config
.agent
.iter()
@@ -1279,6 +1295,7 @@ fn tool_get_agent_config(ctx: &AppContext) -> Result<String, String> {
"allowed_tools": a.allowed_tools,
"max_turns": a.max_turns,
"max_budget_usd": a.max_budget_usd,
"available": available_names.contains(&a.name),
}))
.collect::<Vec<_>>()))
.map_err(|e| format!("Serialization error: {e}"))