refactor: split agents/pool/start.rs into mod.rs + validation.rs + spawn.rs
The 1630-line start.rs is split into a sub-module directory:

- validation.rs: validate_agent_stage + read_front_matter_agent helpers (69 lines)
- spawn.rs: run_agent_spawn — the background async work that was inlined as a tokio::spawn closure body inside start_agent (359 lines)
- mod.rs: AgentPool::start_agent orchestrator + tests (1062 lines)

Stage validation and front-matter agent reading are pre-lock pure helpers that extract naturally. The spawn closure body becomes a free async fn that takes the previously-cloned values as parameters; they are rebound to the original _clone / _owned names at the top of the body, so the actual work code is a verbatim copy. No behaviour change. All 23 start tests pass; the full suite is green.
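A minimal, self-contained sketch of that extraction pattern (hypothetical names, not this crate's API; the real function is spawn::run_agent_spawn in the diff below):

    // Sketch only: a tokio::spawn closure body hoisted into a free async fn.
    // The values that used to be cloned for the closure become parameters and
    // are rebound to the old names, so the moved body can stay a verbatim copy.
    use std::sync::Arc;

    async fn run_background_work(project_root: Arc<String>, story_id: String) {
        // Re-bind to the names the original closure body used.
        let project_root_clone = project_root;
        let sid = story_id;
        // ...original closure body, unchanged, still using the *_clone names...
        println!("working on {sid} under {project_root_clone}");
    }

    fn start(project_root: &Arc<String>, story_id: &str) -> tokio::task::JoinHandle<()> {
        // Before: tokio::spawn(async move { /* inline body capturing the clones */ })
        // After:  the clones are passed as arguments to the extracted fn.
        tokio::spawn(run_background_work(Arc::clone(project_root), story_id.to_string()))
    }

    #[tokio::main]
    async fn main() {
        let root = Arc::new("/tmp/project".to_string());
        start(&root, "story-42").await.unwrap();
    }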
@@ -16,6 +16,12 @@ use super::types::{PendingGuard, StoryAgent, composite_key};
use super::worktree::find_active_story_stage;
use super::{AgentPool, auto_assign};

mod spawn;
mod validation;

use validation::{read_front_matter_agent, validate_agent_stage};

impl AgentPool {
/// Start an agent for a story: load config, create worktree, spawn agent.
///
@@ -71,45 +77,12 @@ impl AgentPool {
}

// Validate that the agent's configured stage matches the story's
// pipeline stage. This prevents any caller (auto-assign, MCP tool,
// pipeline advance, supervisor) from starting a wrong-stage agent on
// a story — e.g. mergemaster on a coding-stage story (bug 312).
if let Some(name) = agent_name {
let agent_stage = config
.find_agent(name)
.map(agent_config_stage)
.unwrap_or_else(|| pipeline_stage(name));
if agent_stage != PipelineStage::Other
&& let Some(story_stage_dir) = find_active_story_stage(project_root, story_id)
{
let expected_stage = match story_stage_dir {
"2_current" => PipelineStage::Coder,
"3_qa" => PipelineStage::Qa,
"4_merge" => PipelineStage::Mergemaster,
_ => PipelineStage::Other,
};
if expected_stage != PipelineStage::Other && expected_stage != agent_stage {
return Err(format!(
"Agent '{name}' (stage: {agent_stage:?}) cannot be assigned to \
story '{story_id}' in {story_stage_dir}/ (requires stage: {expected_stage:?})"
));
}
}
}
// pipeline stage. (See validation::validate_agent_stage.)
validate_agent_stage(&config, project_root, story_id, agent_name)?;

// Read the preferred agent from the story's front matter before acquiring
// the lock. When no explicit agent_name is given, this lets start_agent
// honour `agent: coder-opus` written by the `assign` command — mirroring
// the auto_assign path (bug 379).
let front_matter_agent: Option<String> = if agent_name.is_none() {
crate::db::read_content(story_id).and_then(|contents| {
crate::io::story_metadata::parse_front_matter(&contents)
.ok()?
.agent
})
} else {
None
};
// the lock. (See validation::read_front_matter_agent.)
let front_matter_agent: Option<String> = read_front_matter_agent(story_id, agent_name);

// Atomically resolve agent name, check availability, and register as
// Pending. When `agent_name` is `None` the first idle coder is
@@ -320,317 +293,28 @@ impl AgentPool {
.unwrap_or(300);

// Clone all values needed inside the background spawn.
let project_root_clone = project_root.to_path_buf();
let config_clone = config.clone();
let resume_context_owned = resume_context.map(str::to_string);
let session_id_to_resume_owned = session_id_to_resume;
let sid = story_id.to_string();
let aname = resolved_name.clone();
let tx_clone = tx.clone();
let agents_ref = self.agents.clone();
let key_clone = key.clone();
let log_clone = event_log.clone();
let port_for_task = self.port;
let log_writer_clone = log_writer.clone();
let child_killers_clone = self.child_killers.clone();
let watcher_tx_clone = self.watcher_tx.clone();
let merge_jobs_clone = Arc::clone(&self.merge_jobs);

// Spawn the background task. Worktree creation and agent launch happen here
// so `start_agent` returns immediately after registering the agent as
// Pending — non-blocking by design (story 157).
let handle = tokio::spawn(async move {
// Step 1: create the worktree (slow — git checkout, pnpm install, etc.)
let wt_info = match crate::worktree::create_worktree(
&project_root_clone,
&sid,
&config_clone,
port_for_task,
)
.await
{
Ok(wt) => wt,
Err(e) => {
let error_msg = format!("Failed to create worktree: {e}");
slog_error!("[agents] {error_msg}");
let event = AgentEvent::Error {
story_id: sid.clone(),
agent_name: aname.clone(),
message: error_msg,
};
if let Ok(mut log) = log_clone.lock() {
log.push(event.clone());
}
let _ = tx_clone.send(event);
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.status = AgentStatus::Failed;
}
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
return;
}
};
let handle = tokio::spawn(spawn::run_agent_spawn(
project_root.to_path_buf(),
config.clone(),
resume_context.map(str::to_string),
session_id_to_resume,
story_id.to_string(),
resolved_name.clone(),
tx.clone(),
self.agents.clone(),
key.clone(),
event_log.clone(),
self.port,
log_writer.clone(),
self.child_killers.clone(),
self.watcher_tx.clone(),
Arc::clone(&self.merge_jobs),
inactivity_timeout_secs,
));

// Step 2: store worktree info and render agent command/args/prompt.
let wt_path_str = wt_info.path.to_string_lossy().to_string();
{
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.worktree_info = Some(wt_info.clone());
}
}

let (command, args, mut prompt) = match config_clone.render_agent_args(
&wt_path_str,
&sid,
Some(&aname),
Some(&wt_info.base_branch),
) {
Ok(result) => result,
Err(e) => {
let error_msg = format!("Failed to render agent args: {e}");
slog_error!("[agents] {error_msg}");
let event = AgentEvent::Error {
story_id: sid.clone(),
agent_name: aname.clone(),
message: error_msg,
};
if let Ok(mut log) = log_clone.lock() {
log.push(event.clone());
}
let _ = tx_clone.send(event);
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.status = AgentStatus::Failed;
}
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
return;
}
};

// Append project-local prompt content (.huskies/AGENT.md) to the
// baked-in prompt so every agent role sees project-specific guidance
// without any config changes. The file is read fresh each spawn;
// if absent or empty, the prompt is unchanged and no warning is logged.
if let Some(local) =
crate::agents::local_prompt::read_project_local_prompt(&project_root_clone)
{
prompt.push_str("\n\n");
prompt.push_str(&local);
}

// Build the effective prompt and determine resume session.
//
// When resuming a previous session, discard the full rendered prompt
// (which would re-read CLAUDE.md and README) and send only the gate
// failure context as a new message. On a fresh start, append the
// failure context to the original prompt as before.
let effective_prompt = match &session_id_to_resume_owned {
Some(_) => resume_context_owned.unwrap_or_default(),
None => {
if let Some(ctx) = resume_context_owned {
prompt.push_str(&ctx);
}
prompt
}
};

// Step 3: transition to Running now that the worktree is ready.
{
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.status = AgentStatus::Running;
}
}
let _ = tx_clone.send(AgentEvent::Status {
story_id: sid.clone(),
agent_name: aname.clone(),
status: "running".to_string(),
});
AgentPool::notify_agent_state_changed(&watcher_tx_clone);

// Step 4: launch the agent process via the configured runtime.
let runtime_name = config_clone
.find_agent(&aname)
.and_then(|a| a.runtime.as_deref())
.unwrap_or("claude-code");

let run_result = match runtime_name {
"claude-code" => {
let runtime = ClaudeCodeRuntime::new(
child_killers_clone.clone(),
watcher_tx_clone.clone(),
);
let ctx = RuntimeContext {
story_id: sid.clone(),
agent_name: aname.clone(),
command,
args,
prompt: effective_prompt,
cwd: wt_path_str,
inactivity_timeout_secs,
mcp_port: port_for_task,
session_id_to_resume: session_id_to_resume_owned.clone(),
};
runtime
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
.await
}
"gemini" => {
let runtime = GeminiRuntime::new();
let ctx = RuntimeContext {
story_id: sid.clone(),
agent_name: aname.clone(),
command,
args,
prompt: effective_prompt,
cwd: wt_path_str,
inactivity_timeout_secs,
mcp_port: port_for_task,
session_id_to_resume: session_id_to_resume_owned.clone(),
};
runtime
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
.await
}
"openai" => {
let runtime = OpenAiRuntime::new();
let ctx = RuntimeContext {
story_id: sid.clone(),
agent_name: aname.clone(),
command,
args,
prompt: effective_prompt,
cwd: wt_path_str,
inactivity_timeout_secs,
mcp_port: port_for_task,
session_id_to_resume: session_id_to_resume_owned,
};
runtime
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
.await
}
other => Err(format!(
"Unknown agent runtime '{other}'; check the 'runtime' field in project.toml. \
Supported: 'claude-code', 'gemini', 'openai'"
)),
};

match run_result {
Ok(result) => {
// Persist token usage if the agent reported it.
if let Some(ref usage) = result.token_usage
&& let Ok(agents) = agents_ref.lock()
&& let Some(agent) = agents.get(&key_clone)
&& let Some(ref pr) = agent.project_root
{
let model = config_clone
.find_agent(&aname)
.and_then(|a| a.model.clone());
let record = crate::agents::token_usage::build_record(
&sid,
&aname,
model,
usage.clone(),
);
if let Err(e) = crate::agents::token_usage::append_record(pr, &record) {
slog_error!(
"[agents] Failed to persist token usage for \
{sid}:{aname}: {e}"
);
}
}

// Mergemaster agents have their own completion path via
// start_merge_agent_work / run_merge_pipeline and must NOT go
// through server-owned gates. When a mergemaster exits early
// (e.g. rate-limited before calling start_merge_agent_work) the
// feature-branch worktree compiles fine and post-merge tests on
// master pass (nothing changed), which would wrongly advance the
// story to 5_done/ without any squash merge having occurred.
// Instead: just remove the agent from the pool and let
// auto-assign restart a new mergemaster for the story.
let stage = config_clone
.find_agent(&aname)
.map(agent_config_stage)
.unwrap_or_else(|| pipeline_stage(&aname));
if stage == PipelineStage::Mergemaster {
let (tx_done, done_session_id) = {
let mut lock = match agents_ref.lock() {
Ok(a) => a,
Err(_) => return,
};
if let Some(agent) = lock.remove(&key_clone) {
(agent.tx, agent.session_id.or(result.session_id))
} else {
(tx_clone.clone(), result.session_id)
}
};
// Clear any stale Running merge job so the next mergemaster
// can call start_merge_agent_work without hitting "Merge
// already in progress" (bug 498).
if let Ok(mut jobs) = merge_jobs_clone.lock()
&& let Some(job) = jobs.get(&sid)
&& matches!(job.status, crate::agents::merge::MergeJobStatus::Running)
{
jobs.remove(&sid);
}
let _ = tx_done.send(AgentEvent::Done {
story_id: sid.clone(),
agent_name: aname.clone(),
session_id: done_session_id,
});
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
// Send a WorkItem event so the auto-assign watcher loop
// re-dispatches a new mergemaster if the story still needs
// merging. This avoids an async call to start_agent inside
// a tokio::spawn (which would require Send).
let _ = watcher_tx_clone.send(crate::io::watcher::WatcherEvent::WorkItem {
stage: "4_merge".to_string(),
item_id: sid.clone(),
action: "reassign".to_string(),
commit_msg: String::new(),
from_stage: None,
});
} else {
// Server-owned completion: run acceptance gates automatically
// when the agent process exits normally.
super::pipeline::run_server_owned_completion(
&agents_ref,
port_for_task,
&sid,
&aname,
result.session_id,
watcher_tx_clone.clone(),
)
.await;
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
}
}
Err(e) => {
slog_error!("[agents] Agent process error for {aname} on {sid}: {e}");
let event = AgentEvent::Error {
story_id: sid.clone(),
agent_name: aname.clone(),
message: e,
};
if let Ok(mut log) = log_clone.lock() {
log.push(event.clone());
}
let _ = tx_clone.send(event);
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.status = AgentStatus::Failed;
}
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
}
}
});

// Store the task handle while the agent is still Pending.
{
@@ -0,0 +1,361 @@
//! Background async work spawned by `AgentPool::start_agent`.
//!
//! `start_agent` returns immediately after registering the agent as `Pending`;
//! this module runs the slow worktree creation, agent process launch, and
//! event streaming in the background (story 157).

use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};

use portable_pty::ChildKiller;
use tokio::sync::broadcast;

use crate::agent_log::AgentLogWriter;
use crate::config::ProjectConfig;
use crate::io::watcher::WatcherEvent;
use crate::slog_error;

use super::super::super::{
AgentEvent, AgentStatus, PipelineStage, agent_config_stage, pipeline_stage,
};
use super::super::super::merge::MergeJob;
use super::super::AgentPool;
use super::super::super::runtime::{
AgentRuntime, ClaudeCodeRuntime, GeminiRuntime, OpenAiRuntime, RuntimeContext,
};
use super::super::types::StoryAgent;

/// Run the background worktree-creation + agent-launch flow.
///
/// Caller (`AgentPool::start_agent`) wraps this in `tokio::spawn` and stores
/// the resulting handle on the Pending entry so cancellation works.
#[allow(clippy::too_many_arguments)]
pub(super) async fn run_agent_spawn(
project_root: PathBuf,
config: ProjectConfig,
resume_context: Option<String>,
session_id_to_resume: Option<String>,
story_id: String,
agent_name: String,
tx: broadcast::Sender<AgentEvent>,
agents: Arc<Mutex<HashMap<String, StoryAgent>>>,
key: String,
event_log: Arc<Mutex<Vec<AgentEvent>>>,
port: u16,
log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
watcher_tx: broadcast::Sender<WatcherEvent>,
merge_jobs: Arc<Mutex<HashMap<String, MergeJob>>>,
inactivity_timeout_secs: u64,
) {
// Re-bind to the legacy `_clone` / `_owned` names so the body below remains
// a verbatim copy of the original closure (story 157).
let project_root_clone = project_root;
let config_clone = config;
let resume_context_owned = resume_context;
let session_id_to_resume_owned = session_id_to_resume;
let sid = story_id;
let aname = agent_name;
let tx_clone = tx;
let agents_ref = agents;
let key_clone = key;
let log_clone = event_log;
let port_for_task = port;
let log_writer_clone = log_writer;
let child_killers_clone = child_killers;
let watcher_tx_clone = watcher_tx;
let merge_jobs_clone = merge_jobs;
let _ = inactivity_timeout_secs; // currently unused inside the closure body

// Step 1: create the worktree (slow — git checkout, pnpm install, etc.)
let wt_info = match crate::worktree::create_worktree(
&project_root_clone,
&sid,
&config_clone,
port_for_task,
)
.await
{
Ok(wt) => wt,
Err(e) => {
let error_msg = format!("Failed to create worktree: {e}");
slog_error!("[agents] {error_msg}");
let event = AgentEvent::Error {
story_id: sid.clone(),
agent_name: aname.clone(),
message: error_msg,
};
if let Ok(mut log) = log_clone.lock() {
log.push(event.clone());
}
let _ = tx_clone.send(event);
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.status = AgentStatus::Failed;
}
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
return;
}
};

// Step 2: store worktree info and render agent command/args/prompt.
let wt_path_str = wt_info.path.to_string_lossy().to_string();
{
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.worktree_info = Some(wt_info.clone());
}
}

let (command, args, mut prompt) = match config_clone.render_agent_args(
&wt_path_str,
&sid,
Some(&aname),
Some(&wt_info.base_branch),
) {
Ok(result) => result,
Err(e) => {
let error_msg = format!("Failed to render agent args: {e}");
slog_error!("[agents] {error_msg}");
let event = AgentEvent::Error {
story_id: sid.clone(),
agent_name: aname.clone(),
message: error_msg,
};
if let Ok(mut log) = log_clone.lock() {
log.push(event.clone());
}
let _ = tx_clone.send(event);
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.status = AgentStatus::Failed;
}
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
return;
}
};

// Append project-local prompt content (.huskies/AGENT.md) to the
// baked-in prompt so every agent role sees project-specific guidance
// without any config changes. The file is read fresh each spawn;
// if absent or empty, the prompt is unchanged and no warning is logged.
if let Some(local) =
crate::agents::local_prompt::read_project_local_prompt(&project_root_clone)
{
prompt.push_str("\n\n");
prompt.push_str(&local);
}

// Build the effective prompt and determine resume session.
//
// When resuming a previous session, discard the full rendered prompt
// (which would re-read CLAUDE.md and README) and send only the gate
// failure context as a new message. On a fresh start, append the
// failure context to the original prompt as before.
let effective_prompt = match &session_id_to_resume_owned {
Some(_) => resume_context_owned.unwrap_or_default(),
None => {
if let Some(ctx) = resume_context_owned {
prompt.push_str(&ctx);
}
prompt
}
};

// Step 3: transition to Running now that the worktree is ready.
{
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.status = AgentStatus::Running;
}
}
let _ = tx_clone.send(AgentEvent::Status {
story_id: sid.clone(),
agent_name: aname.clone(),
status: "running".to_string(),
});
AgentPool::notify_agent_state_changed(&watcher_tx_clone);

// Step 4: launch the agent process via the configured runtime.
let runtime_name = config_clone
.find_agent(&aname)
.and_then(|a| a.runtime.as_deref())
.unwrap_or("claude-code");

let run_result = match runtime_name {
"claude-code" => {
let runtime = ClaudeCodeRuntime::new(
child_killers_clone.clone(),
watcher_tx_clone.clone(),
);
let ctx = RuntimeContext {
story_id: sid.clone(),
agent_name: aname.clone(),
command,
args,
prompt: effective_prompt,
cwd: wt_path_str,
inactivity_timeout_secs,
mcp_port: port_for_task,
session_id_to_resume: session_id_to_resume_owned.clone(),
};
runtime
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
.await
}
"gemini" => {
let runtime = GeminiRuntime::new();
let ctx = RuntimeContext {
story_id: sid.clone(),
agent_name: aname.clone(),
command,
args,
prompt: effective_prompt,
cwd: wt_path_str,
inactivity_timeout_secs,
mcp_port: port_for_task,
session_id_to_resume: session_id_to_resume_owned.clone(),
};
runtime
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
.await
}
"openai" => {
let runtime = OpenAiRuntime::new();
let ctx = RuntimeContext {
story_id: sid.clone(),
agent_name: aname.clone(),
command,
args,
prompt: effective_prompt,
cwd: wt_path_str,
inactivity_timeout_secs,
mcp_port: port_for_task,
session_id_to_resume: session_id_to_resume_owned,
};
runtime
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
.await
}
other => Err(format!(
"Unknown agent runtime '{other}'; check the 'runtime' field in project.toml. \
Supported: 'claude-code', 'gemini', 'openai'"
)),
};

match run_result {
Ok(result) => {
// Persist token usage if the agent reported it.
if let Some(ref usage) = result.token_usage
&& let Ok(agents) = agents_ref.lock()
&& let Some(agent) = agents.get(&key_clone)
&& let Some(ref pr) = agent.project_root
{
let model = config_clone
.find_agent(&aname)
.and_then(|a| a.model.clone());
let record = crate::agents::token_usage::build_record(
&sid,
&aname,
model,
usage.clone(),
);
if let Err(e) = crate::agents::token_usage::append_record(pr, &record) {
slog_error!(
"[agents] Failed to persist token usage for \
{sid}:{aname}: {e}"
);
}
}

// Mergemaster agents have their own completion path via
// start_merge_agent_work / run_merge_pipeline and must NOT go
// through server-owned gates. When a mergemaster exits early
// (e.g. rate-limited before calling start_merge_agent_work) the
// feature-branch worktree compiles fine and post-merge tests on
// master pass (nothing changed), which would wrongly advance the
// story to 5_done/ without any squash merge having occurred.
// Instead: just remove the agent from the pool and let
// auto-assign restart a new mergemaster for the story.
let stage = config_clone
.find_agent(&aname)
.map(agent_config_stage)
.unwrap_or_else(|| pipeline_stage(&aname));
if stage == PipelineStage::Mergemaster {
let (tx_done, done_session_id) = {
let mut lock = match agents_ref.lock() {
Ok(a) => a,
Err(_) => return,
};
if let Some(agent) = lock.remove(&key_clone) {
(agent.tx, agent.session_id.or(result.session_id))
} else {
(tx_clone.clone(), result.session_id)
}
};
// Clear any stale Running merge job so the next mergemaster
// can call start_merge_agent_work without hitting "Merge
// already in progress" (bug 498).
if let Ok(mut jobs) = merge_jobs_clone.lock()
&& let Some(job) = jobs.get(&sid)
&& matches!(job.status, crate::agents::merge::MergeJobStatus::Running)
{
jobs.remove(&sid);
}
let _ = tx_done.send(AgentEvent::Done {
story_id: sid.clone(),
agent_name: aname.clone(),
session_id: done_session_id,
});
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
// Send a WorkItem event so the auto-assign watcher loop
// re-dispatches a new mergemaster if the story still needs
// merging. This avoids an async call to start_agent inside
// a tokio::spawn (which would require Send).
let _ = watcher_tx_clone.send(crate::io::watcher::WatcherEvent::WorkItem {
stage: "4_merge".to_string(),
item_id: sid.clone(),
action: "reassign".to_string(),
commit_msg: String::new(),
from_stage: None,
});
} else {
// Server-owned completion: run acceptance gates automatically
// when the agent process exits normally.
super::super::pipeline::run_server_owned_completion(
&agents_ref,
port_for_task,
&sid,
&aname,
result.session_id,
watcher_tx_clone.clone(),
)
.await;
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
}
}
Err(e) => {
slog_error!("[agents] Agent process error for {aname} on {sid}: {e}");
let event = AgentEvent::Error {
story_id: sid.clone(),
agent_name: aname.clone(),
message: e,
};
if let Ok(mut log) = log_clone.lock() {
log.push(event.clone());
}
let _ = tx_clone.send(event);
if let Ok(mut agents) = agents_ref.lock()
&& let Some(agent) = agents.get_mut(&key_clone)
{
agent.status = AgentStatus::Failed;
}
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
}
}
}
@@ -0,0 +1,69 @@
//! Pre-lock validation helpers for `AgentPool::start_agent`.

use std::path::Path;

use crate::config::ProjectConfig;

use super::super::super::{PipelineStage, agent_config_stage, pipeline_stage};
use super::super::worktree::find_active_story_stage;

/// Validate that an explicit `agent_name` is allowed to attach to `story_id`'s
/// current pipeline stage.
///
/// Prevents wrong-stage assignments like a mergemaster on a coding-stage story
/// (bug 312). Returns `Ok(())` if the agent has no specific stage (e.g.
/// supervisor) or the story is not in an active stage; `Err` with a descriptive
/// message on a stage mismatch.
pub(super) fn validate_agent_stage(
config: &ProjectConfig,
project_root: &Path,
story_id: &str,
agent_name: Option<&str>,
) -> Result<(), String> {
let Some(name) = agent_name else {
return Ok(());
};
let agent_stage = config
.find_agent(name)
.map(agent_config_stage)
.unwrap_or_else(|| pipeline_stage(name));
if agent_stage == PipelineStage::Other {
return Ok(());
}
let Some(story_stage_dir) = find_active_story_stage(project_root, story_id) else {
return Ok(());
};
let expected_stage = match story_stage_dir {
"2_current" => PipelineStage::Coder,
"3_qa" => PipelineStage::Qa,
"4_merge" => PipelineStage::Mergemaster,
_ => PipelineStage::Other,
};
if expected_stage != PipelineStage::Other && expected_stage != agent_stage {
return Err(format!(
"Agent '{name}' (stage: {agent_stage:?}) cannot be assigned to \
story '{story_id}' in {story_stage_dir}/ (requires stage: {expected_stage:?})"
));
}
Ok(())
}

/// Read the preferred `agent:` field from the story's front matter.
///
/// When `agent_name` is `None` (caller is auto-selecting), this lets
/// `start_agent` honour an explicit `agent: coder-opus` written by the
/// `assign` command (bug 379). Returns `None` when an explicit agent_name
/// was already supplied or when the story has no front-matter preference.
pub(super) fn read_front_matter_agent(
story_id: &str,
agent_name: Option<&str>,
) -> Option<String> {
if agent_name.is_some() {
return None;
}
crate::db::read_content(story_id).and_then(|contents| {
crate::io::story_metadata::parse_front_matter(&contents)
.ok()?
.agent
})
}