refactor: split agents/pool/start.rs into mod.rs + validation.rs + spawn.rs
The 1630-line start.rs is split into a sub-module directory: - validation.rs: validate_agent_stage + read_front_matter_agent helpers (69 lines) - spawn.rs: run_agent_spawn — the background async work that was inlined as a tokio::spawn closure body inside start_agent (359 lines) - mod.rs: AgentPool::start_agent orchestrator + tests (1062 lines) Stage validation and front-matter agent reading are pre-lock pure helpers that naturally extract. The spawn closure body becomes a free async fn that takes the previously-cloned values as parameters; rebound to the original _clone / _owned names at the top of the body so the actual work code is a verbatim copy. No behaviour change. All 23 start tests pass; full suite green.
This commit is contained in:
@@ -16,6 +16,12 @@ use super::types::{PendingGuard, StoryAgent, composite_key};
|
|||||||
use super::worktree::find_active_story_stage;
|
use super::worktree::find_active_story_stage;
|
||||||
use super::{AgentPool, auto_assign};
|
use super::{AgentPool, auto_assign};
|
||||||
|
|
||||||
|
mod spawn;
|
||||||
|
mod validation;
|
||||||
|
|
||||||
|
use validation::{read_front_matter_agent, validate_agent_stage};
|
||||||
|
|
||||||
|
|
||||||
impl AgentPool {
|
impl AgentPool {
|
||||||
/// Start an agent for a story: load config, create worktree, spawn agent.
|
/// Start an agent for a story: load config, create worktree, spawn agent.
|
||||||
///
|
///
|
||||||
@@ -71,45 +77,12 @@ impl AgentPool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Validate that the agent's configured stage matches the story's
|
// Validate that the agent's configured stage matches the story's
|
||||||
// pipeline stage. This prevents any caller (auto-assign, MCP tool,
|
// pipeline stage. (See validation::validate_agent_stage.)
|
||||||
// pipeline advance, supervisor) from starting a wrong-stage agent on
|
validate_agent_stage(&config, project_root, story_id, agent_name)?;
|
||||||
// a story — e.g. mergemaster on a coding-stage story (bug 312).
|
|
||||||
if let Some(name) = agent_name {
|
|
||||||
let agent_stage = config
|
|
||||||
.find_agent(name)
|
|
||||||
.map(agent_config_stage)
|
|
||||||
.unwrap_or_else(|| pipeline_stage(name));
|
|
||||||
if agent_stage != PipelineStage::Other
|
|
||||||
&& let Some(story_stage_dir) = find_active_story_stage(project_root, story_id)
|
|
||||||
{
|
|
||||||
let expected_stage = match story_stage_dir {
|
|
||||||
"2_current" => PipelineStage::Coder,
|
|
||||||
"3_qa" => PipelineStage::Qa,
|
|
||||||
"4_merge" => PipelineStage::Mergemaster,
|
|
||||||
_ => PipelineStage::Other,
|
|
||||||
};
|
|
||||||
if expected_stage != PipelineStage::Other && expected_stage != agent_stage {
|
|
||||||
return Err(format!(
|
|
||||||
"Agent '{name}' (stage: {agent_stage:?}) cannot be assigned to \
|
|
||||||
story '{story_id}' in {story_stage_dir}/ (requires stage: {expected_stage:?})"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read the preferred agent from the story's front matter before acquiring
|
// Read the preferred agent from the story's front matter before acquiring
|
||||||
// the lock. When no explicit agent_name is given, this lets start_agent
|
// the lock. (See validation::read_front_matter_agent.)
|
||||||
// honour `agent: coder-opus` written by the `assign` command — mirroring
|
let front_matter_agent: Option<String> = read_front_matter_agent(story_id, agent_name);
|
||||||
// the auto_assign path (bug 379).
|
|
||||||
let front_matter_agent: Option<String> = if agent_name.is_none() {
|
|
||||||
crate::db::read_content(story_id).and_then(|contents| {
|
|
||||||
crate::io::story_metadata::parse_front_matter(&contents)
|
|
||||||
.ok()?
|
|
||||||
.agent
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
// Atomically resolve agent name, check availability, and register as
|
// Atomically resolve agent name, check availability, and register as
|
||||||
// Pending. When `agent_name` is `None` the first idle coder is
|
// Pending. When `agent_name` is `None` the first idle coder is
|
||||||
@@ -320,317 +293,28 @@ impl AgentPool {
|
|||||||
.unwrap_or(300);
|
.unwrap_or(300);
|
||||||
|
|
||||||
// Clone all values needed inside the background spawn.
|
// Clone all values needed inside the background spawn.
|
||||||
let project_root_clone = project_root.to_path_buf();
|
|
||||||
let config_clone = config.clone();
|
|
||||||
let resume_context_owned = resume_context.map(str::to_string);
|
|
||||||
let session_id_to_resume_owned = session_id_to_resume;
|
|
||||||
let sid = story_id.to_string();
|
|
||||||
let aname = resolved_name.clone();
|
|
||||||
let tx_clone = tx.clone();
|
|
||||||
let agents_ref = self.agents.clone();
|
|
||||||
let key_clone = key.clone();
|
|
||||||
let log_clone = event_log.clone();
|
|
||||||
let port_for_task = self.port;
|
|
||||||
let log_writer_clone = log_writer.clone();
|
|
||||||
let child_killers_clone = self.child_killers.clone();
|
|
||||||
let watcher_tx_clone = self.watcher_tx.clone();
|
|
||||||
let merge_jobs_clone = Arc::clone(&self.merge_jobs);
|
|
||||||
|
|
||||||
// Spawn the background task. Worktree creation and agent launch happen here
|
// Spawn the background task. Worktree creation and agent launch happen here
|
||||||
// so `start_agent` returns immediately after registering the agent as
|
// so `start_agent` returns immediately after registering the agent as
|
||||||
// Pending — non-blocking by design (story 157).
|
// Pending — non-blocking by design (story 157).
|
||||||
let handle = tokio::spawn(async move {
|
let handle = tokio::spawn(spawn::run_agent_spawn(
|
||||||
// Step 1: create the worktree (slow — git checkout, pnpm install, etc.)
|
project_root.to_path_buf(),
|
||||||
let wt_info = match crate::worktree::create_worktree(
|
config.clone(),
|
||||||
&project_root_clone,
|
resume_context.map(str::to_string),
|
||||||
&sid,
|
session_id_to_resume,
|
||||||
&config_clone,
|
story_id.to_string(),
|
||||||
port_for_task,
|
resolved_name.clone(),
|
||||||
)
|
tx.clone(),
|
||||||
.await
|
self.agents.clone(),
|
||||||
{
|
key.clone(),
|
||||||
Ok(wt) => wt,
|
event_log.clone(),
|
||||||
Err(e) => {
|
self.port,
|
||||||
let error_msg = format!("Failed to create worktree: {e}");
|
log_writer.clone(),
|
||||||
slog_error!("[agents] {error_msg}");
|
self.child_killers.clone(),
|
||||||
let event = AgentEvent::Error {
|
self.watcher_tx.clone(),
|
||||||
story_id: sid.clone(),
|
Arc::clone(&self.merge_jobs),
|
||||||
agent_name: aname.clone(),
|
inactivity_timeout_secs,
|
||||||
message: error_msg,
|
));
|
||||||
};
|
|
||||||
if let Ok(mut log) = log_clone.lock() {
|
|
||||||
log.push(event.clone());
|
|
||||||
}
|
|
||||||
let _ = tx_clone.send(event);
|
|
||||||
if let Ok(mut agents) = agents_ref.lock()
|
|
||||||
&& let Some(agent) = agents.get_mut(&key_clone)
|
|
||||||
{
|
|
||||||
agent.status = AgentStatus::Failed;
|
|
||||||
}
|
|
||||||
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Step 2: store worktree info and render agent command/args/prompt.
|
|
||||||
let wt_path_str = wt_info.path.to_string_lossy().to_string();
|
|
||||||
{
|
|
||||||
if let Ok(mut agents) = agents_ref.lock()
|
|
||||||
&& let Some(agent) = agents.get_mut(&key_clone)
|
|
||||||
{
|
|
||||||
agent.worktree_info = Some(wt_info.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let (command, args, mut prompt) = match config_clone.render_agent_args(
|
|
||||||
&wt_path_str,
|
|
||||||
&sid,
|
|
||||||
Some(&aname),
|
|
||||||
Some(&wt_info.base_branch),
|
|
||||||
) {
|
|
||||||
Ok(result) => result,
|
|
||||||
Err(e) => {
|
|
||||||
let error_msg = format!("Failed to render agent args: {e}");
|
|
||||||
slog_error!("[agents] {error_msg}");
|
|
||||||
let event = AgentEvent::Error {
|
|
||||||
story_id: sid.clone(),
|
|
||||||
agent_name: aname.clone(),
|
|
||||||
message: error_msg,
|
|
||||||
};
|
|
||||||
if let Ok(mut log) = log_clone.lock() {
|
|
||||||
log.push(event.clone());
|
|
||||||
}
|
|
||||||
let _ = tx_clone.send(event);
|
|
||||||
if let Ok(mut agents) = agents_ref.lock()
|
|
||||||
&& let Some(agent) = agents.get_mut(&key_clone)
|
|
||||||
{
|
|
||||||
agent.status = AgentStatus::Failed;
|
|
||||||
}
|
|
||||||
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Append project-local prompt content (.huskies/AGENT.md) to the
|
|
||||||
// baked-in prompt so every agent role sees project-specific guidance
|
|
||||||
// without any config changes. The file is read fresh each spawn;
|
|
||||||
// if absent or empty, the prompt is unchanged and no warning is logged.
|
|
||||||
if let Some(local) =
|
|
||||||
crate::agents::local_prompt::read_project_local_prompt(&project_root_clone)
|
|
||||||
{
|
|
||||||
prompt.push_str("\n\n");
|
|
||||||
prompt.push_str(&local);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build the effective prompt and determine resume session.
|
|
||||||
//
|
|
||||||
// When resuming a previous session, discard the full rendered prompt
|
|
||||||
// (which would re-read CLAUDE.md and README) and send only the gate
|
|
||||||
// failure context as a new message. On a fresh start, append the
|
|
||||||
// failure context to the original prompt as before.
|
|
||||||
let effective_prompt = match &session_id_to_resume_owned {
|
|
||||||
Some(_) => resume_context_owned.unwrap_or_default(),
|
|
||||||
None => {
|
|
||||||
if let Some(ctx) = resume_context_owned {
|
|
||||||
prompt.push_str(&ctx);
|
|
||||||
}
|
|
||||||
prompt
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Step 3: transition to Running now that the worktree is ready.
|
|
||||||
{
|
|
||||||
if let Ok(mut agents) = agents_ref.lock()
|
|
||||||
&& let Some(agent) = agents.get_mut(&key_clone)
|
|
||||||
{
|
|
||||||
agent.status = AgentStatus::Running;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let _ = tx_clone.send(AgentEvent::Status {
|
|
||||||
story_id: sid.clone(),
|
|
||||||
agent_name: aname.clone(),
|
|
||||||
status: "running".to_string(),
|
|
||||||
});
|
|
||||||
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
|
||||||
|
|
||||||
// Step 4: launch the agent process via the configured runtime.
|
|
||||||
let runtime_name = config_clone
|
|
||||||
.find_agent(&aname)
|
|
||||||
.and_then(|a| a.runtime.as_deref())
|
|
||||||
.unwrap_or("claude-code");
|
|
||||||
|
|
||||||
let run_result = match runtime_name {
|
|
||||||
"claude-code" => {
|
|
||||||
let runtime = ClaudeCodeRuntime::new(
|
|
||||||
child_killers_clone.clone(),
|
|
||||||
watcher_tx_clone.clone(),
|
|
||||||
);
|
|
||||||
let ctx = RuntimeContext {
|
|
||||||
story_id: sid.clone(),
|
|
||||||
agent_name: aname.clone(),
|
|
||||||
command,
|
|
||||||
args,
|
|
||||||
prompt: effective_prompt,
|
|
||||||
cwd: wt_path_str,
|
|
||||||
inactivity_timeout_secs,
|
|
||||||
mcp_port: port_for_task,
|
|
||||||
session_id_to_resume: session_id_to_resume_owned.clone(),
|
|
||||||
};
|
|
||||||
runtime
|
|
||||||
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
|
|
||||||
.await
|
|
||||||
}
|
|
||||||
"gemini" => {
|
|
||||||
let runtime = GeminiRuntime::new();
|
|
||||||
let ctx = RuntimeContext {
|
|
||||||
story_id: sid.clone(),
|
|
||||||
agent_name: aname.clone(),
|
|
||||||
command,
|
|
||||||
args,
|
|
||||||
prompt: effective_prompt,
|
|
||||||
cwd: wt_path_str,
|
|
||||||
inactivity_timeout_secs,
|
|
||||||
mcp_port: port_for_task,
|
|
||||||
session_id_to_resume: session_id_to_resume_owned.clone(),
|
|
||||||
};
|
|
||||||
runtime
|
|
||||||
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
|
|
||||||
.await
|
|
||||||
}
|
|
||||||
"openai" => {
|
|
||||||
let runtime = OpenAiRuntime::new();
|
|
||||||
let ctx = RuntimeContext {
|
|
||||||
story_id: sid.clone(),
|
|
||||||
agent_name: aname.clone(),
|
|
||||||
command,
|
|
||||||
args,
|
|
||||||
prompt: effective_prompt,
|
|
||||||
cwd: wt_path_str,
|
|
||||||
inactivity_timeout_secs,
|
|
||||||
mcp_port: port_for_task,
|
|
||||||
session_id_to_resume: session_id_to_resume_owned,
|
|
||||||
};
|
|
||||||
runtime
|
|
||||||
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
|
|
||||||
.await
|
|
||||||
}
|
|
||||||
other => Err(format!(
|
|
||||||
"Unknown agent runtime '{other}'; check the 'runtime' field in project.toml. \
|
|
||||||
Supported: 'claude-code', 'gemini', 'openai'"
|
|
||||||
)),
|
|
||||||
};
|
|
||||||
|
|
||||||
match run_result {
|
|
||||||
Ok(result) => {
|
|
||||||
// Persist token usage if the agent reported it.
|
|
||||||
if let Some(ref usage) = result.token_usage
|
|
||||||
&& let Ok(agents) = agents_ref.lock()
|
|
||||||
&& let Some(agent) = agents.get(&key_clone)
|
|
||||||
&& let Some(ref pr) = agent.project_root
|
|
||||||
{
|
|
||||||
let model = config_clone
|
|
||||||
.find_agent(&aname)
|
|
||||||
.and_then(|a| a.model.clone());
|
|
||||||
let record = crate::agents::token_usage::build_record(
|
|
||||||
&sid,
|
|
||||||
&aname,
|
|
||||||
model,
|
|
||||||
usage.clone(),
|
|
||||||
);
|
|
||||||
if let Err(e) = crate::agents::token_usage::append_record(pr, &record) {
|
|
||||||
slog_error!(
|
|
||||||
"[agents] Failed to persist token usage for \
|
|
||||||
{sid}:{aname}: {e}"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mergemaster agents have their own completion path via
|
|
||||||
// start_merge_agent_work / run_merge_pipeline and must NOT go
|
|
||||||
// through server-owned gates. When a mergemaster exits early
|
|
||||||
// (e.g. rate-limited before calling start_merge_agent_work) the
|
|
||||||
// feature-branch worktree compiles fine and post-merge tests on
|
|
||||||
// master pass (nothing changed), which would wrongly advance the
|
|
||||||
// story to 5_done/ without any squash merge having occurred.
|
|
||||||
// Instead: just remove the agent from the pool and let
|
|
||||||
// auto-assign restart a new mergemaster for the story.
|
|
||||||
let stage = config_clone
|
|
||||||
.find_agent(&aname)
|
|
||||||
.map(agent_config_stage)
|
|
||||||
.unwrap_or_else(|| pipeline_stage(&aname));
|
|
||||||
if stage == PipelineStage::Mergemaster {
|
|
||||||
let (tx_done, done_session_id) = {
|
|
||||||
let mut lock = match agents_ref.lock() {
|
|
||||||
Ok(a) => a,
|
|
||||||
Err(_) => return,
|
|
||||||
};
|
|
||||||
if let Some(agent) = lock.remove(&key_clone) {
|
|
||||||
(agent.tx, agent.session_id.or(result.session_id))
|
|
||||||
} else {
|
|
||||||
(tx_clone.clone(), result.session_id)
|
|
||||||
}
|
|
||||||
};
|
|
||||||
// Clear any stale Running merge job so the next mergemaster
|
|
||||||
// can call start_merge_agent_work without hitting "Merge
|
|
||||||
// already in progress" (bug 498).
|
|
||||||
if let Ok(mut jobs) = merge_jobs_clone.lock()
|
|
||||||
&& let Some(job) = jobs.get(&sid)
|
|
||||||
&& matches!(job.status, crate::agents::merge::MergeJobStatus::Running)
|
|
||||||
{
|
|
||||||
jobs.remove(&sid);
|
|
||||||
}
|
|
||||||
let _ = tx_done.send(AgentEvent::Done {
|
|
||||||
story_id: sid.clone(),
|
|
||||||
agent_name: aname.clone(),
|
|
||||||
session_id: done_session_id,
|
|
||||||
});
|
|
||||||
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
|
||||||
// Send a WorkItem event so the auto-assign watcher loop
|
|
||||||
// re-dispatches a new mergemaster if the story still needs
|
|
||||||
// merging. This avoids an async call to start_agent inside
|
|
||||||
// a tokio::spawn (which would require Send).
|
|
||||||
let _ = watcher_tx_clone.send(crate::io::watcher::WatcherEvent::WorkItem {
|
|
||||||
stage: "4_merge".to_string(),
|
|
||||||
item_id: sid.clone(),
|
|
||||||
action: "reassign".to_string(),
|
|
||||||
commit_msg: String::new(),
|
|
||||||
from_stage: None,
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
// Server-owned completion: run acceptance gates automatically
|
|
||||||
// when the agent process exits normally.
|
|
||||||
super::pipeline::run_server_owned_completion(
|
|
||||||
&agents_ref,
|
|
||||||
port_for_task,
|
|
||||||
&sid,
|
|
||||||
&aname,
|
|
||||||
result.session_id,
|
|
||||||
watcher_tx_clone.clone(),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
slog_error!("[agents] Agent process error for {aname} on {sid}: {e}");
|
|
||||||
let event = AgentEvent::Error {
|
|
||||||
story_id: sid.clone(),
|
|
||||||
agent_name: aname.clone(),
|
|
||||||
message: e,
|
|
||||||
};
|
|
||||||
if let Ok(mut log) = log_clone.lock() {
|
|
||||||
log.push(event.clone());
|
|
||||||
}
|
|
||||||
let _ = tx_clone.send(event);
|
|
||||||
if let Ok(mut agents) = agents_ref.lock()
|
|
||||||
&& let Some(agent) = agents.get_mut(&key_clone)
|
|
||||||
{
|
|
||||||
agent.status = AgentStatus::Failed;
|
|
||||||
}
|
|
||||||
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Store the task handle while the agent is still Pending.
|
// Store the task handle while the agent is still Pending.
|
||||||
{
|
{
|
||||||
@@ -0,0 +1,361 @@
|
|||||||
|
//! Background async work spawned by `AgentPool::start_agent`.
|
||||||
|
//!
|
||||||
|
//! `start_agent` returns immediately after registering the agent as `Pending`;
|
||||||
|
//! this module runs the slow worktree creation, agent process launch, and
|
||||||
|
//! event streaming in the background (story 157).
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
|
use portable_pty::ChildKiller;
|
||||||
|
use tokio::sync::broadcast;
|
||||||
|
|
||||||
|
use crate::agent_log::AgentLogWriter;
|
||||||
|
use crate::config::ProjectConfig;
|
||||||
|
use crate::io::watcher::WatcherEvent;
|
||||||
|
use crate::slog_error;
|
||||||
|
|
||||||
|
use super::super::super::{
|
||||||
|
AgentEvent, AgentStatus, PipelineStage, agent_config_stage, pipeline_stage,
|
||||||
|
};
|
||||||
|
use super::super::super::merge::MergeJob;
|
||||||
|
use super::super::AgentPool;
|
||||||
|
use super::super::super::runtime::{
|
||||||
|
AgentRuntime, ClaudeCodeRuntime, GeminiRuntime, OpenAiRuntime, RuntimeContext,
|
||||||
|
};
|
||||||
|
use super::super::types::StoryAgent;
|
||||||
|
|
||||||
|
/// Run the background worktree-creation + agent-launch flow.
|
||||||
|
///
|
||||||
|
/// Caller (`AgentPool::start_agent`) wraps this in `tokio::spawn` and stores
|
||||||
|
/// the resulting handle on the Pending entry so cancellation works.
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
|
pub(super) async fn run_agent_spawn(
|
||||||
|
project_root: PathBuf,
|
||||||
|
config: ProjectConfig,
|
||||||
|
resume_context: Option<String>,
|
||||||
|
session_id_to_resume: Option<String>,
|
||||||
|
story_id: String,
|
||||||
|
agent_name: String,
|
||||||
|
tx: broadcast::Sender<AgentEvent>,
|
||||||
|
agents: Arc<Mutex<HashMap<String, StoryAgent>>>,
|
||||||
|
key: String,
|
||||||
|
event_log: Arc<Mutex<Vec<AgentEvent>>>,
|
||||||
|
port: u16,
|
||||||
|
log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
|
||||||
|
child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
|
||||||
|
watcher_tx: broadcast::Sender<WatcherEvent>,
|
||||||
|
merge_jobs: Arc<Mutex<HashMap<String, MergeJob>>>,
|
||||||
|
inactivity_timeout_secs: u64,
|
||||||
|
) {
|
||||||
|
// Re-bind to the legacy `_clone` / `_owned` names so the body below remains
|
||||||
|
// a verbatim copy of the original closure (story 157).
|
||||||
|
let project_root_clone = project_root;
|
||||||
|
let config_clone = config;
|
||||||
|
let resume_context_owned = resume_context;
|
||||||
|
let session_id_to_resume_owned = session_id_to_resume;
|
||||||
|
let sid = story_id;
|
||||||
|
let aname = agent_name;
|
||||||
|
let tx_clone = tx;
|
||||||
|
let agents_ref = agents;
|
||||||
|
let key_clone = key;
|
||||||
|
let log_clone = event_log;
|
||||||
|
let port_for_task = port;
|
||||||
|
let log_writer_clone = log_writer;
|
||||||
|
let child_killers_clone = child_killers;
|
||||||
|
let watcher_tx_clone = watcher_tx;
|
||||||
|
let merge_jobs_clone = merge_jobs;
|
||||||
|
let _ = inactivity_timeout_secs; // currently unused inside the closure body
|
||||||
|
|
||||||
|
// Step 1: create the worktree (slow — git checkout, pnpm install, etc.)
|
||||||
|
let wt_info = match crate::worktree::create_worktree(
|
||||||
|
&project_root_clone,
|
||||||
|
&sid,
|
||||||
|
&config_clone,
|
||||||
|
port_for_task,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(wt) => wt,
|
||||||
|
Err(e) => {
|
||||||
|
let error_msg = format!("Failed to create worktree: {e}");
|
||||||
|
slog_error!("[agents] {error_msg}");
|
||||||
|
let event = AgentEvent::Error {
|
||||||
|
story_id: sid.clone(),
|
||||||
|
agent_name: aname.clone(),
|
||||||
|
message: error_msg,
|
||||||
|
};
|
||||||
|
if let Ok(mut log) = log_clone.lock() {
|
||||||
|
log.push(event.clone());
|
||||||
|
}
|
||||||
|
let _ = tx_clone.send(event);
|
||||||
|
if let Ok(mut agents) = agents_ref.lock()
|
||||||
|
&& let Some(agent) = agents.get_mut(&key_clone)
|
||||||
|
{
|
||||||
|
agent.status = AgentStatus::Failed;
|
||||||
|
}
|
||||||
|
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Step 2: store worktree info and render agent command/args/prompt.
|
||||||
|
let wt_path_str = wt_info.path.to_string_lossy().to_string();
|
||||||
|
{
|
||||||
|
if let Ok(mut agents) = agents_ref.lock()
|
||||||
|
&& let Some(agent) = agents.get_mut(&key_clone)
|
||||||
|
{
|
||||||
|
agent.worktree_info = Some(wt_info.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let (command, args, mut prompt) = match config_clone.render_agent_args(
|
||||||
|
&wt_path_str,
|
||||||
|
&sid,
|
||||||
|
Some(&aname),
|
||||||
|
Some(&wt_info.base_branch),
|
||||||
|
) {
|
||||||
|
Ok(result) => result,
|
||||||
|
Err(e) => {
|
||||||
|
let error_msg = format!("Failed to render agent args: {e}");
|
||||||
|
slog_error!("[agents] {error_msg}");
|
||||||
|
let event = AgentEvent::Error {
|
||||||
|
story_id: sid.clone(),
|
||||||
|
agent_name: aname.clone(),
|
||||||
|
message: error_msg,
|
||||||
|
};
|
||||||
|
if let Ok(mut log) = log_clone.lock() {
|
||||||
|
log.push(event.clone());
|
||||||
|
}
|
||||||
|
let _ = tx_clone.send(event);
|
||||||
|
if let Ok(mut agents) = agents_ref.lock()
|
||||||
|
&& let Some(agent) = agents.get_mut(&key_clone)
|
||||||
|
{
|
||||||
|
agent.status = AgentStatus::Failed;
|
||||||
|
}
|
||||||
|
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Append project-local prompt content (.huskies/AGENT.md) to the
|
||||||
|
// baked-in prompt so every agent role sees project-specific guidance
|
||||||
|
// without any config changes. The file is read fresh each spawn;
|
||||||
|
// if absent or empty, the prompt is unchanged and no warning is logged.
|
||||||
|
if let Some(local) =
|
||||||
|
crate::agents::local_prompt::read_project_local_prompt(&project_root_clone)
|
||||||
|
{
|
||||||
|
prompt.push_str("\n\n");
|
||||||
|
prompt.push_str(&local);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the effective prompt and determine resume session.
|
||||||
|
//
|
||||||
|
// When resuming a previous session, discard the full rendered prompt
|
||||||
|
// (which would re-read CLAUDE.md and README) and send only the gate
|
||||||
|
// failure context as a new message. On a fresh start, append the
|
||||||
|
// failure context to the original prompt as before.
|
||||||
|
let effective_prompt = match &session_id_to_resume_owned {
|
||||||
|
Some(_) => resume_context_owned.unwrap_or_default(),
|
||||||
|
None => {
|
||||||
|
if let Some(ctx) = resume_context_owned {
|
||||||
|
prompt.push_str(&ctx);
|
||||||
|
}
|
||||||
|
prompt
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Step 3: transition to Running now that the worktree is ready.
|
||||||
|
{
|
||||||
|
if let Ok(mut agents) = agents_ref.lock()
|
||||||
|
&& let Some(agent) = agents.get_mut(&key_clone)
|
||||||
|
{
|
||||||
|
agent.status = AgentStatus::Running;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let _ = tx_clone.send(AgentEvent::Status {
|
||||||
|
story_id: sid.clone(),
|
||||||
|
agent_name: aname.clone(),
|
||||||
|
status: "running".to_string(),
|
||||||
|
});
|
||||||
|
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
||||||
|
|
||||||
|
// Step 4: launch the agent process via the configured runtime.
|
||||||
|
let runtime_name = config_clone
|
||||||
|
.find_agent(&aname)
|
||||||
|
.and_then(|a| a.runtime.as_deref())
|
||||||
|
.unwrap_or("claude-code");
|
||||||
|
|
||||||
|
let run_result = match runtime_name {
|
||||||
|
"claude-code" => {
|
||||||
|
let runtime = ClaudeCodeRuntime::new(
|
||||||
|
child_killers_clone.clone(),
|
||||||
|
watcher_tx_clone.clone(),
|
||||||
|
);
|
||||||
|
let ctx = RuntimeContext {
|
||||||
|
story_id: sid.clone(),
|
||||||
|
agent_name: aname.clone(),
|
||||||
|
command,
|
||||||
|
args,
|
||||||
|
prompt: effective_prompt,
|
||||||
|
cwd: wt_path_str,
|
||||||
|
inactivity_timeout_secs,
|
||||||
|
mcp_port: port_for_task,
|
||||||
|
session_id_to_resume: session_id_to_resume_owned.clone(),
|
||||||
|
};
|
||||||
|
runtime
|
||||||
|
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
"gemini" => {
|
||||||
|
let runtime = GeminiRuntime::new();
|
||||||
|
let ctx = RuntimeContext {
|
||||||
|
story_id: sid.clone(),
|
||||||
|
agent_name: aname.clone(),
|
||||||
|
command,
|
||||||
|
args,
|
||||||
|
prompt: effective_prompt,
|
||||||
|
cwd: wt_path_str,
|
||||||
|
inactivity_timeout_secs,
|
||||||
|
mcp_port: port_for_task,
|
||||||
|
session_id_to_resume: session_id_to_resume_owned.clone(),
|
||||||
|
};
|
||||||
|
runtime
|
||||||
|
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
"openai" => {
|
||||||
|
let runtime = OpenAiRuntime::new();
|
||||||
|
let ctx = RuntimeContext {
|
||||||
|
story_id: sid.clone(),
|
||||||
|
agent_name: aname.clone(),
|
||||||
|
command,
|
||||||
|
args,
|
||||||
|
prompt: effective_prompt,
|
||||||
|
cwd: wt_path_str,
|
||||||
|
inactivity_timeout_secs,
|
||||||
|
mcp_port: port_for_task,
|
||||||
|
session_id_to_resume: session_id_to_resume_owned,
|
||||||
|
};
|
||||||
|
runtime
|
||||||
|
.start(ctx, tx_clone.clone(), log_clone.clone(), log_writer_clone)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
other => Err(format!(
|
||||||
|
"Unknown agent runtime '{other}'; check the 'runtime' field in project.toml. \
|
||||||
|
Supported: 'claude-code', 'gemini', 'openai'"
|
||||||
|
)),
|
||||||
|
};
|
||||||
|
|
||||||
|
match run_result {
|
||||||
|
Ok(result) => {
|
||||||
|
// Persist token usage if the agent reported it.
|
||||||
|
if let Some(ref usage) = result.token_usage
|
||||||
|
&& let Ok(agents) = agents_ref.lock()
|
||||||
|
&& let Some(agent) = agents.get(&key_clone)
|
||||||
|
&& let Some(ref pr) = agent.project_root
|
||||||
|
{
|
||||||
|
let model = config_clone
|
||||||
|
.find_agent(&aname)
|
||||||
|
.and_then(|a| a.model.clone());
|
||||||
|
let record = crate::agents::token_usage::build_record(
|
||||||
|
&sid,
|
||||||
|
&aname,
|
||||||
|
model,
|
||||||
|
usage.clone(),
|
||||||
|
);
|
||||||
|
if let Err(e) = crate::agents::token_usage::append_record(pr, &record) {
|
||||||
|
slog_error!(
|
||||||
|
"[agents] Failed to persist token usage for \
|
||||||
|
{sid}:{aname}: {e}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mergemaster agents have their own completion path via
|
||||||
|
// start_merge_agent_work / run_merge_pipeline and must NOT go
|
||||||
|
// through server-owned gates. When a mergemaster exits early
|
||||||
|
// (e.g. rate-limited before calling start_merge_agent_work) the
|
||||||
|
// feature-branch worktree compiles fine and post-merge tests on
|
||||||
|
// master pass (nothing changed), which would wrongly advance the
|
||||||
|
// story to 5_done/ without any squash merge having occurred.
|
||||||
|
// Instead: just remove the agent from the pool and let
|
||||||
|
// auto-assign restart a new mergemaster for the story.
|
||||||
|
let stage = config_clone
|
||||||
|
.find_agent(&aname)
|
||||||
|
.map(agent_config_stage)
|
||||||
|
.unwrap_or_else(|| pipeline_stage(&aname));
|
||||||
|
if stage == PipelineStage::Mergemaster {
|
||||||
|
let (tx_done, done_session_id) = {
|
||||||
|
let mut lock = match agents_ref.lock() {
|
||||||
|
Ok(a) => a,
|
||||||
|
Err(_) => return,
|
||||||
|
};
|
||||||
|
if let Some(agent) = lock.remove(&key_clone) {
|
||||||
|
(agent.tx, agent.session_id.or(result.session_id))
|
||||||
|
} else {
|
||||||
|
(tx_clone.clone(), result.session_id)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
// Clear any stale Running merge job so the next mergemaster
|
||||||
|
// can call start_merge_agent_work without hitting "Merge
|
||||||
|
// already in progress" (bug 498).
|
||||||
|
if let Ok(mut jobs) = merge_jobs_clone.lock()
|
||||||
|
&& let Some(job) = jobs.get(&sid)
|
||||||
|
&& matches!(job.status, crate::agents::merge::MergeJobStatus::Running)
|
||||||
|
{
|
||||||
|
jobs.remove(&sid);
|
||||||
|
}
|
||||||
|
let _ = tx_done.send(AgentEvent::Done {
|
||||||
|
story_id: sid.clone(),
|
||||||
|
agent_name: aname.clone(),
|
||||||
|
session_id: done_session_id,
|
||||||
|
});
|
||||||
|
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
||||||
|
// Send a WorkItem event so the auto-assign watcher loop
|
||||||
|
// re-dispatches a new mergemaster if the story still needs
|
||||||
|
// merging. This avoids an async call to start_agent inside
|
||||||
|
// a tokio::spawn (which would require Send).
|
||||||
|
let _ = watcher_tx_clone.send(crate::io::watcher::WatcherEvent::WorkItem {
|
||||||
|
stage: "4_merge".to_string(),
|
||||||
|
item_id: sid.clone(),
|
||||||
|
action: "reassign".to_string(),
|
||||||
|
commit_msg: String::new(),
|
||||||
|
from_stage: None,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// Server-owned completion: run acceptance gates automatically
|
||||||
|
// when the agent process exits normally.
|
||||||
|
super::super::pipeline::run_server_owned_completion(
|
||||||
|
&agents_ref,
|
||||||
|
port_for_task,
|
||||||
|
&sid,
|
||||||
|
&aname,
|
||||||
|
result.session_id,
|
||||||
|
watcher_tx_clone.clone(),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
slog_error!("[agents] Agent process error for {aname} on {sid}: {e}");
|
||||||
|
let event = AgentEvent::Error {
|
||||||
|
story_id: sid.clone(),
|
||||||
|
agent_name: aname.clone(),
|
||||||
|
message: e,
|
||||||
|
};
|
||||||
|
if let Ok(mut log) = log_clone.lock() {
|
||||||
|
log.push(event.clone());
|
||||||
|
}
|
||||||
|
let _ = tx_clone.send(event);
|
||||||
|
if let Ok(mut agents) = agents_ref.lock()
|
||||||
|
&& let Some(agent) = agents.get_mut(&key_clone)
|
||||||
|
{
|
||||||
|
agent.status = AgentStatus::Failed;
|
||||||
|
}
|
||||||
|
AgentPool::notify_agent_state_changed(&watcher_tx_clone);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,69 @@
|
|||||||
|
//! Pre-lock validation helpers for `AgentPool::start_agent`.
|
||||||
|
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
use crate::config::ProjectConfig;
|
||||||
|
|
||||||
|
use super::super::super::{PipelineStage, agent_config_stage, pipeline_stage};
|
||||||
|
use super::super::worktree::find_active_story_stage;
|
||||||
|
|
||||||
|
/// Validate that an explicit `agent_name` is allowed to attach to `story_id`'s
|
||||||
|
/// current pipeline stage.
|
||||||
|
///
|
||||||
|
/// Prevents wrong-stage assignments like a mergemaster on a coding-stage story
|
||||||
|
/// (bug 312). Returns `Ok(())` if the agent has no specific stage (e.g.
|
||||||
|
/// supervisor) or the story is not in an active stage; `Err` with a descriptive
|
||||||
|
/// message on a stage mismatch.
|
||||||
|
pub(super) fn validate_agent_stage(
|
||||||
|
config: &ProjectConfig,
|
||||||
|
project_root: &Path,
|
||||||
|
story_id: &str,
|
||||||
|
agent_name: Option<&str>,
|
||||||
|
) -> Result<(), String> {
|
||||||
|
let Some(name) = agent_name else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
let agent_stage = config
|
||||||
|
.find_agent(name)
|
||||||
|
.map(agent_config_stage)
|
||||||
|
.unwrap_or_else(|| pipeline_stage(name));
|
||||||
|
if agent_stage == PipelineStage::Other {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
let Some(story_stage_dir) = find_active_story_stage(project_root, story_id) else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
let expected_stage = match story_stage_dir {
|
||||||
|
"2_current" => PipelineStage::Coder,
|
||||||
|
"3_qa" => PipelineStage::Qa,
|
||||||
|
"4_merge" => PipelineStage::Mergemaster,
|
||||||
|
_ => PipelineStage::Other,
|
||||||
|
};
|
||||||
|
if expected_stage != PipelineStage::Other && expected_stage != agent_stage {
|
||||||
|
return Err(format!(
|
||||||
|
"Agent '{name}' (stage: {agent_stage:?}) cannot be assigned to \
|
||||||
|
story '{story_id}' in {story_stage_dir}/ (requires stage: {expected_stage:?})"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read the preferred `agent:` field from the story's front matter.
|
||||||
|
///
|
||||||
|
/// When `agent_name` is `None` (caller is auto-selecting), this lets
|
||||||
|
/// `start_agent` honour an explicit `agent: coder-opus` written by the
|
||||||
|
/// `assign` command (bug 379). Returns `None` when an explicit agent_name
|
||||||
|
/// was already supplied or when the story has no front-matter preference.
|
||||||
|
pub(super) fn read_front_matter_agent(
|
||||||
|
story_id: &str,
|
||||||
|
agent_name: Option<&str>,
|
||||||
|
) -> Option<String> {
|
||||||
|
if agent_name.is_some() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
crate::db::read_content(story_id).and_then(|contents| {
|
||||||
|
crate::io::story_metadata::parse_front_matter(&contents)
|
||||||
|
.ok()?
|
||||||
|
.agent
|
||||||
|
})
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user