//! Pipeline advance — moves stories forward through pipeline stages after agent completion.

#![allow(unused_imports, dead_code)]

use crate::config::ProjectConfig;
use crate::io::watcher::WatcherEvent;
use crate::slog;
use crate::slog_error;
use crate::slog_warn;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use tokio::sync::broadcast;

use super::super::super::{CompletionReport, PipelineStage, agent_config_stage, pipeline_stage};
use super::super::{AgentPool, StoryAgent};

/// Maximum number of bytes of gate output to include in the failure context
/// injected into the resumed session. Keeps the injected message focused —
/// the tail of the output (where errors appear) is always preserved.
const MAX_GATE_OUTPUT_BYTES: usize = 8_000;

/// Truncate gate output to [`MAX_GATE_OUTPUT_BYTES`], keeping the **tail**
/// (where compiler errors and test failures are reported).
fn truncate_gate_output(output: &str) -> &str {
    if output.len() <= MAX_GATE_OUTPUT_BYTES {
        return output;
    }
    let start = output.len() - MAX_GATE_OUTPUT_BYTES;
    // Advance to the next valid UTF-8 char boundary.
    let mut adjusted = start;
    while !output.is_char_boundary(adjusted) {
        adjusted += 1;
    }
    &output[adjusted..]
}
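
// Illustrative inline test of the tail-keeping truncation above: a minimal
// sketch exercising the char-boundary nudge (module name is arbitrary; it
// only uses the two items defined in this file).
#[cfg(test)]
mod truncate_gate_output_examples {
    use super::{MAX_GATE_OUTPUT_BYTES, truncate_gate_output};

    #[test]
    fn keeps_tail_and_respects_char_boundaries() {
        // 5_000 two-byte chars plus a trailing 'a': 10_001 bytes total.
        let long = format!("{}a", "é".repeat(5_000));
        let tail = truncate_gate_output(&long);
        // The naive cut at len - 8_000 lands mid-'é', so one byte is skipped.
        assert_eq!(tail.len(), MAX_GATE_OUTPUT_BYTES - 1);
        // The tail (where compiler errors and test failures appear) survives.
        assert!(long.ends_with(tail));
    }
}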

impl AgentPool {
    /// Pipeline advancement: after an agent completes, move the story to
    /// the next pipeline stage and start the appropriate agent.
    #[allow(clippy::too_many_arguments)]
    pub(super) async fn run_pipeline_advance(
        &self,
        story_id: &str,
        agent_name: &str,
        completion: CompletionReport,
        project_root: Option<PathBuf>,
        worktree_path: Option<PathBuf>,
        merge_failure_reported: bool,
        previous_session_id: Option<String>,
    ) {
        let project_root = match project_root {
            Some(p) => p,
            None => {
                slog_warn!("[pipeline] No project_root for '{story_id}:{agent_name}'");
                return;
            }
        };

        let config = ProjectConfig::load(&project_root).unwrap_or_default();
        let stage = config
            .find_agent(agent_name)
            .map(agent_config_stage)
            .unwrap_or_else(|| pipeline_stage(agent_name));

        // If the story is frozen, do not advance the pipeline. The agent's work
        // is done but the story stays at its current stage.
        if crate::io::story_metadata::is_story_frozen_in_store(story_id) {
            slog!("[pipeline] Story '{story_id}' is frozen; pipeline advancement suppressed.");
            return;
        }

        match stage {
            PipelineStage::Other => {
                // Supervisors and unknown agents do not advance the pipeline.
            }
            PipelineStage::Coder => {
                if completion.gates_passed {
                    // Determine effective QA mode for this story.
                    let qa_mode = {
                        let item_type = crate::agents::lifecycle::item_type_from_id(story_id);
                        if item_type == "spike" {
                            crate::io::story_metadata::QaMode::Human
                        } else {
                            let default_qa = config.default_qa_mode();
                            resolve_qa_mode_from_store(&project_root, story_id, default_qa)
                        }
                    };

                    match qa_mode {
                        crate::io::story_metadata::QaMode::Server => {
                            slog!(
                                "[pipeline] Coder '{agent_name}' passed gates for '{story_id}'. \
                                qa: server — moving directly to merge."
                            );
                            if let Err(e) = crate::agents::lifecycle::move_story_to_merge(story_id)
                            {
                                slog_error!(
                                    "[pipeline] Failed to move '{story_id}' to 4_merge/: {e}"
                                );
                            } else {
                                self.trigger_server_side_merge(&project_root, story_id);
                            }
                        }
                        crate::io::story_metadata::QaMode::Agent => {
                            slog!(
                                "[pipeline] Coder '{agent_name}' passed gates for '{story_id}'. \
                                qa: agent — moving to QA."
                            );
                            if let Err(e) = crate::agents::lifecycle::move_story_to_qa(story_id) {
                                slog_error!("[pipeline] Failed to move '{story_id}' to 3_qa/: {e}");
                            } else if let Err(e) = self
                                .start_agent(&project_root, story_id, Some("qa"), None, None)
                                .await
                            {
                                slog_error!(
                                    "[pipeline] Failed to start qa agent for '{story_id}': {e}"
                                );
                            }
                        }
                        crate::io::story_metadata::QaMode::Human => {
                            slog!(
                                "[pipeline] Coder '{agent_name}' passed gates for '{story_id}'. \
                                qa: human — holding for human review."
                            );
                            if let Err(e) = crate::agents::lifecycle::move_story_to_qa(story_id) {
                                slog_error!("[pipeline] Failed to move '{story_id}' to 3_qa/: {e}");
                            } else {
                                write_review_hold_to_store(story_id);
                            }
                        }
                    }
                } else {
                    // Bug 645 / 668: Before retry/block, check if the agent left committed
                    // work AND the agent had a passing run_tests result captured during its
                    // session. An agent may crash mid-output (e.g. Claude Code CLI PTY write
                    // assertion) after having already committed valid code and run tests.
                    // We require positive test evidence (not just cargo check) so that only
                    // stories with genuinely passing test suites are salvaged.
                    //
                    // The `run_tests` MCP tool writes `{story_id}:run_tests_ok` to the DB
                    // whenever script/test exits 0 inside a story worktree. Consume the
                    // evidence here so it does not persist to the next agent session.
                    let has_test_evidence =
                        crate::db::read_content(&format!("{story_id}:run_tests_ok")).is_some();
                    crate::db::delete_content(&format!("{story_id}:run_tests_ok"));
                    let work_survived = has_test_evidence
                        && worktree_path.as_ref().is_some_and(|wt_path| {
                            crate::agents::gates::worktree_has_committed_work(wt_path)
                        });
                    if work_survived {
                        slog!(
                            "[pipeline] Coder '{agent_name}' failed gates for '{story_id}' but \
                            committed work survives with captured passing tests. Advancing to QA \
                            instead of retrying (bug 645)."
                        );
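                        // Same QA-mode resolution as in the gates-passed path above:
                        // spikes always go to human review; other items resolve from
                        // the per-story store, falling back to the config default.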
                        let qa_mode = {
                            let item_type = crate::agents::lifecycle::item_type_from_id(story_id);
                            if item_type == "spike" {
                                crate::io::story_metadata::QaMode::Human
                            } else {
                                let default_qa = config.default_qa_mode();
                                resolve_qa_mode_from_store(&project_root, story_id, default_qa)
                            }
                        };
                        match qa_mode {
                            crate::io::story_metadata::QaMode::Server => {
                                if let Err(e) =
                                    crate::agents::lifecycle::move_story_to_merge(story_id)
                                {
                                    slog_error!(
                                        "[pipeline] Failed to move '{story_id}' to 4_merge/: {e}"
                                    );
                                } else {
                                    self.trigger_server_side_merge(&project_root, story_id);
                                }
                            }
                            crate::io::story_metadata::QaMode::Agent => {
                                if let Err(e) = crate::agents::lifecycle::move_story_to_qa(story_id)
                                {
                                    slog_error!(
                                        "[pipeline] Failed to move '{story_id}' to 3_qa/: {e}"
                                    );
                                } else if let Err(e) = self
                                    .start_agent(&project_root, story_id, Some("qa"), None, None)
                                    .await
                                {
                                    slog_error!(
                                        "[pipeline] Failed to start qa for '{story_id}': {e}"
                                    );
                                }
                            }
                            crate::io::story_metadata::QaMode::Human => {
                                if let Err(e) = crate::agents::lifecycle::move_story_to_qa(story_id)
                                {
                                    slog_error!(
                                        "[pipeline] Failed to move '{story_id}' to 3_qa/: {e}"
                                    );
                                } else {
                                    write_review_hold_to_store(story_id);
                                }
                            }
                        }
                    } else {
                        // Persist gate_output so the retry spawn can inject it into
                        // --append-system-prompt (story 881).
                        crate::db::write_content(
                            &format!("{story_id}:gate_output"),
                            &completion.gate_output,
                        );
                        // Increment retry count and check if blocked.
                        if let Some(reason) =
                            should_block_story(story_id, config.max_retries, "coder")
                        {
                            // Story has exceeded retry limit — do not restart.
                            let _ = self.watcher_tx.send(WatcherEvent::StoryBlocked {
                                story_id: story_id.to_string(),
                                reason,
                            });
                        } else {
                            slog!(
                                "[pipeline] Coder '{agent_name}' failed gates for '{story_id}'. Restarting."
                            );
                            let context = format!(
                                "\n\n---\n## Previous Attempt Failed\n\
                                The acceptance gates failed with the following output:\n{}\n\n\
                                Please review the failures above, fix the issues, and try again.",
                                truncate_gate_output(&completion.gate_output)
                            );
                            if let Err(e) = self
                                .start_agent(
                                    &project_root,
                                    story_id,
                                    Some(agent_name),
                                    Some(&context),
                                    previous_session_id,
                                )
                                .await
                            {
                                slog_error!(
                                    "[pipeline] Failed to restart coder '{agent_name}' for '{story_id}': {e}"
                                );
                            }
                        }
                    }
                }
            }
            PipelineStage::Qa => {
                if completion.gates_passed {
                    // Run coverage gate in the QA worktree before advancing to merge.
                    let coverage_path = worktree_path
                        .clone()
                        .unwrap_or_else(|| project_root.clone());
                    let cp = coverage_path.clone();
                    let coverage_result = tokio::task::spawn_blocking(move || {
                        crate::agents::gates::run_coverage_gate(&cp)
                    })
                    .await
                    .unwrap_or_else(|e| {
                        slog_warn!("[pipeline] Coverage gate task panicked: {e}");
                        Ok((false, format!("Coverage gate task panicked: {e}")))
                    });
                    let (coverage_passed, coverage_output) = match coverage_result {
                        Ok(pair) => pair,
                        Err(e) => (false, e),
                    };

                    if coverage_passed {
                        // Check whether this item needs human review before merging.
                        let needs_human_review = {
                            let item_type = crate::agents::lifecycle::item_type_from_id(story_id);
                            if item_type == "spike" {
                                true // Spikes always need human review.
                            } else {
                                let default_qa = config.default_qa_mode();
                                matches!(
                                    resolve_qa_mode_from_store(&project_root, story_id, default_qa),
                                    crate::io::story_metadata::QaMode::Human
                                )
                            }
                        };

                        if needs_human_review {
                            // Hold in 3_qa/ for human review.
                            write_review_hold_to_store(story_id);
                            slog!(
                                "[pipeline] QA passed for '{story_id}'. \
                                Holding for human review. \
                                Worktree preserved at: {worktree_path:?}"
                            );
                        } else {
                            slog!(
                                "[pipeline] QA passed gates and coverage for '{story_id}'. \
                                Moving directly to merge."
                            );
                            if let Err(e) = crate::agents::lifecycle::move_story_to_merge(story_id)
                            {
                                slog_error!(
                                    "[pipeline] Failed to move '{story_id}' to 4_merge/: {e}"
                                );
                            } else {
                                self.trigger_server_side_merge(&project_root, story_id);
                            }
                        }
                    } else if let Some(reason) =
                        should_block_story(story_id, config.max_retries, "qa-coverage")
                    {
                        // Story has exceeded retry limit — do not restart.
                        let _ = self.watcher_tx.send(WatcherEvent::StoryBlocked {
                            story_id: story_id.to_string(),
                            reason,
                        });
                    } else {
                        slog!(
                            "[pipeline] QA coverage gate failed for '{story_id}'. Restarting QA."
                        );
                        let context = format!(
                            "\n\n---\n## Coverage Gate Failed\n\
                            The coverage gate (script/test_coverage) failed with the following output:\n{}\n\n\
                            Please improve test coverage until the coverage gate passes.",
                            coverage_output
                        );
                        if let Err(e) = self
                            .start_agent(&project_root, story_id, Some("qa"), Some(&context), None)
                            .await
                        {
                            slog_error!("[pipeline] Failed to restart qa for '{story_id}': {e}");
                        }
                    }
                } else {
                    // Persist gate_output so the retry spawn can inject it into
                    // --append-system-prompt (story 881).
                    crate::db::write_content(
                        &format!("{story_id}:gate_output"),
                        &completion.gate_output,
                    );
                    if let Some(reason) = should_block_story(story_id, config.max_retries, "qa") {
                        // Story has exceeded retry limit — do not restart.
                        let _ = self.watcher_tx.send(WatcherEvent::StoryBlocked {
                            story_id: story_id.to_string(),
                            reason,
                        });
                    } else {
                        slog!("[pipeline] QA failed gates for '{story_id}'. Restarting.");
                        let context = format!(
                            "\n\n---\n## Previous QA Attempt Failed\n\
                            The acceptance gates failed with the following output:\n{}\n\n\
                            Please re-run and fix the issues.",
                            truncate_gate_output(&completion.gate_output)
                        );
                        if let Err(e) = self
                            .start_agent(&project_root, story_id, Some("qa"), Some(&context), None)
                            .await
                        {
                            slog_error!("[pipeline] Failed to restart qa for '{story_id}': {e}");
                        }
                    }
                }
            }
            PipelineStage::Mergemaster => {
                // Bug 529: Guard against stale mergemaster advances. If the story
                // has already reached done or archived (e.g. a previous mergemaster
                // succeeded), this advance is a zombie — skip it entirely to avoid
                // phantom notifications and redundant post-merge test runs.
                if let Ok(Some(typed_item)) = crate::pipeline_state::read_typed(story_id)
                    && matches!(
                        typed_item.stage,
                        crate::pipeline_state::Stage::Done { .. }
                            | crate::pipeline_state::Stage::Archived { .. }
                    )
                {
                    let current_dir = typed_item.stage.dir_name();
                    slog!(
                        "[pipeline] Skipping stale mergemaster advance for '{story_id}': \
                        story is already in work/{current_dir}/"
                    );
                    // Skip pipeline advancement — do not run post-merge tests,
                    // do not emit notifications, do not restart agents.
                    return;
                }

                // Block advancement if the mergemaster explicitly reported a failure.
                // The server-owned gate check runs in the feature-branch worktree (not
                // master), so `gates_passed=true` is misleading when no code was merged.
                if merge_failure_reported {
                    slog!(
                        "[pipeline] Pipeline advancement blocked for '{story_id}': \
                        mergemaster explicitly reported a merge failure. \
                        Story stays in 4_merge/ for human review."
                    );
                } else {
                    // Run script/test on master (project_root) as the post-merge verification.
                    slog!(
                        "[pipeline] Mergemaster completed for '{story_id}'. Running post-merge tests on master."
                    );
                    let root = project_root.clone();
                    let test_result = tokio::task::spawn_blocking(move || {
                        crate::agents::gates::run_project_tests(&root)
                    })
                    .await
                    .unwrap_or_else(|e| {
                        slog_warn!("[pipeline] Post-merge test task panicked: {e}");
                        Ok((false, format!("Test task panicked: {e}")))
                    });
                    let (passed, output) = match test_result {
                        Ok(pair) => pair,
                        Err(e) => (false, e),
                    };

                    if passed {
                        slog!(
                            "[pipeline] Post-merge tests passed for '{story_id}'. Moving to done."
                        );
                        if let Err(e) = crate::agents::lifecycle::move_story_to_done(story_id) {
                            slog_error!("[pipeline] Failed to move '{story_id}' to done: {e}");
                        }
                        self.remove_agents_for_story(story_id);
                        // TODO: Re-enable worktree cleanup once we have persistent agent logs.
                        // Removing worktrees destroys evidence needed to debug empty-commit agents.
                        // let config =
                        //     crate::config::ProjectConfig::load(&project_root).unwrap_or_default();
                        // if let Err(e) =
                        //     worktree::remove_worktree_by_story_id(&project_root, story_id, &config)
                        //         .await
                        // {
                        //     slog!(
                        //         "[pipeline] Failed to remove worktree for '{story_id}': {e}"
                        //     );
                        // }
                        slog!(
                            "[pipeline] Story '{story_id}' done. Worktree preserved for inspection."
                        );
                    } else if let Some(reason) =
                        should_block_story(story_id, config.max_retries, "mergemaster")
                    {
                        // Story has exceeded retry limit — do not restart.
                        let _ = self.watcher_tx.send(WatcherEvent::StoryBlocked {
                            story_id: story_id.to_string(),
                            reason,
                        });
                    } else {
                        slog!(
                            "[pipeline] Post-merge tests failed for '{story_id}'. Restarting mergemaster."
                        );
                        let context = format!(
                            "\n\n---\n## Post-Merge Test Failed\n\
                            The tests on master failed with the following output:\n{}\n\n\
                            Please investigate and resolve the failures, then call merge_agent_work again.",
                            output
                        );
                        if let Err(e) = self
                            .start_agent(
                                &project_root,
                                story_id,
                                Some("mergemaster"),
                                Some(&context),
                                None,
                            )
                            .await
                        {
                            slog_error!(
                                "[pipeline] Failed to restart mergemaster for '{story_id}': {e}"
                            );
                        }
                    }
                }
            }
        }

        // Always scan for unassigned work after any agent completes, regardless
        // of the outcome (success, failure, restart). This ensures stories that
        // failed agent assignment due to busy agents are retried when agents
        // become available (bug 295).
        self.auto_assign_available_work(&project_root).await;
    }
}

/// Helpers for pipeline advancement, including `spawn_pipeline_advance`, which
/// launches advancement as a background task.
///
/// `spawn_pipeline_advance` is deliberately a **non-async** function so it does
/// not participate in the opaque type cycle between `start_agent` and
/// `run_server_owned_completion`.
mod helpers;

use helpers::{resolve_qa_mode_from_store, write_review_hold_to_store};
pub(crate) use helpers::{should_block_story, spawn_pipeline_advance};
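
// A minimal sketch of the non-async spawn pattern described above (illustrative
// only; the real signature and argument list live in helpers.rs). Because a
// plain fn returns no opaque `impl Future`, it can sit between `start_agent`
// and `run_server_owned_completion` without creating a cyclic opaque type:
//
//     pub(crate) fn spawn_pipeline_advance(pool: Arc<AgentPool>, /* args */) {
//         tokio::spawn(async move {
//             pool.run_pipeline_advance(/* args */).await;
//         });
//     }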

#[cfg(test)]
mod tests;

#[cfg(test)]
mod tests_regression;