From e9a7468d8af4d2cc7ac597c94ce90f80faf071c3 Mon Sep 17 00:00:00 2001 From: dave Date: Wed, 13 May 2026 13:54:27 +0000 Subject: [PATCH] huskies: merge 981 --- .../src/agents/pool/pipeline/advance/mod.rs | 72 ++++++++++++++++++ .../src/agents/pool/pipeline/merge/runner.rs | 76 ++++++++++++++++++- server/src/agents/pool/start/spawn.rs | 40 ++++++++++ server/src/db/content_store.rs | 8 ++ 4 files changed, 194 insertions(+), 2 deletions(-) diff --git a/server/src/agents/pool/pipeline/advance/mod.rs b/server/src/agents/pool/pipeline/advance/mod.rs index e6ac77c5..06669ca1 100644 --- a/server/src/agents/pool/pipeline/advance/mod.rs +++ b/server/src/agents/pool/pipeline/advance/mod.rs @@ -131,6 +131,78 @@ impl AgentPool { ); } } + } else if crate::db::read_content(crate::db::ContentKey::MergeFixupPending( + story_id, + )) + .is_some() + { + // Merge gate fixup coder completed (story 981). + // Route back to merge on success, or to MergeFailure on failure. + // Neither path counts against retry_count (AC4). + crate::db::delete_content(crate::db::ContentKey::MergeFixupPending(story_id)); + crate::db::delete_content(crate::db::ContentKey::CommitRecoveryPending( + story_id, + )); + // The FixupRequested transition set retry_count=1 so the gate output + // was injected into the spawn. Reset to 0 now so the fixup does not + // consume a retry slot (AC4). + crate::crdt_state::set_retry_count(story_id, 0); + + if completion.gates_passed { + slog!( + "[pipeline] Merge fixup coder '{agent_name}' passed gates for \ + '{story_id}'. Re-triggering merge." + ); + if let Err(e) = crate::agents::lifecycle::move_story_to_merge(story_id) { + slog_error!( + "[pipeline] Failed to move '{story_id}' to 4_merge/ after \ + fixup: {e}" + ); + } else { + self.trigger_server_side_merge(&project_root, story_id); + } + } else { + slog!( + "[pipeline] Merge fixup coder '{agent_name}' failed gates for \ + '{story_id}'. Transitioning back to MergeFailure." 
+ ); + // Two-step: Coding → Merge → MergeFailure. + // feature_branch follows the project convention; commits_ahead=1 + // is a safe approximation (the actual count doesn't matter here — + // it is only used to reconstruct Merge via Unblock if a human + // later retries). + let branch = + crate::pipeline_state::BranchName(format!("feature/story-{story_id}")); + let commits_ahead = std::num::NonZeroU32::new(1).unwrap(); + let qa_skip = crate::pipeline_state::PipelineEvent::QaSkipped { + feature_branch: branch, + commits_ahead, + }; + if let Err(e) = + crate::pipeline_state::apply_transition_str(story_id, qa_skip, None) + { + slog_error!( + "[pipeline] Failed to move '{story_id}' Coding→Merge for \ + fixup failure: {e}" + ); + } + let reason = format!( + "Merge fixup coder could not resolve gate failures: {}", + truncate_gate_output(&completion.gate_output) + ); + if let Err(e) = + crate::agents::lifecycle::transition_to_merge_failure(story_id, &reason) + { + slog_error!( + "[pipeline] Failed to transition '{story_id}' to MergeFailure \ + after fixup failure: {e}" + ); + } + let _ = self.watcher_tx.send(WatcherEvent::MergeFailure { + story_id: story_id.to_string(), + reason, + }); + } } else if completion.gates_passed { // Clear any stale recovery key when the coder succeeds normally. crate::db::delete_content(crate::db::ContentKey::CommitRecoveryPending( diff --git a/server/src/agents/pool/pipeline/merge/runner.rs b/server/src/agents/pool/pipeline/merge/runner.rs index 4f8e84db..93aea7f6 100644 --- a/server/src/agents/pool/pipeline/merge/runner.rs +++ b/server/src/agents/pool/pipeline/merge/runner.rs @@ -1,9 +1,28 @@ //! Merge pipeline runner — start_merge_agent_work and run_merge_pipeline. use crate::slog; +use crate::slog_error; use crate::worktree; use std::path::Path; use std::sync::Arc;
+/// Return `true` when `gate_output` matches a self-evident-fix class of failure
+/// that a short fixup coder session can resolve without human intervention.
+///
+/// Case-sensitive substring match on: fmt drift, clippy lints (bracketed form or
+/// the `rust-clippy` help URL), and missing docs (clippy / source-map-check).
+fn is_self_evident_fix(gate_output: &str) -> bool {
+    const PATTERNS: [&str; 7] = [
+        "Diff in ",                        // cargo fmt --check output
+        "would reformat",                  // rustfmt --check output
+        "error[clippy::",                  // clippy error
+        "warning[clippy::",                // clippy warning (treated as error via -D warnings)
+        "rust-lang.github.io/rust-clippy", // help URL stock clippy prints with each lint
+        "missing_doc_comments",            // clippy missing-doc lint
+        "missing-docs direction",          // source-map-check gate
+    ];
+    PATTERNS.iter().any(|p| gate_output.contains(p))
+}
+
 use super::super::super::AgentPool; use super::time::{ decode_server_start_time, encode_server_start_time, server_start_time, unix_now, @@ -114,9 +133,18 @@ impl AgentPool { Err(e) => e.clone(), }; let is_no_commits = reason.contains("no commits to merge"); + // Self-evident fix: gate-only failure (no conflicts) whose output matches + // a pattern a fixup coder can resolve in one short session (story 981). + let gate_output = match &report { + Ok(r) if !r.had_conflicts => r.gate_output.clone(), + _ => String::new(), + }; + let is_fixup = + !is_no_commits && !gate_output.is_empty() && is_self_evident_fix(&gate_output); + if is_no_commits { if let Err(e) = crate::agents::lifecycle::transition_to_blocked(&sid, &reason) { - crate::slog_error!("[merge] Failed to transition '{sid}' to Blocked: {e}"); + slog_error!("[merge] Failed to transition '{sid}' to Blocked: {e}"); } let _ = pool .watcher_tx @@ -124,6 +152,50 @@ impl AgentPool { story_id: sid.clone(), reason, }); + } else if is_fixup { + // Save gate output and mark fixup pending before any state transition + // so that a concurrent auto-assign that fires after the state change + // sees the keys already set. 
+ crate::db::write_content(crate::db::ContentKey::GateOutput(&sid), &gate_output); + crate::db::write_content(crate::db::ContentKey::MergeFixupPending(&sid), "1"); + // Merge → MergeFailure → Coding. FixupRequested also sets + // retry_count=1 so maybe_inject_gate_failure injects the gate + // output into --append-system-prompt on the fixup spawn. + let _ = crate::agents::lifecycle::transition_to_merge_failure(&sid, &reason); + match crate::agents::lifecycle::move_story_to_stage(&sid, "current") { + Ok(_) => { + slog!( + "[merge] Self-evident gate fix for '{sid}'; spawning fixup coder" + ); + let context = "\n\nYour task is to fix the merge gate failures \ + shown above (see --append-system-prompt). \ + Run run_tests then commit. Do not explore further."; + if let Err(e) = pool + .start_agent(&root, &sid, None, Some(context), None) + .await + { + slog_error!( + "[merge] Fixup coder spawn failed for '{sid}': {e} \ + (auto-assign will retry when a slot opens)" + ); + } + } + Err(e) => { + slog_error!( + "[merge] Failed to move '{sid}' back to current for fixup: {e}; \ + reverting to MergeFailure" + ); + crate::db::delete_content(crate::db::ContentKey::MergeFixupPending( + &sid, + )); + let _ = pool.watcher_tx.send( + crate::io::watcher::WatcherEvent::MergeFailure { + story_id: sid.clone(), + reason, + }, + ); + } + } } else { // Transition through the state machine (Merge → MergeFailure). // Only send the notification when the stage actually changed; if the @@ -136,7 +208,7 @@ impl AgentPool { crate::pipeline_state::Stage::MergeFailure { .. 
} ), Err(e) => { - crate::slog_error!( + slog_error!( "[merge] Failed to transition '{sid}' to MergeFailure: {e}" ); true diff --git a/server/src/agents/pool/start/spawn.rs b/server/src/agents/pool/start/spawn.rs index 22521f51..13a1fa66 100644 --- a/server/src/agents/pool/start/spawn.rs +++ b/server/src/agents/pool/start/spawn.rs @@ -64,6 +64,44 @@ fn inject_gate_failure_section(args: &mut Vec, gate_output: &str) { args.push(section); }
+/// Cap `--max-turns` and `--max-budget-usd` for merge-gate fixup sessions (story 981).
+///
+/// No-op unless [`ContentKey::MergeFixupPending`] is set for `story_id`. Otherwise each
+/// flag ends up at most 20 turns / $1: a lower configured value is kept, while a higher,
+/// missing, or unparsable one (including a `NaN` budget) is overwritten with the cap.
+pub(super) fn maybe_cap_for_merge_fixup(args: &mut Vec<String>, story_id: &str) {
+    if crate::db::read_content(crate::db::ContentKey::MergeFixupPending(story_id)).is_none() {
+        return;
+    }
+    const FIXUP_MAX_TURNS: u32 = 20;
+    const FIXUP_MAX_BUDGET: f64 = 1.0;
+    cap_flag_value(args, "--max-turns", FIXUP_MAX_TURNS);
+    cap_flag_value(args, "--max-budget-usd", FIXUP_MAX_BUDGET);
+}
+
+/// Force the value following `flag` in `args` to be at most `cap`, appending the
+/// flag and/or its value when either is absent.
+fn cap_flag_value<T>(args: &mut Vec<String>, flag: &str, cap: T)
+where
+    T: std::str::FromStr + PartialOrd + ToString,
+{
+    let capped = cap.to_string();
+    let Some(pos) = args.iter().position(|a| a == flag) else {
+        args.push(flag.to_string());
+        args.push(capped);
+        return;
+    };
+    // `<=` is false for NaN, so a NaN budget is replaced rather than silently kept.
+    let current = args.get(pos + 1).and_then(|v| v.parse::<T>().ok());
+    if matches!(current, Some(v) if v <= cap) {
+        return;
+    }
+    match args.get_mut(pos + 1) {
+        Some(val) => *val = capped,
+        None => args.push(capped), // flag was the last argument: supply its value
+    }
+}
+
 /// On retry spawns (retry_count > 0), read the stored gate_output from the DB /// and inject it into `--append-system-prompt` so the agent always sees the /// prior failure context, even when session-resuming (story 881). @@ -216,6 +254,8 @@ pub(super) async fn run_agent_spawn( // On retry spawns (retry_count > 0), inject prior gate failure output into // --append-system-prompt so the agent always sees the failure context (story 881). maybe_inject_gate_failure(&mut args, &sid); + // Cap turns and budget for merge-gate fixup sessions (story 981). + maybe_cap_for_merge_fixup(&mut args, &sid); // Append project-local prompt content (.huskies/AGENT.md) to the // baked-in prompt so every agent role sees project-specific guidance diff --git a/server/src/db/content_store.rs b/server/src/db/content_store.rs index 203df166..a12668f6 100644 --- a/server/src/db/content_store.rs +++ b/server/src/db/content_store.rs @@ -26,6 +26,13 @@ pub enum ContentKey<'a> { RunTestsOk(&'a str), /// Flag indicating a commit-recovery respawn is in progress. CommitRecoveryPending(&'a str), + /// Flag indicating a merge gate fixup coder session is in progress. + /// + /// Set when the merge gate fails with a self-evident-fix class of failure + /// (fmt drift, clippy warning, missing doc) so the pipeline advance handler + /// can route the fixup coder's completion directly back to merge instead of + /// through the normal QA path (story 981). 
+ MergeFixupPending(&'a str), } impl<'a> ContentKey<'a> { @@ -41,6 +48,7 @@ impl<'a> ContentKey<'a> { ContentKey::MergeMasterSpawnCount(id) => format!("{id}:mergemaster_spawn_count"), ContentKey::RunTestsOk(id) => format!("{id}:run_tests_ok"), ContentKey::CommitRecoveryPending(id) => format!("{id}:commit_recovery_pending"), + ContentKey::MergeFixupPending(id) => format!("{id}:merge_fixup_pending"), } } }