huskies: merge 981

This commit is contained in:
dave
2026-05-13 13:54:27 +00:00
parent 51aa649ce4
commit e9a7468d8a
4 changed files with 194 additions and 2 deletions
@@ -131,6 +131,78 @@ impl AgentPool {
); );
} }
} }
} else if crate::db::read_content(crate::db::ContentKey::MergeFixupPending(
story_id,
))
.is_some()
{
// Merge gate fixup coder completed (story 981).
// Route back to merge on success, or to MergeFailure on failure.
// Neither path counts against retry_count (AC4).
crate::db::delete_content(crate::db::ContentKey::MergeFixupPending(story_id));
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryPending(
story_id,
));
// The FixupRequested transition set retry_count=1 so the gate output
// was injected into the spawn. Reset to 0 now so the fixup does not
// consume a retry slot (AC4).
crate::crdt_state::set_retry_count(story_id, 0);
if completion.gates_passed {
slog!(
"[pipeline] Merge fixup coder '{agent_name}' passed gates for \
'{story_id}'. Re-triggering merge."
);
if let Err(e) = crate::agents::lifecycle::move_story_to_merge(story_id) {
slog_error!(
"[pipeline] Failed to move '{story_id}' to 4_merge/ after \
fixup: {e}"
);
} else {
self.trigger_server_side_merge(&project_root, story_id);
}
} else {
slog!(
"[pipeline] Merge fixup coder '{agent_name}' failed gates for \
'{story_id}'. Transitioning back to MergeFailure."
);
// Two-step: Coding → Merge → MergeFailure.
// feature_branch follows the project convention; commits_ahead=1
// is a safe approximation (the actual count doesn't matter here —
// it is only used to reconstruct Merge via Unblock if a human
// later retries).
let branch =
crate::pipeline_state::BranchName(format!("feature/story-{story_id}"));
let commits_ahead = std::num::NonZeroU32::new(1).unwrap();
let qa_skip = crate::pipeline_state::PipelineEvent::QaSkipped {
feature_branch: branch,
commits_ahead,
};
if let Err(e) =
crate::pipeline_state::apply_transition_str(story_id, qa_skip, None)
{
slog_error!(
"[pipeline] Failed to move '{story_id}' Coding→Merge for \
fixup failure: {e}"
);
}
let reason = format!(
"Merge fixup coder could not resolve gate failures: {}",
truncate_gate_output(&completion.gate_output)
);
if let Err(e) =
crate::agents::lifecycle::transition_to_merge_failure(story_id, &reason)
{
slog_error!(
"[pipeline] Failed to transition '{story_id}' to MergeFailure \
after fixup failure: {e}"
);
}
let _ = self.watcher_tx.send(WatcherEvent::MergeFailure {
story_id: story_id.to_string(),
reason,
});
}
} else if completion.gates_passed { } else if completion.gates_passed {
// Clear any stale recovery key when the coder succeeds normally. // Clear any stale recovery key when the coder succeeds normally.
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryPending( crate::db::delete_content(crate::db::ContentKey::CommitRecoveryPending(
@@ -1,9 +1,28 @@
//! Merge pipeline runner — start_merge_agent_work and run_merge_pipeline. //! Merge pipeline runner — start_merge_agent_work and run_merge_pipeline.
use crate::slog; use crate::slog;
use crate::slog_error;
use crate::worktree; use crate::worktree;
use std::path::Path; use std::path::Path;
use std::sync::Arc; use std::sync::Arc;
/// Decide whether a merge-gate failure looks like something a short fixup
/// coder session can repair on its own, without a human stepping in.
///
/// The check is a plain, case-sensitive substring search over `gate_output`.
/// It returns `true` as soon as any known marker is found and `false` when
/// none of them occur (which includes empty output).
///
/// Marker families: formatting drift (`cargo fmt --check` / `rustfmt --check`),
/// clippy diagnostics (warnings are promoted to errors via `-D warnings`), and
/// missing documentation reported by clippy or the source-map-check gate.
fn is_self_evident_fix(gate_output: &str) -> bool {
    const MARKERS: [&str; 6] = [
        // cargo fmt --check output
        "Diff in ",
        // rustfmt --check output
        "would reformat",
        // clippy error
        "error[clippy::",
        // clippy warning (treated as error via -D warnings)
        "warning[clippy::",
        // clippy missing-doc lint
        "missing_doc_comments",
        // source-map-check gate
        "missing-docs direction",
    ];

    for marker in MARKERS.iter() {
        if gate_output.contains(marker) {
            return true;
        }
    }
    false
}
use super::super::super::AgentPool; use super::super::super::AgentPool;
use super::time::{ use super::time::{
decode_server_start_time, encode_server_start_time, server_start_time, unix_now, decode_server_start_time, encode_server_start_time, server_start_time, unix_now,
@@ -114,9 +133,18 @@ impl AgentPool {
Err(e) => e.clone(), Err(e) => e.clone(),
}; };
let is_no_commits = reason.contains("no commits to merge"); let is_no_commits = reason.contains("no commits to merge");
// Self-evident fix: gate-only failure (no conflicts) whose output matches
// a pattern a fixup coder can resolve in one short session (story 981).
let gate_output = match &report {
Ok(r) if !r.had_conflicts => r.gate_output.clone(),
_ => String::new(),
};
let is_fixup =
!is_no_commits && !gate_output.is_empty() && is_self_evident_fix(&gate_output);
if is_no_commits { if is_no_commits {
if let Err(e) = crate::agents::lifecycle::transition_to_blocked(&sid, &reason) { if let Err(e) = crate::agents::lifecycle::transition_to_blocked(&sid, &reason) {
crate::slog_error!("[merge] Failed to transition '{sid}' to Blocked: {e}"); slog_error!("[merge] Failed to transition '{sid}' to Blocked: {e}");
} }
let _ = pool let _ = pool
.watcher_tx .watcher_tx
@@ -124,6 +152,50 @@ impl AgentPool {
story_id: sid.clone(), story_id: sid.clone(),
reason, reason,
}); });
} else if is_fixup {
// Save gate output and mark fixup pending before any state transition
// so that a concurrent auto-assign that fires after the state change
// sees the keys already set.
crate::db::write_content(crate::db::ContentKey::GateOutput(&sid), &gate_output);
crate::db::write_content(crate::db::ContentKey::MergeFixupPending(&sid), "1");
// Merge → MergeFailure → Coding. FixupRequested also sets
// retry_count=1 so maybe_inject_gate_failure injects the gate
// output into --append-system-prompt on the fixup spawn.
let _ = crate::agents::lifecycle::transition_to_merge_failure(&sid, &reason);
match crate::agents::lifecycle::move_story_to_stage(&sid, "current") {
Ok(_) => {
slog!(
"[merge] Self-evident gate fix for '{sid}'; spawning fixup coder"
);
let context = "\n\nYour task is to fix the merge gate failures \
shown above (see --append-system-prompt). \
Run run_tests then commit. Do not explore further.";
if let Err(e) = pool
.start_agent(&root, &sid, None, Some(context), None)
.await
{
slog_error!(
"[merge] Fixup coder spawn failed for '{sid}': {e} \
(auto-assign will retry when a slot opens)"
);
}
}
Err(e) => {
slog_error!(
"[merge] Failed to move '{sid}' back to current for fixup: {e}; \
reverting to MergeFailure"
);
crate::db::delete_content(crate::db::ContentKey::MergeFixupPending(
&sid,
));
let _ = pool.watcher_tx.send(
crate::io::watcher::WatcherEvent::MergeFailure {
story_id: sid.clone(),
reason,
},
);
}
}
} else { } else {
// Transition through the state machine (Merge → MergeFailure). // Transition through the state machine (Merge → MergeFailure).
// Only send the notification when the stage actually changed; if the // Only send the notification when the stage actually changed; if the
@@ -136,7 +208,7 @@ impl AgentPool {
crate::pipeline_state::Stage::MergeFailure { .. } crate::pipeline_state::Stage::MergeFailure { .. }
), ),
Err(e) => { Err(e) => {
crate::slog_error!( slog_error!(
"[merge] Failed to transition '{sid}' to MergeFailure: {e}" "[merge] Failed to transition '{sid}' to MergeFailure: {e}"
); );
true true
+40
View File
@@ -64,6 +64,44 @@ fn inject_gate_failure_section(args: &mut Vec<String>, gate_output: &str) {
args.push(section); args.push(section);
} }
/// Clamp the value that follows `flag` in `args` so it never exceeds `cap`.
///
/// * Flag absent: `flag` and `cap` are appended.
/// * Flag present with a value that parses and is `<= cap`: left untouched.
/// * Flag present with any other value (larger, unparseable, or not
///   comparable such as a floating-point `NaN`): replaced with `cap`.
/// * Flag present as the final argument with no value: `cap` is appended so
///   the limit is still enforced.
///
/// Only the first occurrence of `flag` is considered.
fn cap_flag_value<T>(args: &mut Vec<String>, flag: &str, cap: T)
where
    T: PartialOrd + std::str::FromStr + ToString,
{
    match args.iter().position(|a| a == flag) {
        Some(pos) => match args.get_mut(pos + 1) {
            Some(val) => {
                // `<=` is false for NaN, so a NaN budget cannot slip past
                // the cap the way a `current > cap` test would allow.
                let within_cap = matches!(val.parse::<T>(), Ok(current) if current <= cap);
                if !within_cap {
                    *val = cap.to_string();
                }
            }
            // Dangling flag with no value: supply the cap as its value.
            None => args.push(cap.to_string()),
        },
        None => {
            args.push(flag.to_string());
            args.push(cap.to_string());
        }
    }
}

/// Cap `--max-turns` and `--max-budget-usd` for merge-gate fixup sessions (story 981).
///
/// When [`ContentKey::MergeFixupPending`] is set the fixup coder must not run
/// longer than 20 turns or spend more than $1. If the agent config already
/// specifies lower values those are preserved; otherwise the fixup caps take
/// precedence by overriding the args in place. Missing flags are appended.
///
/// Does nothing when the `MergeFixupPending` key is not set for `story_id`.
pub(super) fn maybe_cap_for_merge_fixup(args: &mut Vec<String>, story_id: &str) {
    if crate::db::read_content(crate::db::ContentKey::MergeFixupPending(story_id)).is_none() {
        return;
    }

    // Override --max-turns: set to 20 unless already lower.
    const FIXUP_MAX_TURNS: u32 = 20;
    cap_flag_value(args, "--max-turns", FIXUP_MAX_TURNS);

    // Override --max-budget-usd: set to 1.0 unless already lower.
    const FIXUP_MAX_BUDGET: f64 = 1.0;
    cap_flag_value(args, "--max-budget-usd", FIXUP_MAX_BUDGET);
}
/// On retry spawns (retry_count > 0), read the stored gate_output from the DB /// On retry spawns (retry_count > 0), read the stored gate_output from the DB
/// and inject it into `--append-system-prompt` so the agent always sees the /// and inject it into `--append-system-prompt` so the agent always sees the
/// prior failure context, even when session-resuming (story 881). /// prior failure context, even when session-resuming (story 881).
@@ -216,6 +254,8 @@ pub(super) async fn run_agent_spawn(
// On retry spawns (retry_count > 0), inject prior gate failure output into // On retry spawns (retry_count > 0), inject prior gate failure output into
// --append-system-prompt so the agent always sees the failure context (story 881). // --append-system-prompt so the agent always sees the failure context (story 881).
maybe_inject_gate_failure(&mut args, &sid); maybe_inject_gate_failure(&mut args, &sid);
// Cap turns and budget for merge-gate fixup sessions (story 981).
maybe_cap_for_merge_fixup(&mut args, &sid);
// Append project-local prompt content (.huskies/AGENT.md) to the // Append project-local prompt content (.huskies/AGENT.md) to the
// baked-in prompt so every agent role sees project-specific guidance // baked-in prompt so every agent role sees project-specific guidance
+8
View File
@@ -26,6 +26,13 @@ pub enum ContentKey<'a> {
RunTestsOk(&'a str), RunTestsOk(&'a str),
/// Flag indicating a commit-recovery respawn is in progress. /// Flag indicating a commit-recovery respawn is in progress.
CommitRecoveryPending(&'a str), CommitRecoveryPending(&'a str),
/// Flag indicating a merge gate fixup coder session is in progress.
///
/// Set when the merge gate fails with a self-evident-fix class of failure
/// (fmt drift, clippy warning, missing doc) so the pipeline advance handler
/// can route the fixup coder's completion directly back to merge instead of
/// through the normal QA path (story 981).
MergeFixupPending(&'a str),
} }
impl<'a> ContentKey<'a> { impl<'a> ContentKey<'a> {
@@ -41,6 +48,7 @@ impl<'a> ContentKey<'a> {
ContentKey::MergeMasterSpawnCount(id) => format!("{id}:mergemaster_spawn_count"), ContentKey::MergeMasterSpawnCount(id) => format!("{id}:mergemaster_spawn_count"),
ContentKey::RunTestsOk(id) => format!("{id}:run_tests_ok"), ContentKey::RunTestsOk(id) => format!("{id}:run_tests_ok"),
ContentKey::CommitRecoveryPending(id) => format!("{id}:commit_recovery_pending"), ContentKey::CommitRecoveryPending(id) => format!("{id}:commit_recovery_pending"),
ContentKey::MergeFixupPending(id) => format!("{id}:merge_fixup_pending"),
} }
} }
} }