huskies: merge 981
This commit is contained in:
@@ -131,6 +131,78 @@ impl AgentPool {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else if crate::db::read_content(crate::db::ContentKey::MergeFixupPending(
|
||||||
|
story_id,
|
||||||
|
))
|
||||||
|
.is_some()
|
||||||
|
{
|
||||||
|
// Merge gate fixup coder completed (story 981).
|
||||||
|
// Route back to merge on success, or to MergeFailure on failure.
|
||||||
|
// Neither path counts against retry_count (AC4).
|
||||||
|
crate::db::delete_content(crate::db::ContentKey::MergeFixupPending(story_id));
|
||||||
|
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryPending(
|
||||||
|
story_id,
|
||||||
|
));
|
||||||
|
// The FixupRequested transition set retry_count=1 so the gate output
|
||||||
|
// was injected into the spawn. Reset to 0 now so the fixup does not
|
||||||
|
// consume a retry slot (AC4).
|
||||||
|
crate::crdt_state::set_retry_count(story_id, 0);
|
||||||
|
|
||||||
|
if completion.gates_passed {
|
||||||
|
slog!(
|
||||||
|
"[pipeline] Merge fixup coder '{agent_name}' passed gates for \
|
||||||
|
'{story_id}'. Re-triggering merge."
|
||||||
|
);
|
||||||
|
if let Err(e) = crate::agents::lifecycle::move_story_to_merge(story_id) {
|
||||||
|
slog_error!(
|
||||||
|
"[pipeline] Failed to move '{story_id}' to 4_merge/ after \
|
||||||
|
fixup: {e}"
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
self.trigger_server_side_merge(&project_root, story_id);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
slog!(
|
||||||
|
"[pipeline] Merge fixup coder '{agent_name}' failed gates for \
|
||||||
|
'{story_id}'. Transitioning back to MergeFailure."
|
||||||
|
);
|
||||||
|
// Two-step: Coding → Merge → MergeFailure.
|
||||||
|
// feature_branch follows the project convention; commits_ahead=1
|
||||||
|
// is a safe approximation (the actual count doesn't matter here —
|
||||||
|
// it is only used to reconstruct Merge via Unblock if a human
|
||||||
|
// later retries).
|
||||||
|
let branch =
|
||||||
|
crate::pipeline_state::BranchName(format!("feature/story-{story_id}"));
|
||||||
|
let commits_ahead = std::num::NonZeroU32::new(1).unwrap();
|
||||||
|
let qa_skip = crate::pipeline_state::PipelineEvent::QaSkipped {
|
||||||
|
feature_branch: branch,
|
||||||
|
commits_ahead,
|
||||||
|
};
|
||||||
|
if let Err(e) =
|
||||||
|
crate::pipeline_state::apply_transition_str(story_id, qa_skip, None)
|
||||||
|
{
|
||||||
|
slog_error!(
|
||||||
|
"[pipeline] Failed to move '{story_id}' Coding→Merge for \
|
||||||
|
fixup failure: {e}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let reason = format!(
|
||||||
|
"Merge fixup coder could not resolve gate failures: {}",
|
||||||
|
truncate_gate_output(&completion.gate_output)
|
||||||
|
);
|
||||||
|
if let Err(e) =
|
||||||
|
crate::agents::lifecycle::transition_to_merge_failure(story_id, &reason)
|
||||||
|
{
|
||||||
|
slog_error!(
|
||||||
|
"[pipeline] Failed to transition '{story_id}' to MergeFailure \
|
||||||
|
after fixup failure: {e}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let _ = self.watcher_tx.send(WatcherEvent::MergeFailure {
|
||||||
|
story_id: story_id.to_string(),
|
||||||
|
reason,
|
||||||
|
});
|
||||||
|
}
|
||||||
} else if completion.gates_passed {
|
} else if completion.gates_passed {
|
||||||
// Clear any stale recovery key when the coder succeeds normally.
|
// Clear any stale recovery key when the coder succeeds normally.
|
||||||
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryPending(
|
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryPending(
|
||||||
|
|||||||
@@ -1,9 +1,28 @@
|
|||||||
//! Merge pipeline runner — start_merge_agent_work and run_merge_pipeline.
|
//! Merge pipeline runner — start_merge_agent_work and run_merge_pipeline.
|
||||||
use crate::slog;
|
use crate::slog;
|
||||||
|
use crate::slog_error;
|
||||||
use crate::worktree;
|
use crate::worktree;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
/// Decide whether a failed merge gate looks like something a brief fixup
/// coder session can repair on its own, with no human in the loop.
///
/// This is a plain, case-sensitive substring scan of `gate_output` against a
/// fixed marker list covering formatter drift, clippy diagnostics, and
/// missing-documentation reports. Returns `true` as soon as one marker is
/// found and `false` when none is present (including for empty output).
fn is_self_evident_fix(gate_output: &str) -> bool {
    // Substrings that identify an auto-fixable class of gate failure.
    const MARKERS: [&str; 6] = [
        // cargo fmt --check output
        "Diff in ",
        // rustfmt --check output
        "would reformat",
        // clippy error
        "error[clippy::",
        // clippy warning (treated as error via -D warnings)
        "warning[clippy::",
        // clippy missing-doc lint
        "missing_doc_comments",
        // source-map-check gate
        "missing-docs direction",
    ];

    for marker in MARKERS.iter() {
        if gate_output.contains(*marker) {
            return true;
        }
    }
    false
}
|
||||||
|
|
||||||
use super::super::super::AgentPool;
|
use super::super::super::AgentPool;
|
||||||
use super::time::{
|
use super::time::{
|
||||||
decode_server_start_time, encode_server_start_time, server_start_time, unix_now,
|
decode_server_start_time, encode_server_start_time, server_start_time, unix_now,
|
||||||
@@ -114,9 +133,18 @@ impl AgentPool {
|
|||||||
Err(e) => e.clone(),
|
Err(e) => e.clone(),
|
||||||
};
|
};
|
||||||
let is_no_commits = reason.contains("no commits to merge");
|
let is_no_commits = reason.contains("no commits to merge");
|
||||||
|
// Self-evident fix: gate-only failure (no conflicts) whose output matches
|
||||||
|
// a pattern a fixup coder can resolve in one short session (story 981).
|
||||||
|
let gate_output = match &report {
|
||||||
|
Ok(r) if !r.had_conflicts => r.gate_output.clone(),
|
||||||
|
_ => String::new(),
|
||||||
|
};
|
||||||
|
let is_fixup =
|
||||||
|
!is_no_commits && !gate_output.is_empty() && is_self_evident_fix(&gate_output);
|
||||||
|
|
||||||
if is_no_commits {
|
if is_no_commits {
|
||||||
if let Err(e) = crate::agents::lifecycle::transition_to_blocked(&sid, &reason) {
|
if let Err(e) = crate::agents::lifecycle::transition_to_blocked(&sid, &reason) {
|
||||||
crate::slog_error!("[merge] Failed to transition '{sid}' to Blocked: {e}");
|
slog_error!("[merge] Failed to transition '{sid}' to Blocked: {e}");
|
||||||
}
|
}
|
||||||
let _ = pool
|
let _ = pool
|
||||||
.watcher_tx
|
.watcher_tx
|
||||||
@@ -124,6 +152,50 @@ impl AgentPool {
|
|||||||
story_id: sid.clone(),
|
story_id: sid.clone(),
|
||||||
reason,
|
reason,
|
||||||
});
|
});
|
||||||
|
} else if is_fixup {
|
||||||
|
// Save gate output and mark fixup pending before any state transition
|
||||||
|
// so that a concurrent auto-assign that fires after the state change
|
||||||
|
// sees the keys already set.
|
||||||
|
crate::db::write_content(crate::db::ContentKey::GateOutput(&sid), &gate_output);
|
||||||
|
crate::db::write_content(crate::db::ContentKey::MergeFixupPending(&sid), "1");
|
||||||
|
// Merge → MergeFailure → Coding. FixupRequested also sets
|
||||||
|
// retry_count=1 so maybe_inject_gate_failure injects the gate
|
||||||
|
// output into --append-system-prompt on the fixup spawn.
|
||||||
|
let _ = crate::agents::lifecycle::transition_to_merge_failure(&sid, &reason);
|
||||||
|
match crate::agents::lifecycle::move_story_to_stage(&sid, "current") {
|
||||||
|
Ok(_) => {
|
||||||
|
slog!(
|
||||||
|
"[merge] Self-evident gate fix for '{sid}'; spawning fixup coder"
|
||||||
|
);
|
||||||
|
let context = "\n\nYour task is to fix the merge gate failures \
|
||||||
|
shown above (see --append-system-prompt). \
|
||||||
|
Run run_tests then commit. Do not explore further.";
|
||||||
|
if let Err(e) = pool
|
||||||
|
.start_agent(&root, &sid, None, Some(context), None)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
slog_error!(
|
||||||
|
"[merge] Fixup coder spawn failed for '{sid}': {e} \
|
||||||
|
(auto-assign will retry when a slot opens)"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
slog_error!(
|
||||||
|
"[merge] Failed to move '{sid}' back to current for fixup: {e}; \
|
||||||
|
reverting to MergeFailure"
|
||||||
|
);
|
||||||
|
crate::db::delete_content(crate::db::ContentKey::MergeFixupPending(
|
||||||
|
&sid,
|
||||||
|
));
|
||||||
|
let _ = pool.watcher_tx.send(
|
||||||
|
crate::io::watcher::WatcherEvent::MergeFailure {
|
||||||
|
story_id: sid.clone(),
|
||||||
|
reason,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// Transition through the state machine (Merge → MergeFailure).
|
// Transition through the state machine (Merge → MergeFailure).
|
||||||
// Only send the notification when the stage actually changed; if the
|
// Only send the notification when the stage actually changed; if the
|
||||||
@@ -136,7 +208,7 @@ impl AgentPool {
|
|||||||
crate::pipeline_state::Stage::MergeFailure { .. }
|
crate::pipeline_state::Stage::MergeFailure { .. }
|
||||||
),
|
),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
crate::slog_error!(
|
slog_error!(
|
||||||
"[merge] Failed to transition '{sid}' to MergeFailure: {e}"
|
"[merge] Failed to transition '{sid}' to MergeFailure: {e}"
|
||||||
);
|
);
|
||||||
true
|
true
|
||||||
|
|||||||
@@ -64,6 +64,44 @@ fn inject_gate_failure_section(args: &mut Vec<String>, gate_output: &str) {
|
|||||||
args.push(section);
|
args.push(section);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Cap `--max-turns` and `--max-budget-usd` for merge-gate fixup sessions (story 981).
|
||||||
|
///
|
||||||
|
/// When [`ContentKey::MergeFixupPending`] is set the fixup coder must not run
|
||||||
|
/// longer than 20 turns or spend more than $1. If the agent config already
|
||||||
|
/// specifies lower values those are preserved; otherwise the fixup caps take
|
||||||
|
/// precedence by overriding the args in place.
|
||||||
|
pub(super) fn maybe_cap_for_merge_fixup(args: &mut Vec<String>, story_id: &str) {
|
||||||
|
if crate::db::read_content(crate::db::ContentKey::MergeFixupPending(story_id)).is_none() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Override --max-turns: set to 20 unless already lower.
|
||||||
|
const FIXUP_MAX_TURNS: u32 = 20;
|
||||||
|
if let Some(pos) = args.iter().position(|a| a == "--max-turns") {
|
||||||
|
if let Some(val) = args.get_mut(pos + 1) {
|
||||||
|
let current: u32 = val.parse().unwrap_or(u32::MAX);
|
||||||
|
if current > FIXUP_MAX_TURNS {
|
||||||
|
*val = FIXUP_MAX_TURNS.to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
args.push("--max-turns".to_string());
|
||||||
|
args.push(FIXUP_MAX_TURNS.to_string());
|
||||||
|
}
|
||||||
|
// Override --max-budget-usd: set to 1.0 unless already lower.
|
||||||
|
const FIXUP_MAX_BUDGET: f64 = 1.0;
|
||||||
|
if let Some(pos) = args.iter().position(|a| a == "--max-budget-usd") {
|
||||||
|
if let Some(val) = args.get_mut(pos + 1) {
|
||||||
|
let current: f64 = val.parse().unwrap_or(f64::MAX);
|
||||||
|
if current > FIXUP_MAX_BUDGET {
|
||||||
|
*val = FIXUP_MAX_BUDGET.to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
args.push("--max-budget-usd".to_string());
|
||||||
|
args.push(FIXUP_MAX_BUDGET.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// On retry spawns (retry_count > 0), read the stored gate_output from the DB
|
/// On retry spawns (retry_count > 0), read the stored gate_output from the DB
|
||||||
/// and inject it into `--append-system-prompt` so the agent always sees the
|
/// and inject it into `--append-system-prompt` so the agent always sees the
|
||||||
/// prior failure context, even when session-resuming (story 881).
|
/// prior failure context, even when session-resuming (story 881).
|
||||||
@@ -216,6 +254,8 @@ pub(super) async fn run_agent_spawn(
|
|||||||
// On retry spawns (retry_count > 0), inject prior gate failure output into
|
// On retry spawns (retry_count > 0), inject prior gate failure output into
|
||||||
// --append-system-prompt so the agent always sees the failure context (story 881).
|
// --append-system-prompt so the agent always sees the failure context (story 881).
|
||||||
maybe_inject_gate_failure(&mut args, &sid);
|
maybe_inject_gate_failure(&mut args, &sid);
|
||||||
|
// Cap turns and budget for merge-gate fixup sessions (story 981).
|
||||||
|
maybe_cap_for_merge_fixup(&mut args, &sid);
|
||||||
|
|
||||||
// Append project-local prompt content (.huskies/AGENT.md) to the
|
// Append project-local prompt content (.huskies/AGENT.md) to the
|
||||||
// baked-in prompt so every agent role sees project-specific guidance
|
// baked-in prompt so every agent role sees project-specific guidance
|
||||||
|
|||||||
@@ -26,6 +26,13 @@ pub enum ContentKey<'a> {
|
|||||||
RunTestsOk(&'a str),
|
RunTestsOk(&'a str),
|
||||||
/// Flag indicating a commit-recovery respawn is in progress.
|
/// Flag indicating a commit-recovery respawn is in progress.
|
||||||
CommitRecoveryPending(&'a str),
|
CommitRecoveryPending(&'a str),
|
||||||
|
/// Flag indicating a merge gate fixup coder session is in progress.
|
||||||
|
///
|
||||||
|
/// Set when the merge gate fails with a self-evident-fix class of failure
|
||||||
|
/// (fmt drift, clippy warning, missing doc) so the pipeline advance handler
|
||||||
|
/// can route the fixup coder's completion directly back to merge instead of
|
||||||
|
/// through the normal QA path (story 981).
|
||||||
|
MergeFixupPending(&'a str),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> ContentKey<'a> {
|
impl<'a> ContentKey<'a> {
|
||||||
@@ -41,6 +48,7 @@ impl<'a> ContentKey<'a> {
|
|||||||
ContentKey::MergeMasterSpawnCount(id) => format!("{id}:mergemaster_spawn_count"),
|
ContentKey::MergeMasterSpawnCount(id) => format!("{id}:mergemaster_spawn_count"),
|
||||||
ContentKey::RunTestsOk(id) => format!("{id}:run_tests_ok"),
|
ContentKey::RunTestsOk(id) => format!("{id}:run_tests_ok"),
|
||||||
ContentKey::CommitRecoveryPending(id) => format!("{id}:commit_recovery_pending"),
|
ContentKey::CommitRecoveryPending(id) => format!("{id}:commit_recovery_pending"),
|
||||||
|
ContentKey::MergeFixupPending(id) => format!("{id}:merge_fixup_pending"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user