huskies: merge 1018

This commit is contained in:
dave
2026-05-14 09:33:50 +00:00
parent 8b2ba1c810
commit 309542cf2c
12 changed files with 360 additions and 7 deletions
@@ -0,0 +1,311 @@
//! TransitionFired subscriber that auto-blocks stories after N consecutive MergeFailure transitions.
//!
//! Listens on the pipeline transition broadcast channel and, for each story,
//! counts how many times it has entered [`Stage::MergeFailure`] consecutively.
//! When the count reaches the configurable threshold (default 3), the story is
//! transitioned to [`Stage::Blocked`] with a reason that names the failure kind.
//!
//! The counter for a story resets whenever a non-`MergeFailure` transition fires
//! for that story (e.g. after a successful merge or a `FixupRequested` demotion
//! back to coding).
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use crate::pipeline_state::{MergeFailureKind, PipelineEvent, Stage, StoryId};
use crate::slog;
use crate::slog_warn;
/// Spawn a background task that blocks stories after N consecutive `MergeFailure` transitions.
///
/// Subscribes to the pipeline transition broadcast channel and tracks a per-story
/// consecutive-failure counter. When a story's count reaches the threshold configured
/// in `project.toml` (`merge_failure_block_threshold`, default 3), the story is
/// transitioned to `Stage::Blocked` with a reason that names the failure kind.
///
/// The counter resets when the story leaves `MergeFailure` (e.g. on `FixupRequested`,
/// `ReQueuedForQa`, or a successful merge via `Unblock → Merge → Done`).
///
/// # Lag handling
///
/// If the receiver falls behind and the channel drops events, every counter is
/// cleared. A dropped event may have been a reset, so keeping the old counts could
/// block a story that has not actually failed `threshold` times in a row. Clearing
/// errs on the side of blocking late rather than blocking wrongly.
///
/// # Lifetime
///
/// The task runs until the broadcast channel is closed. `project_root` is moved into
/// the task and handed to the transition handler for every received event. Because
/// this calls `tokio::spawn`, it must be invoked from within a Tokio runtime.
pub(crate) fn spawn_merge_failure_block_subscriber(project_root: PathBuf) {
    let mut rx = crate::pipeline_state::subscribe_transitions();
    tokio::spawn(async move {
        // story → (consecutive MergeFailure count, most recent failure kind)
        let mut counters: HashMap<StoryId, (u32, MergeFailureKind)> = HashMap::new();
        loop {
            match rx.recv().await {
                Ok(fired) => on_transition(&project_root, &fired, &mut counters),
                Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {
                    slog_warn!(
                        "[merge-block-sub] Subscriber lagged, skipped {n} event(s). \
                         Some consecutive-failure counts may be understated."
                    );
                    // We can no longer prove any streak is unbroken (a skipped event
                    // may have been a reset), so start every story from zero again.
                    counters.clear();
                }
                Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
            }
        }
    });
}
/// Handle a single transition event: update counters and emit Block if threshold is reached.
///
/// # Parameters
///
/// * `project_root` – project directory passed to [`load_threshold`]; the threshold is
///   re-read on every `MergeFailure` event.
/// * `fired` – the transition that was just broadcast.
/// * `counters` – per-story state: `(consecutive MergeFailure count, latest failure kind)`.
///
/// # Behaviour
///
/// * `fired.after` is anything other than [`Stage::MergeFailure`]: the story's entry is
///   removed, which resets its streak.
/// * `fired.after` is [`Stage::MergeFailure`]: the story's count is incremented
///   (saturating, so it can never overflow) and the latest kind is stored. If the
///   threshold is `0`, nothing else happens — auto-blocking is disabled. Otherwise,
///   once `count >= threshold`, a `PipelineEvent::Block` is applied to the story.
///   On success the entry is removed; on failure a warning is logged and the entry is
///   kept, so the next `MergeFailure` for that story attempts the block again.
fn on_transition(
    project_root: &Path,
    fired: &crate::pipeline_state::TransitionFired,
    counters: &mut HashMap<StoryId, (u32, MergeFailureKind)>,
) {
    let kind = match &fired.after {
        Stage::MergeFailure { kind, .. } => kind,
        _ => {
            // Any other destination stage breaks the streak.
            counters.remove(&fired.story_id);
            return;
        }
    };

    // Bump an existing streak in place, or start a new one at 1. `saturating_add`
    // keeps a pathological streak from overflowing (which would panic in debug builds
    // and take the subscriber task down with it).
    let count = counters
        .entry(fired.story_id.clone())
        .and_modify(|(n, last_kind)| {
            *n = n.saturating_add(1);
            last_kind.clone_from(kind);
        })
        .or_insert_with(|| (1, kind.clone()))
        .0;

    let threshold = load_threshold(project_root);
    // A threshold of 0 disables auto-blocking; below the threshold there is nothing to do yet.
    if threshold == 0 || count < threshold {
        return;
    }

    let kind_str = failure_kind_label(kind);
    let reason =
        format!("Auto-blocked after {count} consecutive MergeFailure ({kind_str}) transitions.");
    let story_id = fired.story_id.0.as_str();
    slog!(
        "[merge-block-sub] Story '{story_id}' reached {count} consecutive \
         MergeFailure ({kind_str}); blocking."
    );

    match crate::pipeline_state::apply_transition(story_id, PipelineEvent::Block { reason }, None)
    {
        // Blocked: the streak has served its purpose.
        Ok(_) => {
            counters.remove(&fired.story_id);
        }
        // Keep the entry so the next failure retries the block.
        Err(e) => slog_warn!("[merge-block-sub] Failed to block '{story_id}': {e}"),
    }
}
/// Resolve the auto-block threshold for the project rooted at `project_root`.
///
/// Returns the `merge_failure_block_threshold` field of the loaded project config.
/// When loading does not produce a config, the compiled-in default of `3` is used.
fn load_threshold(project_root: &Path) -> u32 {
    let loaded = crate::config::ProjectConfig::load(project_root);
    loaded.map_or(3, |config| config.merge_failure_block_threshold)
}
/// Map a [`MergeFailureKind`] to the name of its variant.
///
/// Any payload carried by the variant is ignored; only the variant name is returned.
/// The match is exhaustive, so adding a variant forces this function to be updated.
fn failure_kind_label(kind: &MergeFailureKind) -> &'static str {
    match kind {
        // Payload-free variants.
        MergeFailureKind::EmptyDiff => "EmptyDiff",
        MergeFailureKind::NoCommits => "NoCommits",
        // Variants with a payload that the label does not include.
        MergeFailureKind::ConflictDetected(..) => "ConflictDetected",
        MergeFailureKind::GatesFailed(..) => "GatesFailed",
        MergeFailureKind::Other(..) => "Other",
    }
}
// ── Tests ──────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pipeline_state::{BranchName, PipelineEvent, Stage, StoryId, TransitionFired};
    use std::num::NonZeroU32;

    /// Counter map with the same shape the subscriber task maintains.
    type Counters = HashMap<StoryId, (u32, MergeFailureKind)>;

    /// Write a minimal `.huskies/project.toml` (one agent named `coder`) under `tmp`.
    ///
    /// No `merge_failure_block_threshold` is written; the tests below assume the
    /// effective threshold is 3.
    fn setup_project(tmp: &tempfile::TempDir) {
        let config_dir = tmp.path().join(".huskies");
        std::fs::create_dir_all(&config_dir).unwrap();
        let project_toml = config_dir.join("project.toml");
        std::fs::write(project_toml, "[[agent]]\nname = \"coder\"\n").unwrap();
    }

    /// Initialise the test stores and write `story_id` as an item in `4_merge`.
    fn seed_at_merge(story_id: &str) {
        crate::crdt_state::init_for_test();
        crate::db::ensure_content_store();
        crate::db::write_item_with_content(
            story_id,
            "4_merge",
            "---\nname: Test\n---\n",
            crate::db::ItemMeta::named("Test"),
        );
    }

    /// Build a synthetic `Merge → MergeFailure` event for `story_id` with the given `kind`.
    fn make_merge_failure_fired(story_id: &str, kind: MergeFailureKind) -> TransitionFired {
        let branch = || BranchName("feature/test".to_string());
        let one_commit = NonZeroU32::new(1).unwrap();
        let event = PipelineEvent::MergeFailed { kind: kind.clone() };
        TransitionFired {
            story_id: StoryId(story_id.to_string()),
            before: Stage::Merge {
                feature_branch: branch(),
                commits_ahead: one_commit,
                claim: None,
            },
            after: Stage::MergeFailure {
                kind,
                feature_branch: branch(),
                commits_ahead: one_commit,
            },
            event,
            at: chrono::Utc::now(),
        }
    }

    /// Build a synthetic `MergeFailure → Coding` event (`FixupRequested`) for `story_id`.
    fn make_coding_fired(story_id: &str) -> TransitionFired {
        let failed_stage = Stage::MergeFailure {
            kind: MergeFailureKind::GatesFailed("error".to_string()),
            feature_branch: BranchName("feature/test".to_string()),
            commits_ahead: NonZeroU32::new(1).unwrap(),
        };
        let coding_stage = Stage::Coding {
            claim: None,
            plan: Default::default(),
        };
        TransitionFired {
            story_id: StoryId(story_id.to_string()),
            before: failed_stage,
            after: coding_stage,
            event: PipelineEvent::FixupRequested,
            at: chrono::Utc::now(),
        }
    }

    /// Feed `times` synthetic `MergeFailure` events for `story_id` through `on_transition`.
    fn fire_merge_failures(
        root: &Path,
        story_id: &str,
        kind: &MergeFailureKind,
        times: usize,
        counters: &mut Counters,
    ) {
        for _ in 0..times {
            let fired = make_merge_failure_fired(story_id, kind.clone());
            on_transition(root, &fired, counters);
        }
    }

    /// AC3 (threshold-not-reached): 2 consecutive failures below threshold of 3 must NOT block.
    #[test]
    fn below_threshold_does_not_block() {
        let tmp = tempfile::tempdir().unwrap();
        setup_project(&tmp);
        let story_id = "1018_below";
        seed_at_merge(story_id);

        // Put the stored story into MergeFailure so its stage can be inspected afterwards.
        crate::agents::lifecycle::transition_to_merge_failure(
            story_id,
            MergeFailureKind::GatesFailed("error".to_string()),
        )
        .expect("initial MergeFailure transition");

        // Two events only; the default threshold is 3.
        let mut counters = Counters::new();
        let kind = MergeFailureKind::GatesFailed("error".to_string());
        fire_merge_failures(tmp.path(), story_id, &kind, 2, &mut counters);

        // The story must not have been moved to Blocked.
        let item = crate::pipeline_state::read_typed(story_id)
            .expect("read")
            .expect("item");
        assert!(
            matches!(item.stage, Stage::MergeFailure { .. }),
            "story must still be in MergeFailure after 2 failures (threshold 3): {:?}",
            item.stage
        );
    }

    /// AC3 (threshold-reached): 3 consecutive failures at threshold of 3 must block.
    #[test]
    fn at_threshold_blocks_with_failure_kind_in_reason() {
        let tmp = tempfile::tempdir().unwrap();
        setup_project(&tmp);
        let story_id = "1018_at_threshold";
        seed_at_merge(story_id);

        crate::agents::lifecycle::transition_to_merge_failure(
            story_id,
            MergeFailureKind::GatesFailed("fmt error".to_string()),
        )
        .expect("initial MergeFailure transition");

        // The third event is the one that must trigger the block.
        let mut counters = Counters::new();
        let kind = MergeFailureKind::GatesFailed("fmt error".to_string());
        fire_merge_failures(tmp.path(), story_id, &kind, 3, &mut counters);

        let item = crate::pipeline_state::read_typed(story_id)
            .expect("read")
            .expect("item");
        match &item.stage {
            // Blocked as required; the reason must also name the failure kind.
            Stage::Blocked { reason } => assert!(
                reason.contains("GatesFailed"),
                "block reason must name the failure kind: {reason}"
            ),
            other => panic!(
                "story must be Blocked after 3 consecutive MergeFailures: {:?}",
                other
            ),
        }
    }

    /// AC3 (reset): counter clears after a non-MergeFailure transition.
    ///
    /// 2 failures → FixupRequested reset → 2 more failures: still below threshold, no block.
    #[test]
    fn counter_resets_on_non_merge_failure_transition() {
        let tmp = tempfile::tempdir().unwrap();
        setup_project(&tmp);
        let story_id = "1018_reset";
        seed_at_merge(story_id);

        crate::agents::lifecycle::transition_to_merge_failure(
            story_id,
            MergeFailureKind::ConflictDetected(None),
        )
        .expect("initial MergeFailure transition");

        let mut counters = Counters::new();
        let kind = MergeFailureKind::ConflictDetected(None);
        let key = StoryId(story_id.to_string());

        // First streak: two failures.
        fire_merge_failures(tmp.path(), story_id, &kind, 2, &mut counters);
        assert_eq!(
            counters.get(&key).map(|e| e.0),
            Some(2),
            "counter must be 2 after 2 failures"
        );

        // A FixupRequested event (non-MergeFailure destination) must drop the entry.
        let reset_fired = make_coding_fired(story_id);
        on_transition(tmp.path(), &reset_fired, &mut counters);
        assert!(
            !counters.contains_key(&key),
            "counter must be cleared after non-MergeFailure transition"
        );

        // Re-seed to MergeFailure so a block transition could be applied if one were emitted.
        crate::agents::lifecycle::transition_to_merge_failure(
            story_id,
            MergeFailureKind::ConflictDetected(None),
        )
        .expect("re-enter MergeFailure after reset");

        // Second streak: two more failures, still below the threshold.
        fire_merge_failures(tmp.path(), story_id, &kind, 2, &mut counters);

        let item = crate::pipeline_state::read_typed(story_id)
            .expect("read")
            .expect("item");
        assert!(
            matches!(item.stage, Stage::MergeFailure { .. }),
            "story must still be in MergeFailure after reset + 2 new failures: {:?}",
            item.stage
        );
    }
}
@@ -4,6 +4,8 @@
mod auto_assign;
mod backlog;
mod merge;
/// TransitionFired subscriber that auto-blocks stories after N consecutive MergeFailure transitions.
pub(crate) mod merge_failure_block_subscriber;
/// TransitionFired subscriber that auto-spawns mergemaster on ConflictDetected merge failures.
pub(crate) mod merge_failure_subscriber;
mod pipeline;
@@ -15,5 +17,7 @@ pub(crate) mod watchdog;
// so that pool::lifecycle and pool::pipeline continue to access them unchanged.
pub(super) use scan::{find_free_agent_for_stage, is_agent_free};
/// Re-export for `startup::tick_loop`.
pub(crate) use merge_failure_block_subscriber::spawn_merge_failure_block_subscriber;
/// Re-export for `startup::tick_loop`.
pub(crate) use merge_failure_subscriber::spawn_merge_failure_subscriber;