Files
huskies/server/src/agents/pool/auto_assign/merge_failure_subscriber.rs
T

369 lines
14 KiB
Rust
Raw Normal View History

2026-05-13 19:29:08 +00:00
//! TransitionFired subscriber that auto-spawns mergemaster on ConflictDetected merge failures.
//!
//! Listens on the pipeline transition broadcast channel and schedules a
//! mergemaster agent whenever a story enters
//! `Stage::MergeFailure { kind: ConflictDetected(_), .. }`.
//! Other [`MergeFailureKind`] variants require human intervention and are
//! intentionally ignored here.
use std::path::{Path, PathBuf};
use std::sync::Arc;
use crate::pipeline_state::{MergeFailureKind, Stage};
use crate::slog;
use crate::slog_warn;
use super::super::super::PipelineStage;
use super::super::AgentPool;
use super::scan::{find_free_agent_for_stage, is_story_assigned_for_stage};
/// Spawn a background task that auto-spawns mergemaster agents on
/// `Stage::MergeFailure { kind: ConflictDetected(_) }` transitions.
///
/// The task subscribes to the pipeline transition broadcast channel and calls
/// [`AgentPool::start_agent`] with the first free mergemaster agent whenever a
/// story transitions into a recoverable conflict state. All other
/// [`MergeFailureKind`] variants are silently skipped — they need a human.
pub(crate) fn spawn_merge_failure_subscriber(pool: Arc<AgentPool>, project_root: PathBuf) {
let mut rx = crate::pipeline_state::subscribe_transitions();
tokio::spawn(async move {
loop {
match rx.recv().await {
Ok(fired) => {
on_merge_failure_transition(&pool, &project_root, &fired).await;
}
Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {
slog_warn!(
"[merge-failure-sub] Subscriber lagged, skipped {n} event(s). \
ConflictDetected stories may need manual mergemaster spawn."
);
}
Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
}
}
});
}
/// Handle one pipeline transition, starting a mergemaster agent when the
/// story has just entered `Stage::MergeFailure` with
/// [`MergeFailureKind::ConflictDetected`].
///
/// Transitions whose `after` stage is not a merge failure, and merge failures
/// of any other kind, are ignored. For a conflict the handler:
///
/// 1. loads the project config from `project_root`;
/// 2. returns without doing anything if `is_story_assigned_for_stage` reports
///    the story already has a mergemaster;
/// 3. otherwise asks `find_free_agent_for_stage` for a mergemaster agent and
///    passes it to [`AgentPool::start_agent`].
///
/// Nothing is returned to the caller: a config-load failure, a failure to
/// lock the pool's agent table, a spawn error and the absence of a free agent
/// are each written to the log and the handler simply returns.
async fn on_merge_failure_transition(
    pool: &AgentPool,
    project_root: &Path,
    fired: &crate::pipeline_state::TransitionFired,
) {
    let Stage::MergeFailure { kind, .. } = &fired.after else {
        return;
    };

    // Kept exhaustive (no `_` arm) so that adding a variant to
    // `MergeFailureKind` forces a decision here at compile time.
    match kind {
        MergeFailureKind::ConflictDetected(_) => {}
        // GatesFailed, EmptyDiff, NoCommits, Other — all require human intervention.
        MergeFailureKind::GatesFailed(_)
        | MergeFailureKind::EmptyDiff
        | MergeFailureKind::NoCommits
        | MergeFailureKind::Other(_) => return,
    }

    let story_id = &fired.story_id.0;

    let config = match crate::config::ProjectConfig::load(project_root) {
        Ok(loaded) => loaded,
        Err(e) => {
            slog_warn!("[merge-failure-sub] Failed to load config for '{story_id}': {e}");
            return;
        }
    };

    // The lock guard lives only inside this block, so it is released before
    // the `.await` on `start_agent` further down.
    let free_agent: Option<String> = {
        let agents = match pool.agents.lock() {
            Ok(guard) => guard,
            Err(e) => {
                slog_warn!(
                    "[merge-failure-sub] Failed to lock agent pool for '{story_id}': {e}"
                );
                return;
            }
        };

        let already_assigned =
            is_story_assigned_for_stage(&config, &agents, story_id, &PipelineStage::Mergemaster);
        if already_assigned {
            // mergemaster already running for this story
            return;
        }

        find_free_agent_for_stage(&config, &agents, &PipelineStage::Mergemaster)
            .map(|name| name.to_string())
    };

    let Some(agent) = free_agent else {
        slog!(
            "[merge-failure-sub] ConflictDetected on '{story_id}'; \
             no free mergemaster agent available."
        );
        return;
    };

    slog!(
        "[merge-failure-sub] ConflictDetected on '{story_id}'; \
         auto-spawning mergemaster '{agent}'."
    );
    let spawn_result = pool
        .start_agent(project_root, story_id, Some(&agent), None, None)
        .await;
    if let Err(e) = spawn_result {
        slog!("[merge-failure-sub] Failed to spawn '{agent}' for '{story_id}': {e}");
    }
}
// ── Tests ──────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agents::{AgentPool, AgentStatus};
    use crate::io::watcher::WatcherEvent;
    use std::sync::Arc;
    use tokio::sync::broadcast;

    // ── Fixtures ────────────────────────────────────────────────────────────

    /// Write a `.huskies/project.toml` under `tmp` that declares a single
    /// agent named `mergemaster` for the `mergemaster` stage.
    fn setup_project(tmp: &tempfile::TempDir) {
        let huskies_dir = tmp.path().join(".huskies");
        std::fs::create_dir_all(&huskies_dir).unwrap();

        let manifest = "[[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n";
        std::fs::write(huskies_dir.join("project.toml"), manifest).unwrap();
    }

    /// Initialise the test CRDT state and content store, then write
    /// `story_id` as an item in the `4_merge` stage.
    fn seed_at_merge(story_id: &str) {
        crate::crdt_state::init_for_test();
        crate::db::ensure_content_store();

        let content = "---\nname: Test\n---\n";
        let meta = crate::db::ItemMeta::named("Test");
        crate::db::write_item_with_content(story_id, "4_merge", content, meta);
    }

    /// Build an [`AgentPool`] on `port` backed by a fresh watcher channel.
    fn make_pool(port: u16) -> Arc<AgentPool> {
        let (watcher_tx, _) = broadcast::channel::<WatcherEvent>(4);
        Arc::new(AgentPool::new(port, watcher_tx))
    }

    /// Build a `Merge` → `MergeFailure { kind }` transition event for
    /// `story_id`, using a fixed `feature/test` branch that is one commit
    /// ahead.
    fn make_fired(
        story_id: &str,
        kind: MergeFailureKind,
    ) -> crate::pipeline_state::TransitionFired {
        use crate::pipeline_state::{BranchName, PipelineEvent, Stage, StoryId, TransitionFired};
        use std::num::NonZeroU32;

        let branch = || BranchName("feature/test".to_string());
        let one_ahead = NonZeroU32::new(1).unwrap();

        let before = Stage::Merge {
            feature_branch: branch(),
            commits_ahead: one_ahead,
        };
        let after = Stage::MergeFailure {
            kind: kind.clone(),
            feature_branch: branch(),
            commits_ahead: one_ahead,
        };

        TransitionFired {
            story_id: StoryId(story_id.to_string()),
            before,
            after,
            event: PipelineEvent::MergeFailed { kind },
            at: chrono::Utc::now(),
        }
    }

    /// Count the pool entries whose key contains `story_id`, whose agent is
    /// `mergemaster`, and whose status is `Pending` or `Running`.
    fn active_mergemaster_count(pool: &AgentPool, story_id: &str) -> usize {
        let agents = pool.agents.lock().unwrap();
        agents
            .iter()
            .filter(|(key, agent)| {
                key.contains(story_id)
                    && agent.agent_name == "mergemaster"
                    && matches!(agent.status, AgentStatus::Pending | AgentStatus::Running)
            })
            .count()
    }

    /// Push a merge failure of `kind` for `story_id` through the real
    /// broadcast subscriber and assert that no mergemaster became active.
    ///
    /// `label` is the variant name interpolated into the assertion message.
    async fn assert_subscriber_ignores(
        story_id: &str,
        port: u16,
        kind: MergeFailureKind,
        label: &str,
    ) {
        let tmp = tempfile::tempdir().unwrap();
        setup_project(&tmp);
        seed_at_merge(story_id);
        let pool = make_pool(port);

        spawn_merge_failure_subscriber(Arc::clone(&pool), tmp.path().to_path_buf());
        crate::agents::lifecycle::transition_to_merge_failure(story_id, kind)
            .expect("transition must succeed");

        // Give the subscriber time to run (it should do nothing).
        tokio::time::sleep(std::time::Duration::from_millis(100)).await;

        let spawned = active_mergemaster_count(&pool, story_id) > 0;
        assert!(!spawned, "mergemaster must NOT be spawned for {label}");
    }

    // ── AC4: each MergeFailureKind variant ──────────────────────────────────

    /// ConflictDetected → the handler must spawn mergemaster.
    ///
    /// Despite the `via_subscriber` suffix in the name, this test invokes
    /// `on_merge_failure_transition` directly rather than going through the
    /// broadcast subscriber, to avoid cross-test channel contamination from
    /// the global TRANSITION_TX.
    #[tokio::test]
    async fn conflict_detected_spawns_mergemaster_via_subscriber() {
        let tmp = tempfile::tempdir().unwrap();
        setup_project(&tmp);
        let story_id = "998_sub_conflict";
        seed_at_merge(story_id);
        let pool = make_pool(3998);

        let conflict =
            MergeFailureKind::ConflictDetected(Some("CONFLICT (content): src/lib.rs".to_string()));
        let fired = make_fired(story_id, conflict);
        on_merge_failure_transition(&pool, tmp.path(), &fired).await;

        assert!(
            active_mergemaster_count(&pool, story_id) > 0,
            "mergemaster must be spawned for ConflictDetected"
        );
    }

    /// GatesFailed → subscriber must NOT spawn mergemaster (human intervention needed).
    #[tokio::test]
    async fn gates_failed_does_not_spawn_mergemaster() {
        assert_subscriber_ignores(
            "998_sub_gates",
            3997,
            MergeFailureKind::GatesFailed("error[E0308]: mismatched types".to_string()),
            "GatesFailed",
        )
        .await;
    }

    /// EmptyDiff → subscriber must NOT spawn mergemaster.
    #[tokio::test]
    async fn empty_diff_does_not_spawn_mergemaster() {
        assert_subscriber_ignores(
            "998_sub_emptydiff",
            3996,
            MergeFailureKind::EmptyDiff,
            "EmptyDiff",
        )
        .await;
    }

    /// NoCommits → subscriber must NOT spawn mergemaster.
    #[tokio::test]
    async fn no_commits_does_not_spawn_mergemaster() {
        assert_subscriber_ignores(
            "998_sub_nocommits",
            3995,
            MergeFailureKind::NoCommits,
            "NoCommits",
        )
        .await;
    }

    /// Other(_) → subscriber must NOT spawn mergemaster.
    #[tokio::test]
    async fn other_does_not_spawn_mergemaster() {
        assert_subscriber_ignores(
            "998_sub_other",
            3994,
            MergeFailureKind::Other("unknown error".to_string()),
            "Other",
        )
        .await;
    }

    /// ConflictDetected self-loop — the handler must NOT spawn a second
    /// mergemaster when one is already Pending/Running for the story.
    ///
    /// The handler is called twice directly (no broadcast subscriber), so the
    /// second call runs only after the first has returned and is expected to
    /// see the entry the first one created and return early.
    #[tokio::test]
    async fn conflict_detected_self_loop_does_not_double_spawn() {
        let tmp = tempfile::tempdir().unwrap();
        setup_project(&tmp);
        let story_id = "998_sub_selfloop";
        seed_at_merge(story_id);
        let pool = make_pool(3993);

        let fired = make_fired(
            story_id,
            MergeFailureKind::ConflictDetected(Some("CONFLICT".to_string())),
        );

        // First call — spawns mergemaster (agent enters Pending).
        on_merge_failure_transition(&pool, tmp.path(), &fired).await;
        assert!(
            active_mergemaster_count(&pool, story_id) > 0,
            "mergemaster must be Pending after first ConflictDetected"
        );

        // Second call (self-loop) — agent is still Pending; guard must prevent double-spawn.
        on_merge_failure_transition(&pool, tmp.path(), &fired).await;
        let active_count = active_mergemaster_count(&pool, story_id);
        assert_eq!(
            active_count, 1,
            "mergemaster must not be double-spawned on ConflictDetected self-loop"
        );
    }
}