huskies: merge 920
This commit is contained in:
@@ -657,6 +657,92 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
// ── Story 920: transient vs genuine mergemaster termination ──────────────
|
||||
|
||||
/// AC4 (transient): a mergemaster that was killed transiently (no
|
||||
/// report_merge_failure, spawn count below cap) must be re-spawned by the
|
||||
/// next auto-assign pass — `mergemaster_attempted` stays false.
|
||||
#[tokio::test]
|
||||
async fn transient_mergemaster_exit_allows_respawn() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let sk = tmp.path().join(".huskies");
|
||||
std::fs::create_dir_all(&sk).unwrap();
|
||||
std::fs::write(
|
||||
sk.join("project.toml"),
|
||||
"[[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n",
|
||||
)
|
||||
.unwrap();
|
||||
crate::crdt_state::init_for_test();
|
||||
crate::db::ensure_content_store();
|
||||
crate::db::write_item_with_content(
|
||||
"920_story_transient",
|
||||
"4_merge_failure",
|
||||
"---\nname: Transient\nmerge_failure: \"CONFLICT (content): foo.rs\"\n---\n",
|
||||
crate::db::ItemMeta::from_yaml(
|
||||
"---\nname: Transient\nmerge_failure: \"CONFLICT (content): foo.rs\"\n---\n",
|
||||
),
|
||||
);
|
||||
// Simulate two previous transient exits (below cap of 3) recorded in DB.
|
||||
crate::db::write_content("920_story_transient:mergemaster_spawn_count", "2");
|
||||
|
||||
// mergemaster_attempted must still be false (transient exits don't set it).
|
||||
let pool = AgentPool::new_test(3001);
|
||||
pool.auto_assign_available_work(tmp.path()).await;
|
||||
|
||||
let agents = pool.agents.lock().unwrap();
|
||||
let respawned = agents.iter().any(|(key, a)| {
|
||||
key.contains("920_story_transient")
|
||||
&& a.agent_name == "mergemaster"
|
||||
&& matches!(a.status, AgentStatus::Pending | AgentStatus::Running)
|
||||
});
|
||||
assert!(
|
||||
respawned,
|
||||
"mergemaster must re-spawn after transient terminations while below cap"
|
||||
);
|
||||
}
|
||||
|
||||
/// AC4 (genuine): after report_merge_failure, mergemaster_attempted is set
|
||||
/// to true and auto-assign must not trigger another re-spawn.
|
||||
#[tokio::test]
|
||||
async fn genuine_mergemaster_exit_no_respawn() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let sk = tmp.path().join(".huskies");
|
||||
std::fs::create_dir_all(&sk).unwrap();
|
||||
std::fs::write(
|
||||
sk.join("project.toml"),
|
||||
"[[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n",
|
||||
)
|
||||
.unwrap();
|
||||
crate::crdt_state::init_for_test();
|
||||
crate::db::ensure_content_store();
|
||||
// mergemaster_attempted is set by the exit path when genuine give-up occurs.
|
||||
crate::db::write_item_with_content(
|
||||
"920_story_genuine",
|
||||
"4_merge_failure",
|
||||
"---\nname: Genuine\nmerge_failure: \"CONFLICT (content): bar.rs\"\n---\n",
|
||||
crate::db::ItemMeta::from_yaml(
|
||||
"---\nname: Genuine\nmerge_failure: \"CONFLICT (content): bar.rs\"\n---\n",
|
||||
),
|
||||
);
|
||||
// The CRDT register is the sole authority; set it explicitly as the
|
||||
// spawn exit path would after report_merge_failure.
|
||||
crate::crdt_state::set_mergemaster_attempted("920_story_genuine", true);
|
||||
|
||||
let pool = AgentPool::new_test(3001);
|
||||
pool.auto_assign_available_work(tmp.path()).await;
|
||||
|
||||
let agents = pool.agents.lock().unwrap();
|
||||
let spawned = agents.iter().any(|(key, a)| {
|
||||
key.contains("920_story_genuine")
|
||||
&& a.agent_name == "mergemaster"
|
||||
&& matches!(a.status, AgentStatus::Pending | AgentStatus::Running)
|
||||
});
|
||||
assert!(
|
||||
!spawned,
|
||||
"mergemaster must not re-spawn after genuine give-up (mergemaster_attempted=true)"
|
||||
);
|
||||
}
|
||||
|
||||
/// Two concurrent auto_assign_available_work calls must not assign the same
|
||||
/// agent to two stories simultaneously. After both complete, at most one
|
||||
/// Pending/Running entry must exist per agent name.
|
||||
|
||||
@@ -146,7 +146,6 @@ impl AgentPool {
|
||||
"[auto-assign] Content conflict on '{story_id}'; \
|
||||
auto-spawning mergemaster '{agent_name}'."
|
||||
);
|
||||
crate::crdt_state::set_mergemaster_attempted(story_id, true);
|
||||
if let Err(e) = self
|
||||
.start_agent(project_root, story_id, Some(&agent_name), None, None)
|
||||
.await
|
||||
|
||||
@@ -512,15 +512,19 @@ pub(super) async fn run_agent_spawn(
|
||||
crate::db::delete_content(&format!("{sid}:abort_respawn_count"));
|
||||
|
||||
if stage == PipelineStage::Mergemaster {
|
||||
let (tx_done, done_session_id) = {
|
||||
let (tx_done, done_session_id, merge_failure_reported) = {
|
||||
let mut lock = match agents_ref.lock() {
|
||||
Ok(a) => a,
|
||||
Err(_) => return,
|
||||
};
|
||||
if let Some(agent) = lock.remove(&key_clone) {
|
||||
(agent.tx, agent.session_id.or(result.session_id))
|
||||
(
|
||||
agent.tx,
|
||||
agent.session_id.or(result.session_id),
|
||||
agent.merge_failure_reported,
|
||||
)
|
||||
} else {
|
||||
(tx_clone.clone(), result.session_id)
|
||||
(tx_clone.clone(), result.session_id, false)
|
||||
}
|
||||
};
|
||||
// Clear any stale Running merge job so the next mergemaster
|
||||
@@ -531,6 +535,44 @@ pub(super) async fn run_agent_spawn(
|
||||
{
|
||||
crate::crdt_state::delete_merge_job(&sid);
|
||||
}
|
||||
// Classify termination: genuine (report_merge_failure called, or
|
||||
// the transient-respawn budget is exhausted) vs transient
|
||||
// (watchdog / rate-limit / crash without an explicit give-up call).
|
||||
// Only mark mergemaster_attempted on a genuine give-up so that
|
||||
// transient exits can be re-spawned up to the cap (story 920).
|
||||
const MERGEMASTER_RESPAWN_CAP: u32 = 3;
|
||||
let spawn_count_key = format!("{sid}:mergemaster_spawn_count");
|
||||
let is_genuine = if merge_failure_reported {
|
||||
slog!(
|
||||
"[agents] Mergemaster '{aname}' for '{sid}' gave up genuinely \
|
||||
(report_merge_failure called)."
|
||||
);
|
||||
true
|
||||
} else {
|
||||
let count = crate::db::read_content(&spawn_count_key)
|
||||
.and_then(|s| s.trim().parse::<u32>().ok())
|
||||
.unwrap_or(0)
|
||||
+ 1;
|
||||
crate::db::write_content(&spawn_count_key, &count.to_string());
|
||||
if count >= MERGEMASTER_RESPAWN_CAP {
|
||||
slog!(
|
||||
"[agents] Mergemaster '{aname}' for '{sid}' exhausted \
|
||||
respawn budget ({count}/{MERGEMASTER_RESPAWN_CAP}); \
|
||||
marking as permanently blocked."
|
||||
);
|
||||
true
|
||||
} else {
|
||||
slog!(
|
||||
"[agents] Mergemaster '{aname}' for '{sid}' terminated \
|
||||
transiently (spawn {count}/{MERGEMASTER_RESPAWN_CAP}); \
|
||||
will re-spawn."
|
||||
);
|
||||
false
|
||||
}
|
||||
};
|
||||
if is_genuine {
|
||||
crate::crdt_state::set_mergemaster_attempted(&sid, true);
|
||||
}
|
||||
let _ = tx_done.send(AgentEvent::Done {
|
||||
story_id: sid.clone(),
|
||||
agent_name: aname.clone(),
|
||||
|
||||
Reference in New Issue
Block a user