huskies: merge 920
This commit is contained in:
@@ -657,6 +657,92 @@ mod tests {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Story 920: transient vs genuine mergemaster termination ──────────────
|
||||||
|
|
||||||
|
/// AC4 (transient): a mergemaster that was killed transiently (no
|
||||||
|
/// report_merge_failure, spawn count below cap) must be re-spawned by the
|
||||||
|
/// next auto-assign pass — `mergemaster_attempted` stays false.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn transient_mergemaster_exit_allows_respawn() {
|
||||||
|
let tmp = tempfile::tempdir().unwrap();
|
||||||
|
let sk = tmp.path().join(".huskies");
|
||||||
|
std::fs::create_dir_all(&sk).unwrap();
|
||||||
|
std::fs::write(
|
||||||
|
sk.join("project.toml"),
|
||||||
|
"[[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
crate::crdt_state::init_for_test();
|
||||||
|
crate::db::ensure_content_store();
|
||||||
|
crate::db::write_item_with_content(
|
||||||
|
"920_story_transient",
|
||||||
|
"4_merge_failure",
|
||||||
|
"---\nname: Transient\nmerge_failure: \"CONFLICT (content): foo.rs\"\n---\n",
|
||||||
|
crate::db::ItemMeta::from_yaml(
|
||||||
|
"---\nname: Transient\nmerge_failure: \"CONFLICT (content): foo.rs\"\n---\n",
|
||||||
|
),
|
||||||
|
);
|
||||||
|
// Simulate two previous transient exits (below cap of 3) recorded in DB.
|
||||||
|
crate::db::write_content("920_story_transient:mergemaster_spawn_count", "2");
|
||||||
|
|
||||||
|
// mergemaster_attempted must still be false (transient exits don't set it).
|
||||||
|
let pool = AgentPool::new_test(3001);
|
||||||
|
pool.auto_assign_available_work(tmp.path()).await;
|
||||||
|
|
||||||
|
let agents = pool.agents.lock().unwrap();
|
||||||
|
let respawned = agents.iter().any(|(key, a)| {
|
||||||
|
key.contains("920_story_transient")
|
||||||
|
&& a.agent_name == "mergemaster"
|
||||||
|
&& matches!(a.status, AgentStatus::Pending | AgentStatus::Running)
|
||||||
|
});
|
||||||
|
assert!(
|
||||||
|
respawned,
|
||||||
|
"mergemaster must re-spawn after transient terminations while below cap"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// AC4 (genuine): after report_merge_failure, mergemaster_attempted is set
|
||||||
|
/// to true and auto-assign must not trigger another re-spawn.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn genuine_mergemaster_exit_no_respawn() {
|
||||||
|
let tmp = tempfile::tempdir().unwrap();
|
||||||
|
let sk = tmp.path().join(".huskies");
|
||||||
|
std::fs::create_dir_all(&sk).unwrap();
|
||||||
|
std::fs::write(
|
||||||
|
sk.join("project.toml"),
|
||||||
|
"[[agent]]\nname = \"mergemaster\"\nstage = \"mergemaster\"\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
crate::crdt_state::init_for_test();
|
||||||
|
crate::db::ensure_content_store();
|
||||||
|
// mergemaster_attempted is set by the exit path when genuine give-up occurs.
|
||||||
|
crate::db::write_item_with_content(
|
||||||
|
"920_story_genuine",
|
||||||
|
"4_merge_failure",
|
||||||
|
"---\nname: Genuine\nmerge_failure: \"CONFLICT (content): bar.rs\"\n---\n",
|
||||||
|
crate::db::ItemMeta::from_yaml(
|
||||||
|
"---\nname: Genuine\nmerge_failure: \"CONFLICT (content): bar.rs\"\n---\n",
|
||||||
|
),
|
||||||
|
);
|
||||||
|
// The CRDT register is the sole authority; set it explicitly as the
|
||||||
|
// spawn exit path would after report_merge_failure.
|
||||||
|
crate::crdt_state::set_mergemaster_attempted("920_story_genuine", true);
|
||||||
|
|
||||||
|
let pool = AgentPool::new_test(3001);
|
||||||
|
pool.auto_assign_available_work(tmp.path()).await;
|
||||||
|
|
||||||
|
let agents = pool.agents.lock().unwrap();
|
||||||
|
let spawned = agents.iter().any(|(key, a)| {
|
||||||
|
key.contains("920_story_genuine")
|
||||||
|
&& a.agent_name == "mergemaster"
|
||||||
|
&& matches!(a.status, AgentStatus::Pending | AgentStatus::Running)
|
||||||
|
});
|
||||||
|
assert!(
|
||||||
|
!spawned,
|
||||||
|
"mergemaster must not re-spawn after genuine give-up (mergemaster_attempted=true)"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/// Two concurrent auto_assign_available_work calls must not assign the same
|
/// Two concurrent auto_assign_available_work calls must not assign the same
|
||||||
/// agent to two stories simultaneously. After both complete, at most one
|
/// agent to two stories simultaneously. After both complete, at most one
|
||||||
/// Pending/Running entry must exist per agent name.
|
/// Pending/Running entry must exist per agent name.
|
||||||
|
|||||||
@@ -146,7 +146,6 @@ impl AgentPool {
|
|||||||
"[auto-assign] Content conflict on '{story_id}'; \
|
"[auto-assign] Content conflict on '{story_id}'; \
|
||||||
auto-spawning mergemaster '{agent_name}'."
|
auto-spawning mergemaster '{agent_name}'."
|
||||||
);
|
);
|
||||||
crate::crdt_state::set_mergemaster_attempted(story_id, true);
|
|
||||||
if let Err(e) = self
|
if let Err(e) = self
|
||||||
.start_agent(project_root, story_id, Some(&agent_name), None, None)
|
.start_agent(project_root, story_id, Some(&agent_name), None, None)
|
||||||
.await
|
.await
|
||||||
|
|||||||
@@ -512,15 +512,19 @@ pub(super) async fn run_agent_spawn(
|
|||||||
crate::db::delete_content(&format!("{sid}:abort_respawn_count"));
|
crate::db::delete_content(&format!("{sid}:abort_respawn_count"));
|
||||||
|
|
||||||
if stage == PipelineStage::Mergemaster {
|
if stage == PipelineStage::Mergemaster {
|
||||||
let (tx_done, done_session_id) = {
|
let (tx_done, done_session_id, merge_failure_reported) = {
|
||||||
let mut lock = match agents_ref.lock() {
|
let mut lock = match agents_ref.lock() {
|
||||||
Ok(a) => a,
|
Ok(a) => a,
|
||||||
Err(_) => return,
|
Err(_) => return,
|
||||||
};
|
};
|
||||||
if let Some(agent) = lock.remove(&key_clone) {
|
if let Some(agent) = lock.remove(&key_clone) {
|
||||||
(agent.tx, agent.session_id.or(result.session_id))
|
(
|
||||||
|
agent.tx,
|
||||||
|
agent.session_id.or(result.session_id),
|
||||||
|
agent.merge_failure_reported,
|
||||||
|
)
|
||||||
} else {
|
} else {
|
||||||
(tx_clone.clone(), result.session_id)
|
(tx_clone.clone(), result.session_id, false)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
// Clear any stale Running merge job so the next mergemaster
|
// Clear any stale Running merge job so the next mergemaster
|
||||||
@@ -531,6 +535,44 @@ pub(super) async fn run_agent_spawn(
|
|||||||
{
|
{
|
||||||
crate::crdt_state::delete_merge_job(&sid);
|
crate::crdt_state::delete_merge_job(&sid);
|
||||||
}
|
}
|
||||||
|
// Classify termination: genuine (report_merge_failure called, or
|
||||||
|
// the transient-respawn budget is exhausted) vs transient
|
||||||
|
// (watchdog / rate-limit / crash without an explicit give-up call).
|
||||||
|
// Only mark mergemaster_attempted on a genuine give-up so that
|
||||||
|
// transient exits can be re-spawned up to the cap (story 920).
|
||||||
|
const MERGEMASTER_RESPAWN_CAP: u32 = 3;
|
||||||
|
let spawn_count_key = format!("{sid}:mergemaster_spawn_count");
|
||||||
|
let is_genuine = if merge_failure_reported {
|
||||||
|
slog!(
|
||||||
|
"[agents] Mergemaster '{aname}' for '{sid}' gave up genuinely \
|
||||||
|
(report_merge_failure called)."
|
||||||
|
);
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
let count = crate::db::read_content(&spawn_count_key)
|
||||||
|
.and_then(|s| s.trim().parse::<u32>().ok())
|
||||||
|
.unwrap_or(0)
|
||||||
|
+ 1;
|
||||||
|
crate::db::write_content(&spawn_count_key, &count.to_string());
|
||||||
|
if count >= MERGEMASTER_RESPAWN_CAP {
|
||||||
|
slog!(
|
||||||
|
"[agents] Mergemaster '{aname}' for '{sid}' exhausted \
|
||||||
|
respawn budget ({count}/{MERGEMASTER_RESPAWN_CAP}); \
|
||||||
|
marking as permanently blocked."
|
||||||
|
);
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
slog!(
|
||||||
|
"[agents] Mergemaster '{aname}' for '{sid}' terminated \
|
||||||
|
transiently (spawn {count}/{MERGEMASTER_RESPAWN_CAP}); \
|
||||||
|
will re-spawn."
|
||||||
|
);
|
||||||
|
false
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if is_genuine {
|
||||||
|
crate::crdt_state::set_mergemaster_attempted(&sid, true);
|
||||||
|
}
|
||||||
let _ = tx_done.send(AgentEvent::Done {
|
let _ = tx_done.send(AgentEvent::Done {
|
||||||
story_id: sid.clone(),
|
story_id: sid.clone(),
|
||||||
agent_name: aname.clone(),
|
agent_name: aname.clone(),
|
||||||
|
|||||||
Reference in New Issue
Block a user