huskies: merge 920

This commit is contained in:
dave
2026-05-12 16:36:15 +00:00
parent 19b7edb60c
commit 86e8f2441f
3 changed files with 131 additions and 4 deletions
+45 -3
View File
@@ -512,15 +512,19 @@ pub(super) async fn run_agent_spawn(
crate::db::delete_content(&format!("{sid}:abort_respawn_count"));
if stage == PipelineStage::Mergemaster {
let (tx_done, done_session_id) = {
let (tx_done, done_session_id, merge_failure_reported) = {
let mut lock = match agents_ref.lock() {
Ok(a) => a,
Err(_) => return,
};
if let Some(agent) = lock.remove(&key_clone) {
(agent.tx, agent.session_id.or(result.session_id))
(
agent.tx,
agent.session_id.or(result.session_id),
agent.merge_failure_reported,
)
} else {
(tx_clone.clone(), result.session_id)
(tx_clone.clone(), result.session_id, false)
}
};
// Clear any stale Running merge job so the next mergemaster
@@ -531,6 +535,44 @@ pub(super) async fn run_agent_spawn(
{
crate::crdt_state::delete_merge_job(&sid);
}
// Classify termination: genuine (report_merge_failure called, or
// the transient-respawn budget is exhausted) vs transient
// (watchdog / rate-limit / crash without an explicit give-up call).
// Only mark mergemaster_attempted on a genuine give-up so that
// transient exits can be re-spawned up to the cap (story 920).
const MERGEMASTER_RESPAWN_CAP: u32 = 3;
let spawn_count_key = format!("{sid}:mergemaster_spawn_count");
let is_genuine = if merge_failure_reported {
slog!(
"[agents] Mergemaster '{aname}' for '{sid}' gave up genuinely \
(report_merge_failure called)."
);
true
} else {
let count = crate::db::read_content(&spawn_count_key)
.and_then(|s| s.trim().parse::<u32>().ok())
.unwrap_or(0)
+ 1;
crate::db::write_content(&spawn_count_key, &count.to_string());
if count >= MERGEMASTER_RESPAWN_CAP {
slog!(
"[agents] Mergemaster '{aname}' for '{sid}' exhausted \
respawn budget ({count}/{MERGEMASTER_RESPAWN_CAP}); \
marking as permanently blocked."
);
true
} else {
slog!(
"[agents] Mergemaster '{aname}' for '{sid}' terminated \
transiently (spawn {count}/{MERGEMASTER_RESPAWN_CAP}); \
will re-spawn."
);
false
}
};
if is_genuine {
crate::crdt_state::set_mergemaster_attempted(&sid, true);
}
let _ = tx_done.send(AgentEvent::Done {
story_id: sid.clone(),
agent_name: aname.clone(),