huskies: merge 961

This commit is contained in:
dave
2026-05-13 11:22:57 +00:00
parent 78b1ecdc3c
commit 8b53e20ca9
38 changed files with 327 additions and 146 deletions
+35 -20
View File
@@ -72,7 +72,8 @@ pub(super) fn maybe_inject_gate_failure(args: &mut Vec<String>, story_id: &str)
.map(|item| item.retry_count())
.unwrap_or(0);
if retry_count > 0
&& let Some(gate_output) = crate::db::read_content(&format!("{story_id}:gate_output"))
&& let Some(gate_output) =
crate::db::read_content(crate::db::ContentKey::GateOutput(story_id))
{
inject_gate_failure_section(args, &gate_output);
}
@@ -230,7 +231,10 @@ pub(super) async fn run_agent_spawn(
// Story 933: epic linkage is now a typed CRDT register on PipelineItemCrdt.
if let Some(view) = crate::crdt_state::read_item(&sid)
&& let Some(epic_id) = view.epic()
&& let Some(epic_content) = crate::db::read_content(&epic_id.to_string())
&& let Some(epic_content) = {
let epic_id_str = epic_id.to_string();
crate::db::read_content(crate::db::ContentKey::Story(&epic_id_str))
}
{
let block = format!(
"# Epic Context\n\nThis work item belongs to epic `{epic_id}`.\
@@ -434,12 +438,14 @@ pub(super) async fn run_agent_spawn(
// infinite loops; after the cap, block the story with a clear reason.
if result.aborted_signal && stage != PipelineStage::Mergemaster {
const ABORT_RESPAWN_CAP: u32 = 5;
let db_key = format!("{sid}:abort_respawn_count");
let count = crate::db::read_content(&db_key)
let count = crate::db::read_content(crate::db::ContentKey::AbortRespawnCount(&sid))
.and_then(|s| s.trim().parse::<u32>().ok())
.unwrap_or(0)
+ 1;
crate::db::write_content(&db_key, &count.to_string());
crate::db::write_content(
crate::db::ContentKey::AbortRespawnCount(&sid),
&count.to_string(),
);
// Remove the agent entry from the pool and emit Done so that
// any caller blocked on wait_for_agent is unblocked.
@@ -523,7 +529,7 @@ pub(super) async fn run_agent_spawn(
// Reset the abort-respawn counter on any non-aborted exit so that
// a single successful run clears the consecutive-crash history.
crate::db::delete_content(&format!("{sid}:abort_respawn_count"));
crate::db::delete_content(crate::db::ContentKey::AbortRespawnCount(&sid));
if stage == PipelineStage::Mergemaster {
let (tx_done, done_session_id, merge_failure_reported) = {
@@ -555,7 +561,6 @@ pub(super) async fn run_agent_spawn(
// Only mark mergemaster_attempted on a genuine give-up so that
// transient exits can be re-spawned up to the cap (story 920).
const MERGEMASTER_RESPAWN_CAP: u32 = 3;
let spawn_count_key = format!("{sid}:mergemaster_spawn_count");
let is_genuine = if merge_failure_reported {
slog!(
"[agents] Mergemaster '{aname}' for '{sid}' gave up genuinely \
@@ -563,11 +568,15 @@ pub(super) async fn run_agent_spawn(
);
true
} else {
let count = crate::db::read_content(&spawn_count_key)
.and_then(|s| s.trim().parse::<u32>().ok())
.unwrap_or(0)
+ 1;
crate::db::write_content(&spawn_count_key, &count.to_string());
let count =
crate::db::read_content(crate::db::ContentKey::MergeMasterSpawnCount(&sid))
.and_then(|s| s.trim().parse::<u32>().ok())
.unwrap_or(0)
+ 1;
crate::db::write_content(
crate::db::ContentKey::MergeMasterSpawnCount(&sid),
&count.to_string(),
);
if count >= MERGEMASTER_RESPAWN_CAP {
slog!(
"[agents] Mergemaster '{aname}' for '{sid}' exhausted \
@@ -667,7 +676,7 @@ mod tests {
let gate_output =
"error[E0308]: mismatched types\n --> src/lib.rs:5:10\n = expected i32, found &str";
crate::db::write_content(&format!("{story_id}:gate_output"), gate_output);
crate::db::write_content(crate::db::ContentKey::GateOutput(story_id), gate_output);
let mut args: Vec<String> = vec!["--verbose".to_string()];
maybe_inject_gate_failure(&mut args, story_id);
@@ -703,7 +712,10 @@ mod tests {
);
// retry_count is 0 (default — never bumped).
crate::db::write_content(&format!("{story_id}:gate_output"), "some previous output");
crate::db::write_content(
crate::db::ContentKey::GateOutput(story_id),
"some previous output",
);
let mut args: Vec<String> = vec!["--verbose".to_string()];
maybe_inject_gate_failure(&mut args, story_id);
@@ -767,17 +779,19 @@ mod tests {
crate::db::ItemMeta::named("Test"),
);
let db_key = format!("{story_id}:abort_respawn_count");
const CAP: u32 = 5;
// Simulate CAP consecutive abort-before-session exits.
for expected_count in 1u32..=CAP {
// This is exactly the counter logic in run_agent_spawn's abort path.
let count = crate::db::read_content(&db_key)
let count = crate::db::read_content(crate::db::ContentKey::AbortRespawnCount(story_id))
.and_then(|s| s.trim().parse::<u32>().ok())
.unwrap_or(0)
+ 1;
crate::db::write_content(&db_key, &count.to_string());
crate::db::write_content(
crate::db::ContentKey::AbortRespawnCount(story_id),
&count.to_string(),
);
assert_eq!(
count, expected_count,
"abort counter must increment by 1 each time"
@@ -795,9 +809,10 @@ mod tests {
}
// After CAP cycles the counter equals the cap — the story would be blocked.
let final_count: u32 = crate::db::read_content(&db_key)
.and_then(|s| s.trim().parse().ok())
.unwrap_or(0);
let final_count: u32 =
crate::db::read_content(crate::db::ContentKey::AbortRespawnCount(story_id))
.and_then(|s| s.trim().parse().ok())
.unwrap_or(0);
assert_eq!(
final_count, CAP,
"counter must equal {CAP} after {CAP} abort cycles"