huskies: merge 1089 bug Stuck-agent detector blocks stories on legitimate exploration / debugging — uses too narrow a "progress" signal
This commit is contained in:
@@ -78,21 +78,34 @@ impl AgentPool {
|
|||||||
// The coder exited with uncommitted content but no commits
|
// The coder exited with uncommitted content but no commits
|
||||||
// (typical "claude-code session boundary mid-sweep" pattern).
|
// (typical "claude-code session boundary mid-sweep" pattern).
|
||||||
// Use a PROGRESS-AWARE retry cap: the agent gets unlimited
|
// Use a PROGRESS-AWARE retry cap: the agent gets unlimited
|
||||||
// respawns as long as file edits keep growing between
|
// respawns as long as progress is being made between attempts.
|
||||||
// attempts; only when the worktree diff is byte-identical
|
// Progress is satisfied if EITHER (a) the worktree diff grew,
|
||||||
// to the previous attempt do we count it as "no progress".
|
// OR (b) the set of files the agent read grew. Raw tool-call
|
||||||
// After NO_PROGRESS_CAP consecutive no-progress respawns,
|
// count does NOT count — a looping agent can produce many calls.
|
||||||
// block for human attention.
|
// Only self-exited sessions with no file or read progress count
|
||||||
|
// toward the cap; forced exits (API error, network, budget
|
||||||
|
// exhaustion) are excluded (story 1089).
|
||||||
|
// After NO_PROGRESS_CAP consecutive qualifying no-progress
|
||||||
|
// respawns, block for human attention.
|
||||||
//
|
//
|
||||||
// TOTAL_ATTEMPTS_CAP is the OUTER bound: even if the agent
|
// TOTAL_ATTEMPTS_CAP is the OUTER bound: even if the agent
|
||||||
// keeps making file-edit progress every session, after this
|
// keeps making file-edit progress every session, after this
|
||||||
// many total respawns without a commit we escalate — caught
|
// many total respawns without a commit we escalate — catches
|
||||||
// the "agent flaps between different edits but never
|
// the "agent flaps between different edits but never commits"
|
||||||
// commits" pattern that the progress-aware counter would
|
// pattern that the progress-aware counter would never trigger.
|
||||||
// never trigger.
|
|
||||||
const NO_PROGRESS_CAP: u32 = 3;
|
const NO_PROGRESS_CAP: u32 = 3;
|
||||||
const TOTAL_ATTEMPTS_CAP: u32 = 8;
|
const TOTAL_ATTEMPTS_CAP: u32 = 8;
|
||||||
|
|
||||||
|
// AC1: consume the forced-exit flag written by spawn.rs when
|
||||||
|
// the agent process exited with a non-zero code.
|
||||||
|
let forced_exit = crate::db::read_content(
|
||||||
|
crate::db::ContentKey::CommitRecoveryForcedExit(story_id),
|
||||||
|
)
|
||||||
|
.is_some();
|
||||||
|
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryForcedExit(
|
||||||
|
story_id,
|
||||||
|
));
|
||||||
|
|
||||||
let current_fingerprint = worktree_path.as_deref().and_then(|p| {
|
let current_fingerprint = worktree_path.as_deref().and_then(|p| {
|
||||||
std::process::Command::new("git")
|
std::process::Command::new("git")
|
||||||
.args(["diff", "master"])
|
.args(["diff", "master"])
|
||||||
@@ -104,18 +117,31 @@ impl AgentPool {
|
|||||||
let stored_fingerprint = crate::db::read_content(
|
let stored_fingerprint = crate::db::read_content(
|
||||||
crate::db::ContentKey::CommitRecoveryDiffFingerprint(story_id),
|
crate::db::ContentKey::CommitRecoveryDiffFingerprint(story_id),
|
||||||
);
|
);
|
||||||
let made_progress = current_fingerprint.is_some()
|
let diff_progress = current_fingerprint.is_some()
|
||||||
&& stored_fingerprint.as_ref() != current_fingerprint.as_ref();
|
&& stored_fingerprint.as_ref() != current_fingerprint.as_ref();
|
||||||
let no_progress_count = if made_progress || stored_fingerprint.is_none() {
|
|
||||||
|
// AC2: check read-file set progress as an additional signal.
|
||||||
|
let read_progress = previous_session_id.as_deref().is_some_and(|session_id| {
|
||||||
|
collect_read_progress(&project_root, story_id, agent_name, session_id)
|
||||||
|
});
|
||||||
|
|
||||||
|
let made_progress = diff_progress || read_progress;
|
||||||
|
|
||||||
|
let prev_no_progress_count = crate::db::read_content(
|
||||||
|
crate::db::ContentKey::CommitRecoveryPending(story_id),
|
||||||
|
)
|
||||||
|
.and_then(|s| s.trim().parse::<u32>().ok())
|
||||||
|
.unwrap_or(0);
|
||||||
|
|
||||||
|
// AC1: forced exits do not increment the stuck-respawn counter.
|
||||||
|
let no_progress_count = if forced_exit {
|
||||||
|
prev_no_progress_count
|
||||||
|
} else if made_progress || stored_fingerprint.is_none() {
|
||||||
1
|
1
|
||||||
} else {
|
} else {
|
||||||
crate::db::read_content(crate::db::ContentKey::CommitRecoveryPending(
|
prev_no_progress_count + 1
|
||||||
story_id,
|
|
||||||
))
|
|
||||||
.and_then(|s| s.trim().parse::<u32>().ok())
|
|
||||||
.unwrap_or(0)
|
|
||||||
+ 1
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let total_attempts = crate::db::read_content(
|
let total_attempts = crate::db::read_content(
|
||||||
crate::db::ContentKey::CommitRecoveryTotalAttempts(story_id),
|
crate::db::ContentKey::CommitRecoveryTotalAttempts(story_id),
|
||||||
)
|
)
|
||||||
@@ -136,13 +162,17 @@ impl AgentPool {
|
|||||||
crate::db::delete_content(
|
crate::db::delete_content(
|
||||||
crate::db::ContentKey::CommitRecoveryTotalAttempts(story_id),
|
crate::db::ContentKey::CommitRecoveryTotalAttempts(story_id),
|
||||||
);
|
);
|
||||||
|
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryReadSet(
|
||||||
|
story_id,
|
||||||
|
));
|
||||||
slog!(
|
slog!(
|
||||||
"[pipeline] Coder '{agent_name}' for '{story_id}' hit total \
|
"[pipeline] Coder '{agent_name}' for '{story_id}' hit total \
|
||||||
commit-recovery cap ({total_attempts}/{TOTAL_ATTEMPTS_CAP}) \
|
commit-recovery cap ({total_attempts}/{TOTAL_ATTEMPTS_CAP}) \
|
||||||
without a commit. Blocking story."
|
without a commit. Blocking story."
|
||||||
);
|
);
|
||||||
let reason = format!(
|
let reason = format!(
|
||||||
"agent flapped — {total_attempts} respawns without ever committing"
|
"commit absent after {total_attempts} respawns — \
|
||||||
|
agent kept making edits but never committed"
|
||||||
);
|
);
|
||||||
if let Err(e) =
|
if let Err(e) =
|
||||||
crate::agents::lifecycle::transition_to_blocked(story_id, &reason)
|
crate::agents::lifecycle::transition_to_blocked(story_id, &reason)
|
||||||
@@ -167,14 +197,18 @@ impl AgentPool {
|
|||||||
crate::db::delete_content(
|
crate::db::delete_content(
|
||||||
crate::db::ContentKey::CommitRecoveryTotalAttempts(story_id),
|
crate::db::ContentKey::CommitRecoveryTotalAttempts(story_id),
|
||||||
);
|
);
|
||||||
|
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryReadSet(
|
||||||
|
story_id,
|
||||||
|
));
|
||||||
slog!(
|
slog!(
|
||||||
"[pipeline] Coder '{agent_name}' for '{story_id}' made no \
|
"[pipeline] Coder '{agent_name}' for '{story_id}' made no \
|
||||||
file-edit progress over {no_progress_count} consecutive \
|
file or read progress over {no_progress_count} consecutive \
|
||||||
commit-recovery respawns. Blocking story."
|
self-exit commit-recovery respawns. Blocking story."
|
||||||
);
|
);
|
||||||
|
// AC4: block message names the specific cause.
|
||||||
let reason = format!(
|
let reason = format!(
|
||||||
"agent stuck — {no_progress_count} respawns without commits or \
|
"stuck-respawn cap reached: {NO_PROGRESS_CAP} consecutive \
|
||||||
new file edits"
|
self-exits with no file or read progress"
|
||||||
);
|
);
|
||||||
if let Err(e) =
|
if let Err(e) =
|
||||||
crate::agents::lifecycle::transition_to_blocked(story_id, &reason)
|
crate::agents::lifecycle::transition_to_blocked(story_id, &reason)
|
||||||
@@ -206,7 +240,8 @@ impl AgentPool {
|
|||||||
"[pipeline] Coder '{agent_name}' exited with uncommitted work \
|
"[pipeline] Coder '{agent_name}' exited with uncommitted work \
|
||||||
for '{story_id}' (no-progress {no_progress_count}/\
|
for '{story_id}' (no-progress {no_progress_count}/\
|
||||||
{NO_PROGRESS_CAP}, total {total_attempts}/\
|
{NO_PROGRESS_CAP}, total {total_attempts}/\
|
||||||
{TOTAL_ATTEMPTS_CAP}; progress_made={made_progress}). \
|
{TOTAL_ATTEMPTS_CAP}; diff_progress={diff_progress}, \
|
||||||
|
read_progress={read_progress}, forced_exit={forced_exit}). \
|
||||||
Issuing commit-only respawn."
|
Issuing commit-only respawn."
|
||||||
);
|
);
|
||||||
let addendum = "\n\nYou have uncommitted work in this worktree. \
|
let addendum = "\n\nYou have uncommitted work in this worktree. \
|
||||||
@@ -302,10 +337,13 @@ impl AgentPool {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
} else if completion.gates_passed {
|
} else if completion.gates_passed {
|
||||||
// Clear any stale recovery key when the coder succeeds normally.
|
// Clear any stale recovery keys when the coder succeeds normally.
|
||||||
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryPending(
|
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryPending(
|
||||||
story_id,
|
story_id,
|
||||||
));
|
));
|
||||||
|
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryReadSet(
|
||||||
|
story_id,
|
||||||
|
));
|
||||||
// Determine effective QA mode for this story.
|
// Determine effective QA mode for this story.
|
||||||
let qa_mode = {
|
let qa_mode = {
|
||||||
let item_type = crate::agents::lifecycle::item_type_from_id(story_id);
|
let item_type = crate::agents::lifecycle::item_type_from_id(story_id);
|
||||||
@@ -361,11 +399,14 @@ impl AgentPool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Clear any stale recovery key when gates fail normally (agent committed
|
// Clear any stale recovery keys when gates fail normally (agent committed
|
||||||
// but the build is broken — treat as a standard retry, not a recovery).
|
// but the build is broken — treat as a standard retry, not a recovery).
|
||||||
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryPending(
|
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryPending(
|
||||||
story_id,
|
story_id,
|
||||||
));
|
));
|
||||||
|
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryReadSet(
|
||||||
|
story_id,
|
||||||
|
));
|
||||||
// Bug 645 / 668: Before retry/block, check if the agent left committed
|
// Bug 645 / 668: Before retry/block, check if the agent left committed
|
||||||
// work AND the agent had a passing run_tests result captured during its
|
// work AND the agent had a passing run_tests result captured during its
|
||||||
// session. An agent may crash mid-output (e.g. Claude Code CLI PTY write
|
// session. An agent may crash mid-output (e.g. Claude Code CLI PTY write
|
||||||
@@ -724,6 +765,109 @@ mod helpers;
|
|||||||
use helpers::{resolve_qa_mode_from_store, write_review_hold_to_store};
|
use helpers::{resolve_qa_mode_from_store, write_review_hold_to_store};
|
||||||
pub(crate) use helpers::{should_block_story, spawn_pipeline_advance};
|
pub(crate) use helpers::{should_block_story, spawn_pipeline_advance};
|
||||||
|
|
||||||
|
/// Parse a huskies agent log and return the set of file paths passed to the
|
||||||
|
/// Read tool in that session. Returns an empty set if the log cannot be read.
|
||||||
|
///
|
||||||
|
/// Used by [`collect_read_progress`] to detect read-exploration progress even
|
||||||
|
/// when the worktree diff did not grow (story 1089, AC2).
|
||||||
|
fn collect_read_files_from_log(
|
||||||
|
project_root: &std::path::Path,
|
||||||
|
story_id: &str,
|
||||||
|
agent_name: &str,
|
||||||
|
session_id: &str,
|
||||||
|
) -> std::collections::HashSet<String> {
|
||||||
|
let log_path = crate::agent_log::log_file_path(project_root, story_id, agent_name, session_id);
|
||||||
|
let mut files = std::collections::HashSet::new();
|
||||||
|
|
||||||
|
let log_text = match std::fs::read_to_string(&log_path) {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(_) => return files,
|
||||||
|
};
|
||||||
|
|
||||||
|
for line in log_text.lines() {
|
||||||
|
let trimmed = line.trim();
|
||||||
|
if trimmed.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let entry: serde_json::Value = match serde_json::from_str(trimmed) {
|
||||||
|
Ok(v) => v,
|
||||||
|
Err(_) => continue,
|
||||||
|
};
|
||||||
|
// Only look at agent_json events where data.type == "assistant".
|
||||||
|
if entry.get("type").and_then(|t| t.as_str()) != Some("agent_json") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let data = match entry.get("data") {
|
||||||
|
Some(d) => d,
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
if data.get("type").and_then(|t| t.as_str()) != Some("assistant") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let content = match data.pointer("/message/content").and_then(|c| c.as_array()) {
|
||||||
|
Some(c) => c,
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
for item in content {
|
||||||
|
if item.get("type").and_then(|t| t.as_str()) != Some("tool_use") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if item.get("name").and_then(|n| n.as_str()) != Some("Read") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if let Some(path) = item.pointer("/input/file_path").and_then(|p| p.as_str()) {
|
||||||
|
files.insert(path.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
files
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return `true` if the agent read any files in `session_id` that were not in
|
||||||
|
/// the cumulative read set for `story_id`. Updates the stored cumulative set
|
||||||
|
/// when new files are found (story 1089, AC2).
|
||||||
|
fn collect_read_progress(
|
||||||
|
project_root: &std::path::Path,
|
||||||
|
story_id: &str,
|
||||||
|
agent_name: &str,
|
||||||
|
session_id: &str,
|
||||||
|
) -> bool {
|
||||||
|
let session_files = collect_read_files_from_log(project_root, story_id, agent_name, session_id);
|
||||||
|
if session_files.is_empty() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
let stored_set: std::collections::HashSet<String> =
|
||||||
|
crate::db::read_content(crate::db::ContentKey::CommitRecoveryReadSet(story_id))
|
||||||
|
.map(|s| {
|
||||||
|
s.lines()
|
||||||
|
.filter(|l| !l.is_empty())
|
||||||
|
.map(str::to_string)
|
||||||
|
.collect()
|
||||||
|
})
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
let union: std::collections::HashSet<String> =
|
||||||
|
stored_set.union(&session_files).cloned().collect();
|
||||||
|
|
||||||
|
if union.len() > stored_set.len() {
|
||||||
|
let mut sorted: Vec<&String> = union.iter().collect();
|
||||||
|
sorted.sort();
|
||||||
|
crate::db::write_content(
|
||||||
|
crate::db::ContentKey::CommitRecoveryReadSet(story_id),
|
||||||
|
&sorted
|
||||||
|
.into_iter()
|
||||||
|
.map(String::as_str)
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("\n"),
|
||||||
|
);
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests;
|
mod tests;
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
@@ -1077,7 +1077,7 @@ stage = "coder"
|
|||||||
"Story must be blocked after NO_PROGRESS_CAP consecutive no-progress respawns"
|
"Story must be blocked after NO_PROGRESS_CAP consecutive no-progress respawns"
|
||||||
);
|
);
|
||||||
assert!(
|
assert!(
|
||||||
block_reason.contains("without commits or new file edits"),
|
block_reason.contains("self-exits with no file or read progress"),
|
||||||
"Block reason should describe the no-progress condition, got: {block_reason}"
|
"Block reason should describe the no-progress condition, got: {block_reason}"
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -1193,7 +1193,7 @@ stage = "coder"
|
|||||||
"Story must be blocked once total commit-recovery attempts hits the outer cap"
|
"Story must be blocked once total commit-recovery attempts hits the outer cap"
|
||||||
);
|
);
|
||||||
assert!(
|
assert!(
|
||||||
block_reason.contains("flapped") && block_reason.contains("without ever committing"),
|
block_reason.contains("commit absent") && block_reason.contains("never committed"),
|
||||||
"Block reason should describe the flapping pattern, got: {block_reason}"
|
"Block reason should describe the flapping pattern, got: {block_reason}"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -808,6 +808,7 @@ pub(super) async fn run_agent_spawn(
|
|||||||
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryTotalAttempts(
|
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryTotalAttempts(
|
||||||
&sid,
|
&sid,
|
||||||
));
|
));
|
||||||
|
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryReadSet(&sid));
|
||||||
|
|
||||||
// Remove agent from the pool and unblock any wait_for_agent callers.
|
// Remove agent from the pool and unblock any wait_for_agent callers.
|
||||||
let tx_done = {
|
let tx_done = {
|
||||||
@@ -873,6 +874,17 @@ pub(super) async fn run_agent_spawn(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AC1 (story 1089): mark forced exits so the commit-recovery
|
||||||
|
// stuck counter is not incremented for API errors, network
|
||||||
|
// failures, or Claude-API budget exhaustion. A non-zero exit
|
||||||
|
// code means the CLI was forced out, not that it chose to stop.
|
||||||
|
if !result.exit_ok {
|
||||||
|
crate::db::write_content(
|
||||||
|
crate::db::ContentKey::CommitRecoveryForcedExit(&sid),
|
||||||
|
"1",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// Server-owned completion: run acceptance gates automatically
|
// Server-owned completion: run acceptance gates automatically
|
||||||
// when the agent process exits normally.
|
// when the agent process exits normally.
|
||||||
super::super::pipeline::run_server_owned_completion(
|
super::super::pipeline::run_server_owned_completion(
|
||||||
@@ -1246,12 +1258,13 @@ mod tests {
|
|||||||
"abc123",
|
"abc123",
|
||||||
);
|
);
|
||||||
|
|
||||||
// Rate-limit exit handler: reset all three counters (the fix).
|
// Rate-limit exit handler: reset all counters (the fix).
|
||||||
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryPending(story_id));
|
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryPending(story_id));
|
||||||
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryDiffFingerprint(
|
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryDiffFingerprint(
|
||||||
story_id,
|
story_id,
|
||||||
));
|
));
|
||||||
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryTotalAttempts(story_id));
|
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryTotalAttempts(story_id));
|
||||||
|
crate::db::delete_content(crate::db::ContentKey::CommitRecoveryReadSet(story_id));
|
||||||
|
|
||||||
// CommitRecoveryPending must be cleared after each rate-limit exit.
|
// CommitRecoveryPending must be cleared after each rate-limit exit.
|
||||||
assert!(
|
assert!(
|
||||||
|
|||||||
@@ -59,6 +59,7 @@ impl AgentRuntime for ClaudeCodeRuntime {
|
|||||||
// Abort+no-session: CLI crashed (e.g. SIGABRT) before emitting its
|
// Abort+no-session: CLI crashed (e.g. SIGABRT) before emitting its
|
||||||
// first "system" event. Detected by: non-zero exit AND no session.
|
// first "system" event. Detected by: non-zero exit AND no session.
|
||||||
aborted_signal: !result.exit_ok && result.session_id.is_none(),
|
aborted_signal: !result.exit_ok && result.session_id.is_none(),
|
||||||
|
exit_ok: result.exit_ok,
|
||||||
session_id: result.session_id,
|
session_id: result.session_id,
|
||||||
token_usage: result.token_usage,
|
token_usage: result.token_usage,
|
||||||
rate_limit_exit: result.rate_limit_exit,
|
rate_limit_exit: result.rate_limit_exit,
|
||||||
@@ -92,6 +93,7 @@ impl AgentRuntime for ClaudeCodeRuntime {
|
|||||||
Ok(RuntimeResult {
|
Ok(RuntimeResult {
|
||||||
aborted_signal: !fallback_result.exit_ok
|
aborted_signal: !fallback_result.exit_ok
|
||||||
&& fallback_result.session_id.is_none(),
|
&& fallback_result.session_id.is_none(),
|
||||||
|
exit_ok: fallback_result.exit_ok,
|
||||||
session_id: fallback_result.session_id,
|
session_id: fallback_result.session_id,
|
||||||
token_usage: fallback_result.token_usage,
|
token_usage: fallback_result.token_usage,
|
||||||
rate_limit_exit: fallback_result.rate_limit_exit,
|
rate_limit_exit: fallback_result.rate_limit_exit,
|
||||||
|
|||||||
@@ -135,6 +135,7 @@ impl AgentRuntime for GeminiRuntime {
|
|||||||
return Ok(RuntimeResult {
|
return Ok(RuntimeResult {
|
||||||
session_id: None,
|
session_id: None,
|
||||||
token_usage: Some(total_usage),
|
token_usage: Some(total_usage),
|
||||||
|
exit_ok: true,
|
||||||
aborted_signal: false,
|
aborted_signal: false,
|
||||||
rate_limit_exit: false,
|
rate_limit_exit: false,
|
||||||
rate_limit_reset_at: None,
|
rate_limit_reset_at: None,
|
||||||
@@ -151,6 +152,7 @@ impl AgentRuntime for GeminiRuntime {
|
|||||||
return Ok(RuntimeResult {
|
return Ok(RuntimeResult {
|
||||||
session_id: None,
|
session_id: None,
|
||||||
token_usage: Some(total_usage),
|
token_usage: Some(total_usage),
|
||||||
|
exit_ok: true,
|
||||||
aborted_signal: false,
|
aborted_signal: false,
|
||||||
rate_limit_exit: false,
|
rate_limit_exit: false,
|
||||||
rate_limit_reset_at: None,
|
rate_limit_reset_at: None,
|
||||||
@@ -254,6 +256,7 @@ impl AgentRuntime for GeminiRuntime {
|
|||||||
return Ok(RuntimeResult {
|
return Ok(RuntimeResult {
|
||||||
session_id: None,
|
session_id: None,
|
||||||
token_usage: Some(total_usage),
|
token_usage: Some(total_usage),
|
||||||
|
exit_ok: true,
|
||||||
aborted_signal: false,
|
aborted_signal: false,
|
||||||
rate_limit_exit: false,
|
rate_limit_exit: false,
|
||||||
rate_limit_reset_at: None,
|
rate_limit_reset_at: None,
|
||||||
@@ -339,6 +342,7 @@ impl AgentRuntime for GeminiRuntime {
|
|||||||
Ok(RuntimeResult {
|
Ok(RuntimeResult {
|
||||||
session_id: None,
|
session_id: None,
|
||||||
token_usage: Some(total_usage),
|
token_usage: Some(total_usage),
|
||||||
|
exit_ok: true,
|
||||||
aborted_signal: false,
|
aborted_signal: false,
|
||||||
rate_limit_exit: false,
|
rate_limit_exit: false,
|
||||||
rate_limit_reset_at: None,
|
rate_limit_reset_at: None,
|
||||||
|
|||||||
@@ -55,6 +55,12 @@ pub struct RuntimeContext {
|
|||||||
pub struct RuntimeResult {
|
pub struct RuntimeResult {
|
||||||
pub session_id: Option<String>,
|
pub session_id: Option<String>,
|
||||||
pub token_usage: Option<TokenUsage>,
|
pub token_usage: Option<TokenUsage>,
|
||||||
|
/// `true` when the process exited with exit code 0; `false` for non-zero exits
|
||||||
|
/// (API errors, network failures, or Claude-API-level budget exhaustion). Always
|
||||||
|
/// `true` for API-based runtimes (OpenAI, Gemini) which have no exit-code concept.
|
||||||
|
/// Used by the commit-recovery path to skip the stuck-respawn counter for forced
|
||||||
|
/// exits (story 1089, AC1).
|
||||||
|
pub exit_ok: bool,
|
||||||
/// `true` when the process exited with a failure AND no session was established.
|
/// `true` when the process exited with a failure AND no session was established.
|
||||||
///
|
///
|
||||||
/// This indicates the Claude Code CLI crashed (e.g. SIGABRT from an assertion
|
/// This indicates the Claude Code CLI crashed (e.g. SIGABRT from an assertion
|
||||||
@@ -169,6 +175,7 @@ mod tests {
|
|||||||
cache_read_input_tokens: 0,
|
cache_read_input_tokens: 0,
|
||||||
total_cost_usd: 0.01,
|
total_cost_usd: 0.01,
|
||||||
}),
|
}),
|
||||||
|
exit_ok: true,
|
||||||
aborted_signal: false,
|
aborted_signal: false,
|
||||||
rate_limit_exit: false,
|
rate_limit_exit: false,
|
||||||
rate_limit_reset_at: None,
|
rate_limit_reset_at: None,
|
||||||
@@ -186,6 +193,7 @@ mod tests {
|
|||||||
let result = RuntimeResult {
|
let result = RuntimeResult {
|
||||||
session_id: None,
|
session_id: None,
|
||||||
token_usage: None,
|
token_usage: None,
|
||||||
|
exit_ok: true,
|
||||||
aborted_signal: false,
|
aborted_signal: false,
|
||||||
rate_limit_exit: false,
|
rate_limit_exit: false,
|
||||||
rate_limit_reset_at: None,
|
rate_limit_reset_at: None,
|
||||||
|
|||||||
@@ -122,6 +122,7 @@ impl AgentRuntime for OpenAiRuntime {
|
|||||||
return Ok(RuntimeResult {
|
return Ok(RuntimeResult {
|
||||||
session_id: None,
|
session_id: None,
|
||||||
token_usage: Some(total_usage),
|
token_usage: Some(total_usage),
|
||||||
|
exit_ok: true,
|
||||||
aborted_signal: false,
|
aborted_signal: false,
|
||||||
rate_limit_exit: false,
|
rate_limit_exit: false,
|
||||||
rate_limit_reset_at: None,
|
rate_limit_reset_at: None,
|
||||||
@@ -138,6 +139,7 @@ impl AgentRuntime for OpenAiRuntime {
|
|||||||
return Ok(RuntimeResult {
|
return Ok(RuntimeResult {
|
||||||
session_id: None,
|
session_id: None,
|
||||||
token_usage: Some(total_usage),
|
token_usage: Some(total_usage),
|
||||||
|
exit_ok: true,
|
||||||
aborted_signal: false,
|
aborted_signal: false,
|
||||||
rate_limit_exit: false,
|
rate_limit_exit: false,
|
||||||
rate_limit_reset_at: None,
|
rate_limit_reset_at: None,
|
||||||
@@ -224,6 +226,7 @@ impl AgentRuntime for OpenAiRuntime {
|
|||||||
return Ok(RuntimeResult {
|
return Ok(RuntimeResult {
|
||||||
session_id: None,
|
session_id: None,
|
||||||
token_usage: Some(total_usage),
|
token_usage: Some(total_usage),
|
||||||
|
exit_ok: true,
|
||||||
aborted_signal: false,
|
aborted_signal: false,
|
||||||
rate_limit_exit: false,
|
rate_limit_exit: false,
|
||||||
rate_limit_reset_at: None,
|
rate_limit_reset_at: None,
|
||||||
|
|||||||
@@ -60,6 +60,17 @@ pub enum ContentKey<'a> {
|
|||||||
/// completion. Read by `get_merge_status` to surface gate output for the
|
/// completion. Read by `get_merge_status` to surface gate output for the
|
||||||
/// "completed" state without a separate MergeJob CRDT register (story 1036).
|
/// "completed" state without a separate MergeJob CRDT register (story 1036).
|
||||||
MergeReport(&'a str),
|
MergeReport(&'a str),
|
||||||
|
/// Flag written by spawn.rs when a coder session exits with a non-zero exit
|
||||||
|
/// code (API error, network failure, or Claude-API-level budget exhaustion).
|
||||||
|
/// Prevents the stuck-respawn counter from incrementing for forced exits —
|
||||||
|
/// only self-exits with no file or read changes count toward the cap.
|
||||||
|
/// Consumed (read + deleted) by the commit-recovery path in pipeline advance.
|
||||||
|
CommitRecoveryForcedExit(&'a str),
|
||||||
|
/// Cumulative set of files read across all commit-recovery sessions for a
|
||||||
|
/// story, stored as a newline-separated sorted list. Used to detect whether
|
||||||
|
/// the agent made read-exploration progress even when the worktree diff did
|
||||||
|
/// not grow (story 1089, AC2). Cleared when a commit lands or the story blocks.
|
||||||
|
CommitRecoveryReadSet(&'a str),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> ContentKey<'a> {
|
impl<'a> ContentKey<'a> {
|
||||||
@@ -85,6 +96,10 @@ impl<'a> ContentKey<'a> {
|
|||||||
ContentKey::MergeFailureKind(id) => format!("{id}:merge_failure_kind"),
|
ContentKey::MergeFailureKind(id) => format!("{id}:merge_failure_kind"),
|
||||||
ContentKey::MergeSuccess(id) => format!("{id}:merge_success"),
|
ContentKey::MergeSuccess(id) => format!("{id}:merge_success"),
|
||||||
ContentKey::MergeReport(id) => format!("{id}:merge_report"),
|
ContentKey::MergeReport(id) => format!("{id}:merge_report"),
|
||||||
|
ContentKey::CommitRecoveryForcedExit(id) => {
|
||||||
|
format!("{id}:commit_recovery_forced_exit")
|
||||||
|
}
|
||||||
|
ContentKey::CommitRecoveryReadSet(id) => format!("{id}:commit_recovery_read_set"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user