diff --git a/server/src/agents/pool/start/spawn.rs b/server/src/agents/pool/start/spawn.rs index 43238e64..946d6b24 100644 --- a/server/src/agents/pool/start/spawn.rs +++ b/server/src/agents/pool/start/spawn.rs @@ -463,10 +463,19 @@ pub(super) async fn run_agent_spawn( reason, }); } else { + // Prune session_store entries for this story so the next + // spawn starts cold (no `--resume` flag). The crash likely + // came from claude-code choking on the bloated stdio + // replay; resuming again would re-trigger the same abort. + crate::agents::session_store::remove_sessions_for_story( + &project_root_clone, + &sid, + ); slog!( "[agents] CLI crashed before session for '{sid}:{aname}' \ (abort respawn {count}/{ABORT_RESPAWN_CAP}). \ - Respawning without consuming a retry slot." + Pruned session_store and respawning cold without \ + consuming a retry slot." ); let agents_for_respawn = Arc::clone(&agents_ref); let watcher_for_respawn = watcher_tx_clone.clone(); diff --git a/server/src/agents/session_store.rs b/server/src/agents/session_store.rs index 0a3f2c03..717e556f 100644 --- a/server/src/agents/session_store.rs +++ b/server/src/agents/session_store.rs @@ -73,8 +73,12 @@ pub fn lookup_session( read_store(project_root).get(&key).cloned() } -/// Remove all session entries for a story (called when a story reaches done/archived). -#[cfg(test)] +/// Remove all session entries for a story. +/// +/// Called when the story reaches done/archived, OR when claude-code keeps +/// crashing on session resume — in the latter case the next spawn must start +/// cold (no `--resume` flag) so the bloated stdio replay doesn't re-trigger +/// the same abort. See bug 882 follow-up. pub fn remove_sessions_for_story(project_root: &Path, story_id: &str) { let mut data = read_store(project_root); let prefix = format!("{story_id}:");