huskies: merge 967

This commit is contained in:
dave
2026-05-13 12:34:35 +00:00
parent 40ea100eae
commit 93f774fcbb
9 changed files with 165 additions and 0 deletions
+22
View File
@@ -33,6 +33,16 @@ use super::types::{ChildKillerGuard, PtyResult, composite_key};
/// If the agent committed valid work before crashing, the "work survived" check
/// in `pipeline::advance` detects the committed code and advances the story to
/// QA instead of entering the retry/block path.
///
/// ## `eager_record` — watchdog-kill race fix (bug 967)
///
/// When `Some((project_root, model))` is passed, the blocking thread calls
/// `session_store::record_session()` as soon as the `"system"` JSON event is
/// parsed. That call runs inside the OS blocking thread, which cannot be
/// cancelled by a tokio task abort. If the watchdog later kills the PTY child
/// and aborts the spawned tokio task, the session_id is already persisted and
/// the respawn's `lookup_session()` returns it (warm start instead of cold).
/// Pass `None` when session persistence is not needed (e.g. in tests).
#[allow(clippy::too_many_arguments)]
pub(in crate::agents) async fn run_agent_pty_streaming(
story_id: &str,
@@ -48,6 +58,7 @@ pub(in crate::agents) async fn run_agent_pty_streaming(
child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
watcher_tx: broadcast::Sender<WatcherEvent>,
session_id_to_resume: Option<&str>,
eager_record: Option<(std::path::PathBuf, String)>,
) -> Result<PtyResult, String> {
let sid = story_id.to_string();
let aname = agent_name.to_string();
@@ -74,6 +85,7 @@ pub(in crate::agents) async fn run_agent_pty_streaming(
&child_killers,
&watcher_tx,
resume_sid.as_deref(),
eager_record,
)
})
.await
@@ -95,6 +107,7 @@ fn run_agent_pty_blocking(
child_killers: &Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
watcher_tx: &broadcast::Sender<WatcherEvent>,
session_id_to_resume: Option<&str>,
eager_record: Option<(std::path::PathBuf, String)>,
) -> Result<PtyResult, String> {
let pty_system = native_pty_system();
@@ -319,6 +332,15 @@ fn run_agent_pty_blocking(
.get("session_id")
.and_then(|s| s.as_str())
.map(|s| s.to_string());
// Eagerly persist the session_id so it survives a watchdog kill
// that aborts the tokio task before run_agent_spawn's
// record_session() call (bug 967). Runs in the OS blocking
// thread — not cancellable by tokio task abort.
if let (Some(sid), Some((root, model))) = (&session_id, &eager_record) {
crate::agents::session_store::record_session(
root, story_id, agent_name, model, sid,
);
}
}
// With --include-partial-messages, thinking and text arrive
// incrementally via stream_event → content_block_delta. Handle