huskies: merge 967
This commit is contained in:
@@ -33,6 +33,16 @@ use super::types::{ChildKillerGuard, PtyResult, composite_key};
|
||||
/// If the agent committed valid work before crashing, the "work survived" check
|
||||
/// in `pipeline::advance` detects the committed code and advances the story to
|
||||
/// QA instead of entering the retry/block path.
|
||||
///
|
||||
/// ## `eager_record` — watchdog-kill race fix (bug 967)
|
||||
///
|
||||
/// When `Some((project_root, model))` is passed, the blocking thread calls
|
||||
/// `session_store::record_session()` immediately when the `"system"` JSON event
|
||||
/// is parsed. This runs inside the OS blocking thread, which cannot be
|
||||
/// cancelled by a tokio task abort. If the watchdog later kills the PTY child
|
||||
/// and aborts the spawned tokio task, the `session_id` is already persisted and
|
||||
/// the respawn's `lookup_session()` returns it (warm start instead of cold).
|
||||
/// Pass `None` when session persistence is not needed (e.g. in tests).
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(in crate::agents) async fn run_agent_pty_streaming(
|
||||
story_id: &str,
|
||||
@@ -48,6 +58,7 @@ pub(in crate::agents) async fn run_agent_pty_streaming(
|
||||
child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
|
||||
watcher_tx: broadcast::Sender<WatcherEvent>,
|
||||
session_id_to_resume: Option<&str>,
|
||||
eager_record: Option<(std::path::PathBuf, String)>,
|
||||
) -> Result<PtyResult, String> {
|
||||
let sid = story_id.to_string();
|
||||
let aname = agent_name.to_string();
|
||||
@@ -74,6 +85,7 @@ pub(in crate::agents) async fn run_agent_pty_streaming(
|
||||
&child_killers,
|
||||
&watcher_tx,
|
||||
resume_sid.as_deref(),
|
||||
eager_record,
|
||||
)
|
||||
})
|
||||
.await
|
||||
@@ -95,6 +107,7 @@ fn run_agent_pty_blocking(
|
||||
child_killers: &Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
|
||||
watcher_tx: &broadcast::Sender<WatcherEvent>,
|
||||
session_id_to_resume: Option<&str>,
|
||||
eager_record: Option<(std::path::PathBuf, String)>,
|
||||
) -> Result<PtyResult, String> {
|
||||
let pty_system = native_pty_system();
|
||||
|
||||
@@ -319,6 +332,15 @@ fn run_agent_pty_blocking(
|
||||
.get("session_id")
|
||||
.and_then(|s| s.as_str())
|
||||
.map(|s| s.to_string());
|
||||
// Eagerly persist the session_id so it survives a watchdog kill
|
||||
// that aborts the tokio task before run_agent_spawn's
|
||||
// record_session() call (bug 967). Runs in the OS blocking
|
||||
// thread — not cancellable by tokio task abort.
|
||||
if let (Some(sid), Some((root, model))) = (&session_id, &eager_record) {
|
||||
crate::agents::session_store::record_session(
|
||||
root, story_id, agent_name, model, sid,
|
||||
);
|
||||
}
|
||||
}
|
||||
// With --include-partial-messages, thinking and text arrive
|
||||
// incrementally via stream_event → content_block_delta. Handle
|
||||
|
||||
Reference in New Issue
Block a user