diff --git a/server/src/agents/pty/mod.rs b/server/src/agents/pty/mod.rs index 2f56477b..a672b496 100644 --- a/server/src/agents/pty/mod.rs +++ b/server/src/agents/pty/mod.rs @@ -204,6 +204,57 @@ mod tests { } } + /// Story 916: a rate-limit hard block must extend the inactivity deadline + /// by the backoff duration so the watchdog doesn't kill the agent while + /// it's legitimately waiting for the limit to clear. + /// + /// Script emits a hard-block event with reset_at = now + 3s, then sleeps + /// 3s, then exits. With inactivity_timeout_secs = 1, the run would fail + /// without the extension; with it, the deadline is bumped to ~4s and the + /// script gets to complete cleanly. + #[tokio::test] + async fn rate_limit_hard_block_extends_inactivity_deadline() { + use std::os::unix::fs::PermissionsExt; + + let tmp = tempfile::tempdir().unwrap(); + let script = tmp.path().join("emit_then_wait.sh"); + let reset_at = chrono::Utc::now() + chrono::Duration::seconds(3); + let reset_at_str = reset_at.to_rfc3339_opts(chrono::SecondsFormat::Secs, true); + let body = format!( + "#!/bin/sh\nprintf '%s\\n' '{{\"type\":\"rate_limit_event\",\"rate_limit_info\":{{\"status\":\"hard_block\",\"reset_at\":\"{reset_at_str}\"}}}}'\nsleep 3\n" + ); + std::fs::write(&script, body).unwrap(); + std::fs::set_permissions(&script, std::fs::Permissions::from_mode(0o755)).unwrap(); + + let (tx, _rx) = broadcast::channel::(64); + let (watcher_tx, _watcher_rx) = broadcast::channel::(16); + let event_log = Arc::new(Mutex::new(Vec::new())); + let child_killers = Arc::new(Mutex::new(HashMap::new())); + + let result = run_agent_pty_streaming( + "916_story_rate_limit_extension", + "mergemaster", + "sh", + &[script.to_string_lossy().to_string()], + "--", + "/tmp", + &tx, + &event_log, + None, + 1, // inactivity_timeout_secs = 1s; would expire before the 3s sleep without the extension + child_killers, + watcher_tx, + None, + ) + .await; + + assert!( + result.is_ok(), + "PTY run should not be killed by inactivity timeout during rate-limit block: {:?}", + result.err() + ); + } + #[test] fn test_emit_event_writes_to_log_writer() { let tmp = tempfile::tempdir().unwrap(); diff --git a/server/src/agents/pty/runner.rs b/server/src/agents/pty/runner.rs index 3c000edf..44929cbe 100644 --- a/server/src/agents/pty/runner.rs +++ b/server/src/agents/pty/runner.rs @@ -232,7 +232,7 @@ fn run_agent_pty_blocking( slog!("[agent:{sid_for_reader}:{aname_for_reader}] Reader thread exiting"); }); - let timeout_dur = if inactivity_timeout_secs > 0 { + let base_timeout = if inactivity_timeout_secs > 0 { Some(std::time::Duration::from_secs(inactivity_timeout_secs)) } else { None @@ -240,9 +240,24 @@ fn run_agent_pty_blocking( let mut session_id: Option = None; let mut token_usage: Option = None; + // When the agent receives a rate-limit hard block, claude-code waits + // internally for the limit to clear and emits no PTY output during that + // window. Extend each recv's deadline by `block_until - now` so the + // inactivity timeout doesn't fire while we're legitimately waiting on + // Anthropic. Once `block_until` passes (or the block is cleared by an + // earlier-than-expected emit), the extension implicitly drops back to 0 + // and the base inactivity timeout resumes (story 916). + let mut block_until: Option> = None; loop { - let recv_result = match timeout_dur { + let effective_timeout = base_timeout.map(|base| { + let extra = block_until + .and_then(|t| (t - chrono::Utc::now()).to_std().ok()) + .unwrap_or(std::time::Duration::ZERO); + base + extra + }); + + let recv_result = match effective_timeout { Some(dur) => line_rx.recv_timeout(dur), None => line_rx .recv() @@ -351,6 +366,12 @@ fn run_agent_pty_blocking( default } }; + // Pause the inactivity clock until the rate limit resets + // (story 916). Without this the watchdog kills the agent + // mid-wait — mergemaster especially, whose 15-minute + // inactivity window is shorter than typical rate-limit + // backoffs. + block_until = Some(reset_at); let _ = watcher_tx.send(WatcherEvent::RateLimitHardBlock { story_id: story_id.to_string(), agent_name: agent_name.to_string(),