huskies: merge 801
This commit is contained in:
@@ -0,0 +1,80 @@
|
||||
//! Event emission helpers: routing PTY output lines to the broadcast channel and log.
|
||||
use std::sync::Mutex;
|
||||
|
||||
use tokio::sync::broadcast;
|
||||
|
||||
use crate::agent_log::AgentLogWriter;
|
||||
use crate::agents::AgentEvent;
|
||||
|
||||
/// Dispatch a `stream_event` from Claude Code's `--include-partial-messages` output.
|
||||
///
|
||||
/// Extracts `thinking_delta` and `text_delta` from `content_block_delta` events
|
||||
/// and routes them as `AgentEvent::Thinking` and `AgentEvent::Output` respectively.
|
||||
/// This ensures thinking traces flow through the dedicated `ThinkingBlock` UI
|
||||
/// component rather than appearing as unbounded regular output.
|
||||
pub(super) fn handle_agent_stream_event(
|
||||
event: &serde_json::Value,
|
||||
story_id: &str,
|
||||
agent_name: &str,
|
||||
tx: &broadcast::Sender<AgentEvent>,
|
||||
event_log: &Mutex<Vec<AgentEvent>>,
|
||||
log_writer: Option<&Mutex<AgentLogWriter>>,
|
||||
) {
|
||||
let event_type = event.get("type").and_then(|t| t.as_str()).unwrap_or("");
|
||||
|
||||
if event_type == "content_block_delta"
|
||||
&& let Some(delta) = event.get("delta")
|
||||
{
|
||||
let delta_type = delta.get("type").and_then(|t| t.as_str()).unwrap_or("");
|
||||
match delta_type {
|
||||
"thinking_delta" => {
|
||||
if let Some(thinking) = delta.get("thinking").and_then(|t| t.as_str()) {
|
||||
emit_event(
|
||||
AgentEvent::Thinking {
|
||||
story_id: story_id.to_string(),
|
||||
agent_name: agent_name.to_string(),
|
||||
text: thinking.to_string(),
|
||||
},
|
||||
tx,
|
||||
event_log,
|
||||
log_writer,
|
||||
);
|
||||
}
|
||||
}
|
||||
"text_delta" => {
|
||||
if let Some(text) = delta.get("text").and_then(|t| t.as_str()) {
|
||||
emit_event(
|
||||
AgentEvent::Output {
|
||||
story_id: story_id.to_string(),
|
||||
agent_name: agent_name.to_string(),
|
||||
text: text.to_string(),
|
||||
},
|
||||
tx,
|
||||
event_log,
|
||||
log_writer,
|
||||
);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper to send an event to broadcast, event log, and optional persistent log file.
|
||||
pub(in crate::agents) fn emit_event(
|
||||
event: AgentEvent,
|
||||
tx: &broadcast::Sender<AgentEvent>,
|
||||
event_log: &Mutex<Vec<AgentEvent>>,
|
||||
log_writer: Option<&Mutex<AgentLogWriter>>,
|
||||
) {
|
||||
if let Ok(mut log) = event_log.lock() {
|
||||
log.push(event.clone());
|
||||
}
|
||||
if let Some(writer) = log_writer
|
||||
&& let Ok(mut w) = writer.lock()
|
||||
&& let Err(e) = w.write_event(&event)
|
||||
{
|
||||
eprintln!("[agent_log] Failed to write event to log file: {e}");
|
||||
}
|
||||
let _ = tx.send(event);
|
||||
}
|
||||
@@ -0,0 +1,328 @@
|
||||
//! PTY runner — spawns agent processes in pseudo-terminals and streams their output.
|
||||
|
||||
mod events;
|
||||
mod runner;
|
||||
mod types;
|
||||
|
||||
pub(in crate::agents) use events::emit_event;
|
||||
pub(in crate::agents) use runner::run_agent_pty_streaming;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::events::handle_agent_stream_event;
    use super::*;
    use crate::agents::AgentEvent;
    use crate::io::watcher::WatcherEvent;
    use std::collections::HashMap;
    use std::sync::{Arc, Mutex};
    use tokio::sync::broadcast;

    // ── AC1: pty detects rate_limit_event and emits RateLimitWarning ─────────

    /// Verify that when a `rate_limit_event` JSON line appears in PTY output,
    /// `run_agent_pty_streaming` sends a `WatcherEvent::RateLimitWarning` with
    /// the correct story_id and agent_name.
    ///
    /// The command invoked is: `sh -p -- <script>` where `--` terminates
    /// option parsing so the script path is treated as the operand.
    #[tokio::test]
    async fn rate_limit_event_json_sends_watcher_warning() {
        use std::os::unix::fs::PermissionsExt;

        // Shell script that prints a single allowed_warning rate-limit JSON line.
        let tmp = tempfile::tempdir().unwrap();
        let script = tmp.path().join("emit_rate_limit.sh");
        std::fs::write(
            &script,
            "#!/bin/sh\nprintf '%s\\n' '{\"type\":\"rate_limit_event\",\"rate_limit_info\":{\"status\":\"allowed_warning\"}}'\n",
        )
        .unwrap();
        std::fs::set_permissions(&script, std::fs::Permissions::from_mode(0o755)).unwrap();

        let (tx, _rx) = broadcast::channel::<AgentEvent>(64);
        let (watcher_tx, mut watcher_rx) = broadcast::channel::<WatcherEvent>(16);
        let event_log = Arc::new(Mutex::new(Vec::new()));
        let child_killers = Arc::new(Mutex::new(HashMap::new()));

        // sh -p "--" <script>: -p = privileged mode, "--" = end options,
        // then the script path is the file operand.
        let result = run_agent_pty_streaming(
            "365_story_test", // story_id
            "coder-1",        // agent_name
            "sh",             // command
            &[script.to_string_lossy().to_string()], // args
            "--",             // prompt (delivered as `-p --`)
            "/tmp",           // cwd
            &tx,
            &event_log,
            None, // log_writer: no persistent log file
            0,    // inactivity_timeout_secs: disabled
            child_killers,
            watcher_tx,
            None, // session_id_to_resume: fresh session
        )
        .await;

        assert!(result.is_ok(), "PTY run should succeed: {:?}", result.err());

        let evt = watcher_rx
            .try_recv()
            .expect("Expected a RateLimitWarning to be sent on watcher_tx");
        match evt {
            WatcherEvent::RateLimitWarning {
                story_id,
                agent_name,
            } => {
                assert_eq!(story_id, "365_story_test");
                assert_eq!(agent_name, "coder-1");
            }
            other => panic!("Expected RateLimitWarning, got: {other:?}"),
        }
    }

    /// AC1: hard block with `reset_at` emits `RateLimitHardBlock` with the
    /// correct story_id, agent_name, and parsed reset_at timestamp.
    #[tokio::test]
    async fn rate_limit_hard_block_sends_watcher_hard_block_event() {
        use std::os::unix::fs::PermissionsExt;

        // Script prints a hard_block event carrying an explicit reset_at.
        let tmp = tempfile::tempdir().unwrap();
        let script = tmp.path().join("emit_hard_block.sh");
        std::fs::write(
            &script,
            "#!/bin/sh\nprintf '%s\\n' '{\"type\":\"rate_limit_event\",\"rate_limit_info\":{\"status\":\"hard_block\",\"reset_at\":\"2099-01-01T12:00:00Z\"}}'\n",
        )
        .unwrap();
        std::fs::set_permissions(&script, std::fs::Permissions::from_mode(0o755)).unwrap();

        let (tx, _rx) = broadcast::channel::<AgentEvent>(64);
        let (watcher_tx, mut watcher_rx) = broadcast::channel::<WatcherEvent>(16);
        let event_log = Arc::new(Mutex::new(Vec::new()));
        let child_killers = Arc::new(Mutex::new(HashMap::new()));

        let result = run_agent_pty_streaming(
            "423_story_rate_limit", // story_id
            "coder-1",              // agent_name
            "sh",                   // command
            &[script.to_string_lossy().to_string()], // args
            "--",                   // prompt (delivered as `-p --`)
            "/tmp",                 // cwd
            &tx,
            &event_log,
            None, // log_writer: no persistent log file
            0,    // inactivity_timeout_secs: disabled
            child_killers,
            watcher_tx,
            None, // session_id_to_resume: fresh session
        )
        .await;

        assert!(result.is_ok(), "PTY run should succeed: {:?}", result.err());

        let evt = watcher_rx
            .try_recv()
            .expect("Expected a RateLimitHardBlock to be sent on watcher_tx");
        match evt {
            WatcherEvent::RateLimitHardBlock {
                story_id,
                agent_name,
                reset_at,
            } => {
                assert_eq!(story_id, "423_story_rate_limit");
                assert_eq!(agent_name, "coder-1");
                assert_eq!(
                    reset_at.to_rfc3339(),
                    "2099-01-01T12:00:00+00:00",
                    "reset_at should match the parsed timestamp"
                );
            }
            other => panic!("Expected RateLimitHardBlock, got: {other:?}"),
        }
    }

    /// Bug 496: hard block WITHOUT `reset_at` must still emit `RateLimitHardBlock`
    /// (not `RateLimitWarning`), using a default 5-minute backoff so the
    /// auto-scheduler can set a retry timer.
    #[tokio::test]
    async fn rate_limit_hard_block_without_reset_at_sends_hard_block_event() {
        use std::os::unix::fs::PermissionsExt;

        // "rejected" status with no reset_at exercises the default-backoff path.
        let tmp = tempfile::tempdir().unwrap();
        let script = tmp.path().join("emit_hard_block_no_reset.sh");
        std::fs::write(
            &script,
            "#!/bin/sh\nprintf '%s\\n' '{\"type\":\"rate_limit_event\",\"rate_limit_info\":{\"status\":\"rejected\"}}'\n",
        )
        .unwrap();
        std::fs::set_permissions(&script, std::fs::Permissions::from_mode(0o755)).unwrap();

        let (tx, _rx) = broadcast::channel::<AgentEvent>(64);
        let (watcher_tx, mut watcher_rx) = broadcast::channel::<WatcherEvent>(16);
        let event_log = Arc::new(Mutex::new(Vec::new()));
        let child_killers = Arc::new(Mutex::new(HashMap::new()));

        // Bracket the run with timestamps so the default backoff can be
        // asserted as a range rather than an exact instant.
        let before = chrono::Utc::now();
        let result = run_agent_pty_streaming(
            "496_bug_hard_rate_limit", // story_id
            "coder-1",                 // agent_name
            "sh",                      // command
            &[script.to_string_lossy().to_string()], // args
            "--",                      // prompt (delivered as `-p --`)
            "/tmp",                    // cwd
            &tx,
            &event_log,
            None, // log_writer: no persistent log file
            0,    // inactivity_timeout_secs: disabled
            child_killers,
            watcher_tx,
            None, // session_id_to_resume: fresh session
        )
        .await;
        let after = chrono::Utc::now();

        assert!(result.is_ok(), "PTY run should succeed: {:?}", result.err());

        let evt = watcher_rx
            .try_recv()
            .expect("Expected a RateLimitHardBlock to be sent on watcher_tx");
        match evt {
            WatcherEvent::RateLimitHardBlock {
                story_id,
                agent_name,
                reset_at,
            } => {
                assert_eq!(story_id, "496_bug_hard_rate_limit");
                assert_eq!(agent_name, "coder-1");
                // reset_at should be ~5 minutes from when the event fired
                let min_expected = before + chrono::Duration::minutes(4);
                let max_expected = after + chrono::Duration::minutes(6);
                assert!(
                    reset_at >= min_expected && reset_at <= max_expected,
                    "reset_at {reset_at} should be ~5 minutes from now"
                );
            }
            other => panic!("Expected RateLimitHardBlock (with default backoff), got: {other:?}"),
        }
    }

    /// emit_event fans out to both the in-memory log and the persistent file.
    #[test]
    fn test_emit_event_writes_to_log_writer() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();

        let log_writer =
            crate::agent_log::AgentLogWriter::new(root, "42_story_foo", "coder-1", "sess-emit")
                .unwrap();
        let log_mutex = Mutex::new(log_writer);

        let (tx, _rx) = broadcast::channel::<AgentEvent>(64);
        let event_log: Mutex<Vec<AgentEvent>> = Mutex::new(Vec::new());

        let event = AgentEvent::Status {
            story_id: "42_story_foo".to_string(),
            agent_name: "coder-1".to_string(),
            status: "running".to_string(),
        };

        emit_event(event, &tx, &event_log, Some(&log_mutex));

        // Verify event was added to in-memory log
        let mem_events = event_log.lock().unwrap();
        assert_eq!(mem_events.len(), 1);
        drop(mem_events);

        // Verify event was written to the log file
        let log_path =
            crate::agent_log::log_file_path(root, "42_story_foo", "coder-1", "sess-emit");
        let entries = crate::agent_log::read_log(&log_path).unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].event["type"], "status");
        assert_eq!(entries[0].event["status"], "running");
    }

    // ── bug 167: handle_agent_stream_event routes thinking/text correctly ───

    /// A `thinking_delta` content block is routed as `AgentEvent::Thinking`.
    #[test]
    fn stream_event_thinking_delta_emits_thinking_event() {
        let (tx, mut rx) = broadcast::channel::<AgentEvent>(64);
        let event_log: Mutex<Vec<AgentEvent>> = Mutex::new(Vec::new());

        let event = serde_json::json!({
            "type": "content_block_delta",
            "delta": {"type": "thinking_delta", "thinking": "Let me analyze this..."}
        });

        handle_agent_stream_event(&event, "s1", "coder-1", &tx, &event_log, None);

        let received = rx.try_recv().unwrap();
        match received {
            AgentEvent::Thinking {
                story_id,
                agent_name,
                text,
            } => {
                assert_eq!(story_id, "s1");
                assert_eq!(agent_name, "coder-1");
                assert_eq!(text, "Let me analyze this...");
            }
            other => panic!("Expected Thinking event, got: {other:?}"),
        }
    }

    /// A `text_delta` content block is routed as `AgentEvent::Output`.
    #[test]
    fn stream_event_text_delta_emits_output_event() {
        let (tx, mut rx) = broadcast::channel::<AgentEvent>(64);
        let event_log: Mutex<Vec<AgentEvent>> = Mutex::new(Vec::new());

        let event = serde_json::json!({
            "type": "content_block_delta",
            "delta": {"type": "text_delta", "text": "Here is the result."}
        });

        handle_agent_stream_event(&event, "s1", "coder-1", &tx, &event_log, None);

        let received = rx.try_recv().unwrap();
        match received {
            AgentEvent::Output {
                story_id,
                agent_name,
                text,
            } => {
                assert_eq!(story_id, "s1");
                assert_eq!(agent_name, "coder-1");
                assert_eq!(text, "Here is the result.");
            }
            other => panic!("Expected Output event, got: {other:?}"),
        }
    }

    /// Tool-argument (`input_json_delta`) deltas must not produce any event.
    #[test]
    fn stream_event_input_json_delta_ignored() {
        let (tx, mut rx) = broadcast::channel::<AgentEvent>(64);
        let event_log: Mutex<Vec<AgentEvent>> = Mutex::new(Vec::new());

        let event = serde_json::json!({
            "type": "content_block_delta",
            "delta": {"type": "input_json_delta", "partial_json": "{\"file\":"}
        });

        handle_agent_stream_event(&event, "s1", "coder-1", &tx, &event_log, None);

        // No event should be emitted for tool argument deltas
        assert!(rx.try_recv().is_err());
    }

    /// Non-delta stream events (e.g. message_start) are ignored entirely.
    #[test]
    fn stream_event_non_delta_type_ignored() {
        let (tx, mut rx) = broadcast::channel::<AgentEvent>(64);
        let event_log: Mutex<Vec<AgentEvent>> = Mutex::new(Vec::new());

        let event = serde_json::json!({
            "type": "message_start",
            "message": {"role": "assistant"}
        });

        handle_agent_stream_event(&event, "s1", "coder-1", &tx, &event_log, None);

        assert!(rx.try_recv().is_err());
    }
}
|
||||
@@ -0,0 +1,437 @@
|
||||
//! PTY process spawning and output loop: builds the command, drives the reader thread,
|
||||
//! and dispatches parsed JSON events to the broadcast channel.
|
||||
use std::collections::HashMap;
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use portable_pty::{ChildKiller, CommandBuilder, PtySize, native_pty_system};
|
||||
use tokio::sync::broadcast;
|
||||
|
||||
use crate::agent_log::AgentLogWriter;
|
||||
use crate::agents::{AgentEvent, TokenUsage};
|
||||
use crate::io::watcher::WatcherEvent;
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
use super::events::{emit_event, handle_agent_stream_event};
|
||||
use super::types::{ChildKillerGuard, PtyResult, composite_key};
|
||||
|
||||
/// Spawn claude agent in a PTY and stream events through the broadcast channel.
|
||||
///
|
||||
/// ## Bug 645: `output.write(&bytes).is_ok()` assertion in Claude Code CLI
|
||||
///
|
||||
/// The Claude Code CLI can panic with an `output.write(&bytes).is_ok()` assertion
|
||||
/// when writing to its stdout (the PTY slave end). This occurs inside the child
|
||||
/// process — not in this server code — when the PTY pipe breaks or fills. The
|
||||
/// `output` in the assertion is the CLI's stdout writer, and the write fails when
|
||||
/// the PTY master side is closed or the kernel pipe buffer is exhausted.
|
||||
///
|
||||
/// When this happens, the child process dies, the PTY reader thread in this
|
||||
/// function receives EOF, and `run_agent_pty_blocking` returns `Ok(PtyResult)`.
|
||||
/// The server then runs completion gates via `run_server_owned_completion`.
|
||||
///
|
||||
/// If the agent committed valid work before crashing, the "work survived" check
|
||||
/// in `pipeline::advance` detects the committed code and advances the story to
|
||||
/// QA instead of entering the retry/block path.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(in crate::agents) async fn run_agent_pty_streaming(
|
||||
story_id: &str,
|
||||
agent_name: &str,
|
||||
command: &str,
|
||||
args: &[String],
|
||||
prompt: &str,
|
||||
cwd: &str,
|
||||
tx: &broadcast::Sender<AgentEvent>,
|
||||
event_log: &Arc<Mutex<Vec<AgentEvent>>>,
|
||||
log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
|
||||
inactivity_timeout_secs: u64,
|
||||
child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
|
||||
watcher_tx: broadcast::Sender<WatcherEvent>,
|
||||
session_id_to_resume: Option<&str>,
|
||||
) -> Result<PtyResult, String> {
|
||||
let sid = story_id.to_string();
|
||||
let aname = agent_name.to_string();
|
||||
let cmd = command.to_string();
|
||||
let args = args.to_vec();
|
||||
let prompt = prompt.to_string();
|
||||
let cwd = cwd.to_string();
|
||||
let tx = tx.clone();
|
||||
let event_log = event_log.clone();
|
||||
let resume_sid = session_id_to_resume.map(str::to_string);
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
run_agent_pty_blocking(
|
||||
&sid,
|
||||
&aname,
|
||||
&cmd,
|
||||
&args,
|
||||
&prompt,
|
||||
&cwd,
|
||||
&tx,
|
||||
&event_log,
|
||||
log_writer.as_deref(),
|
||||
inactivity_timeout_secs,
|
||||
&child_killers,
|
||||
&watcher_tx,
|
||||
resume_sid.as_deref(),
|
||||
)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| format!("Agent task panicked: {e}"))?
|
||||
}
|
||||
|
||||
/// Blocking core of `run_agent_pty_streaming`: builds the CLI invocation,
/// spawns it in a PTY, and drains its line-oriented stream-json output until
/// EOF or an inactivity timeout, dispatching parsed events as it goes.
#[allow(clippy::too_many_arguments)]
fn run_agent_pty_blocking(
    story_id: &str,
    agent_name: &str,
    command: &str,
    args: &[String],
    prompt: &str,
    cwd: &str,
    tx: &broadcast::Sender<AgentEvent>,
    event_log: &Mutex<Vec<AgentEvent>>,
    log_writer: Option<&Mutex<AgentLogWriter>>,
    inactivity_timeout_secs: u64,
    child_killers: &Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
    watcher_tx: &broadcast::Sender<WatcherEvent>,
    session_id_to_resume: Option<&str>,
) -> Result<PtyResult, String> {
    let pty_system = native_pty_system();

    // Fixed terminal geometry; the agent streams JSON lines, so the size only
    // needs to avoid pathological wrapping.
    let pair = pty_system
        .openpty(PtySize {
            rows: 50,
            cols: 200,
            pixel_width: 0,
            pixel_height: 0,
        })
        .map_err(|e| format!("Failed to open PTY: {e}"))?;

    let mut cmd = CommandBuilder::new(command);

    // Launch mode: resume an existing session or start fresh.
    if let Some(sid) = session_id_to_resume {
        // Resume: --resume <session_id> restores previous conversation context.
        // Only the failure context (prompt) is sent as a new message via -p.
        cmd.arg("--resume");
        cmd.arg(sid);
        if !prompt.is_empty() {
            cmd.arg("-p");
            cmd.arg(prompt);
        }
    } else {
        // Fresh session: deliver the full rendered prompt via -p.
        cmd.arg("-p");
        cmd.arg(prompt);
    }

    // Add configured args (e.g., --directory /path/to/worktree, --model, etc.)
    for arg in args {
        cmd.arg(arg);
    }

    cmd.arg("--output-format");
    cmd.arg("stream-json");
    cmd.arg("--verbose");
    // Enable partial streaming so we receive thinking_delta and text_delta
    // events in real-time, rather than only complete assistant events.
    // Without this, thinking traces may not appear in the structured output
    // and instead leak as unstructured PTY text.
    cmd.arg("--include-partial-messages");

    // Agents use acceptEdits so file edits are auto-approved while other
    // tools (e.g. Bash) trigger the permission prompt tool, which auto-denies
    // for agents. The worktree's .claude/settings.json allowlist further
    // controls which tools are pre-approved.
    cmd.arg("--permission-mode");
    cmd.arg("acceptEdits");
    cmd.arg("--permission-prompt-tool");
    cmd.arg("mcp__huskies__prompt_permission");

    cmd.cwd(cwd);
    cmd.env("NO_COLOR", "1");

    // Allow spawning Claude Code from within a Claude Code session
    cmd.env_remove("CLAUDECODE");
    cmd.env_remove("CLAUDE_CODE_ENTRYPOINT");

    // Count existing session files for this worktree to detect budget exhaustion.
    // NOTE(review): the home directory and project-path encoding below are
    // hard-coded for the deployment container — confirm before running this
    // anywhere else; a missing dir degrades gracefully to 0 via unwrap_or.
    let session_dir = format!(
        "/home/huskies/.claude/projects/-workspace--huskies-worktrees-{}/",
        story_id.replace(['_', '.'], "-")
    );
    let session_count = std::fs::read_dir(&session_dir)
        .map(|d| {
            d.filter(|e| {
                e.as_ref()
                    .map(|e| e.path().extension().is_some_and(|ext| ext == "jsonl"))
                    .unwrap_or(false)
            })
            .count()
        })
        .unwrap_or(0);
    let session_bytes: u64 = std::fs::read_dir(&session_dir)
        .map(|d| {
            d.filter_map(|e| e.ok())
                .filter(|e| e.path().extension().is_some_and(|ext| ext == "jsonl"))
                .filter_map(|e| e.metadata().ok())
                .map(|m| m.len())
                .sum()
        })
        .unwrap_or(0);

    slog!(
        "[agent:{story_id}:{agent_name}] Spawning {command} in {cwd} with args: {args:?} \
         (prior_sessions={session_count}, session_log_bytes={session_bytes})"
    );

    let mut child = pair
        .slave
        .spawn_command(cmd)
        .map_err(|e| format!("Failed to spawn agent for {story_id}:{agent_name}: {e}"))?;

    // Register the child killer so that kill_all_children() / stop_agent() can
    // terminate this process on server shutdown, even if the blocking thread
    // cannot be interrupted. The ChildKillerGuard deregisters on function exit.
    let killer_key = composite_key(story_id, agent_name);
    {
        let killer = child.clone_killer();
        if let Ok(mut killers) = child_killers.lock() {
            killers.insert(killer_key.clone(), killer);
        }
    }
    let _killer_guard = ChildKillerGuard {
        killers: Arc::clone(child_killers),
        key: killer_key,
    };

    // Close our copy of the slave end so EOF propagates once the child exits.
    drop(pair.slave);

    let reader = pair
        .master
        .try_clone_reader()
        .map_err(|e| format!("Failed to clone PTY reader: {e}"))?;

    // Drop the master handle; the cloned reader keeps the fd we need.
    drop(pair.master);

    // Spawn a reader thread to collect PTY output lines.
    // We use a channel so the main thread can apply an inactivity deadline
    // via recv_timeout: if no output arrives within the configured window
    // the process is killed and the agent is marked Failed.
    let (line_tx, line_rx) = std::sync::mpsc::channel::<std::io::Result<String>>();
    let sid_for_reader = story_id.to_string();
    let aname_for_reader = agent_name.to_string();
    let reader_handle = std::thread::spawn(move || {
        let buf_reader = BufReader::new(reader);
        for line in buf_reader.lines() {
            // Receiver gone means the main loop returned; stop reading.
            if line_tx.send(line).is_err() {
                break;
            }
        }
        slog!("[agent:{sid_for_reader}:{aname_for_reader}] Reader thread exiting");
    });

    // A timeout of 0 disables the inactivity deadline entirely.
    let timeout_dur = if inactivity_timeout_secs > 0 {
        Some(std::time::Duration::from_secs(inactivity_timeout_secs))
    } else {
        None
    };

    let mut session_id: Option<String> = None;
    let mut token_usage: Option<TokenUsage> = None;

    loop {
        let recv_result = match timeout_dur {
            Some(dur) => line_rx.recv_timeout(dur),
            // Map plain recv errors into the timeout error type so both arms
            // of the match below share one error enum.
            None => line_rx
                .recv()
                .map_err(|_| std::sync::mpsc::RecvTimeoutError::Disconnected),
        };

        let line = match recv_result {
            Ok(Ok(l)) => l,
            Ok(Err(_)) => {
                // IO error reading from PTY — treat as EOF.
                break;
            }
            Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => {
                // Reader thread exited (EOF from PTY).
                break;
            }
            Err(std::sync::mpsc::RecvTimeoutError::Timeout) => {
                slog_warn!(
                    "[agent:{story_id}:{agent_name}] Inactivity timeout after \
                     {inactivity_timeout_secs}s with no output. Killing process."
                );
                let _ = child.kill();
                let _ = child.wait();
                return Err(format!(
                    "Agent inactivity timeout: no output received for {inactivity_timeout_secs}s"
                ));
            }
        };

        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }

        // Try to parse as JSON
        let json: serde_json::Value = match serde_json::from_str(trimmed) {
            Ok(j) => j,
            Err(_) => {
                // Non-JSON output (terminal escapes etc.) — send as raw output
                emit_event(
                    AgentEvent::Output {
                        story_id: story_id.to_string(),
                        agent_name: agent_name.to_string(),
                        text: trimmed.to_string(),
                    },
                    tx,
                    event_log,
                    log_writer,
                );
                continue;
            }
        };

        let event_type = json.get("type").and_then(|t| t.as_str()).unwrap_or("");

        match event_type {
            // The CLI's init event carries the session id we later report.
            "system" => {
                session_id = json
                    .get("session_id")
                    .and_then(|s| s.as_str())
                    .map(|s| s.to_string());
            }
            // With --include-partial-messages, thinking and text arrive
            // incrementally via stream_event → content_block_delta. Handle
            // them here for real-time streaming to the frontend.
            "stream_event" => {
                if let Some(event) = json.get("event") {
                    handle_agent_stream_event(
                        event, story_id, agent_name, tx, event_log, log_writer,
                    );
                }
            }
            // Complete assistant events are skipped for content extraction
            // because thinking and text already arrived via stream_event.
            // The raw JSON is still forwarded as AgentJson below.
            "assistant" | "user" => {}
            "rate_limit_event" => {
                let rate_limit_info = json.get("rate_limit_info");
                let status = rate_limit_info
                    .and_then(|i| i.get("status"))
                    .and_then(|s| s.as_str())
                    .unwrap_or("");
                // Any non-empty status other than allowed_warning counts as a
                // hard block (e.g. "hard_block", "rejected").
                let is_hard_block = !status.is_empty() && status != "allowed_warning";
                let reset_at = rate_limit_info
                    .and_then(|i| i.get("reset_at"))
                    .and_then(|r| r.as_str())
                    .and_then(|r| chrono::DateTime::parse_from_rfc3339(r).ok())
                    .map(|dt| dt.with_timezone(&chrono::Utc));

                if is_hard_block {
                    let reset_at = match reset_at {
                        Some(t) => {
                            slog!(
                                "[agent:{story_id}:{agent_name}] API rate limit hard block \
                                 (status={status}); resets at {t}"
                            );
                            t
                        }
                        // Bug 496: with no reset_at, still emit a hard block
                        // using a default 5-minute backoff so the scheduler
                        // can set a retry timer.
                        None => {
                            let default = chrono::Utc::now() + chrono::Duration::minutes(5);
                            slog!(
                                "[agent:{story_id}:{agent_name}] API rate limit hard block \
                                 (status={status}); no reset_at in rate_limit_info, \
                                 defaulting to 5-minute backoff ({default})"
                            );
                            default
                        }
                    };
                    let _ = watcher_tx.send(WatcherEvent::RateLimitHardBlock {
                        story_id: story_id.to_string(),
                        agent_name: agent_name.to_string(),
                        reset_at,
                    });
                } else {
                    slog!(
                        "[agent:{story_id}:{agent_name}] API rate limit warning received \
                         (status={status})"
                    );
                    let _ = watcher_tx.send(WatcherEvent::RateLimitWarning {
                        story_id: story_id.to_string(),
                        agent_name: agent_name.to_string(),
                    });
                }
            }
            "result" => {
                // Extract token usage from the result event.
                if let Some(usage) = TokenUsage::from_result_event(&json) {
                    slog!(
                        "[agent:{story_id}:{agent_name}] Token usage: in={} out={} cache_create={} cache_read={} cost=${:.4}",
                        usage.input_tokens,
                        usage.output_tokens,
                        usage.cache_creation_input_tokens,
                        usage.cache_read_input_tokens,
                        usage.total_cost_usd,
                    );
                    token_usage = Some(usage);
                }
            }
            _ => {}
        }

        // Forward all JSON events
        emit_event(
            AgentEvent::AgentJson {
                story_id: story_id.to_string(),
                agent_name: agent_name.to_string(),
                data: json,
            },
            tx,
            event_log,
            log_writer,
        );
    }

    // EOF reached: make sure the child is dead before collecting its status.
    let _ = child.kill();
    let wait_result = child.wait();
    match &wait_result {
        Ok(status) => {
            slog!("[agent:{story_id}:{agent_name}] Child exited: {status:?}");
        }
        Err(e) => {
            slog!("[agent:{story_id}:{agent_name}] Child wait error: {e}");
        }
    }

    // Wait for the reader thread to finish so it releases the cloned PTY
    // master fd before we return. Without this, the next PTY spawn for the
    // same story can collide with a still-open fd from this session (#453).
    if let Err(e) = reader_handle.join() {
        slog!("[agent:{story_id}:{agent_name}] Reader thread panicked: {e:?}");
    }

    // Log whether session was created — Session: None indicates CLI died
    // before emitting any events (possible causes: rate limit, budget
    // exhaustion, PTY write failure, CLI crash).
    if session_id.is_none() {
        slog_warn!(
            "[agent:{story_id}:{agent_name}] SESSION NONE: CLI exited without creating a session. \
             Check for 'fatal runtime error' in agent logs. \
             prior_sessions={session_count}, session_log_bytes={session_bytes}"
        );
    }

    slog!(
        "[agent:{story_id}:{agent_name}] Done. Session: {:?}",
        session_id
    );

    Ok(PtyResult {
        session_id,
        token_usage,
    })
}
|
||||
@@ -0,0 +1,30 @@
|
||||
//! Core types for the PTY runner: result container and process lifecycle helpers.
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use portable_pty::ChildKiller;
|
||||
|
||||
use crate::agents::TokenUsage;
|
||||
|
||||
/// Result from a PTY agent session, containing the session ID and token usage.
pub(in crate::agents) struct PtyResult {
    /// Session ID reported by the CLI's `system` init event; `None` when the
    /// CLI exited before emitting one.
    pub session_id: Option<String>,
    /// Token usage parsed from the CLI's final `result` event, if present.
    pub token_usage: Option<TokenUsage>,
}
|
||||
|
||||
pub(super) fn composite_key(story_id: &str, agent_name: &str) -> String {
|
||||
format!("{story_id}:{agent_name}")
|
||||
}
|
||||
|
||||
/// Guard that removes an entry from the child-killer registry when dropped,
/// so a PTY run deregisters its killer on every exit path.
pub(super) struct ChildKillerGuard {
    /// Shared registry of per-agent process killers, keyed by `composite_key`.
    pub killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
    /// The `"story_id:agent_name"` key to remove on drop.
    pub key: String,
}
|
||||
|
||||
impl Drop for ChildKillerGuard {
|
||||
fn drop(&mut self) {
|
||||
if let Ok(mut killers) = self.killers.lock() {
|
||||
killers.remove(&self.key);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user