Spike: PTY-based Claude Code integration with multi-agent concurrency

Proves that spawning `claude -p` in a pseudo-terminal from Rust gets Max
subscription billing (apiKeySource: "none", rateLimitType: "five_hour")
instead of per-token API charges. Concurrent agents run in parallel PTY
sessions with session resumption via --resume for multi-turn conversations.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Dave
2026-02-19 15:25:22 +00:00
parent 8f0bc971bf
commit 68a19c393e
17 changed files with 1159 additions and 22 deletions

View File

@@ -189,12 +189,55 @@ where
.clone()
.unwrap_or_else(|| "http://localhost:11434".to_string());
let is_claude = config.model.starts_with("claude-");
eprintln!("[chat] provider={} model={}", config.provider, config.model);
let is_claude_code = config.provider == "claude-code";
let is_claude = !is_claude_code && config.model.starts_with("claude-");
if !is_claude && config.provider.as_str() != "ollama" {
if !is_claude_code && !is_claude && config.provider.as_str() != "ollama" {
return Err(format!("Unsupported provider: {}", config.provider));
}
// Claude Code provider: bypasses our tool loop entirely.
// Claude Code has its own agent loop, tools, and context management.
// We just pipe the user message in and stream raw output back.
if is_claude_code {
use crate::llm::providers::claude_code::ClaudeCodeProvider;
let user_message = messages
.iter()
.rev()
.find(|m| m.role == Role::User)
.map(|m| m.content.clone())
.ok_or_else(|| "No user message found".to_string())?;
let project_root = state
.get_project_root()
.unwrap_or_else(|_| std::path::PathBuf::from("."));
let provider = ClaudeCodeProvider::new();
let response = provider
.chat_stream(
&user_message,
&project_root.to_string_lossy(),
&mut cancel_rx,
|token| on_token(token),
)
.await
.map_err(|e| format!("Claude Code Error: {e}"))?;
let assistant_msg = Message {
role: Role::Assistant,
content: response.content.unwrap_or_default(),
tool_calls: None,
tool_call_id: None,
};
let mut result = messages.clone();
result.push(assistant_msg);
on_update(&result);
return Ok(result);
}
let tool_defs = get_tool_definitions();
let tools = if config.enable_tools.unwrap_or(true) {
tool_defs.as_slice()

View File

@@ -0,0 +1,299 @@
use portable_pty::{CommandBuilder, PtySize, native_pty_system};
use std::io::{BufRead, BufReader};
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use tokio::sync::watch;
use crate::llm::types::CompletionResponse;
/// Manages a Claude Code session via a pseudo-terminal.
///
/// Spawns `claude -p` in a PTY so isatty() returns true (which may
/// influence billing), while using `--output-format stream-json` to
/// get clean, structured NDJSON output instead of TUI escape sequences.
pub struct ClaudeCodeProvider;
impl ClaudeCodeProvider {
pub fn new() -> Self {
Self
}
pub async fn chat_stream<F>(
&self,
user_message: &str,
project_root: &str,
cancel_rx: &mut watch::Receiver<bool>,
mut on_token: F,
) -> Result<CompletionResponse, String>
where
F: FnMut(&str) + Send,
{
let message = user_message.to_string();
let cwd = project_root.to_string();
let cancelled = Arc::new(AtomicBool::new(false));
let cancelled_clone = cancelled.clone();
let mut cancel_watch = cancel_rx.clone();
tokio::spawn(async move {
while cancel_watch.changed().await.is_ok() {
if *cancel_watch.borrow() {
cancelled_clone.store(true, Ordering::Relaxed);
break;
}
}
});
let (token_tx, mut token_rx) = tokio::sync::mpsc::unbounded_channel::<String>();
let pty_handle = tokio::task::spawn_blocking(move || {
run_pty_session(&message, &cwd, cancelled, token_tx)
});
let mut full_output = String::new();
while let Some(token) = token_rx.recv().await {
full_output.push_str(&token);
on_token(&token);
}
pty_handle
.await
.map_err(|e| format!("PTY task panicked: {e}"))??;
Ok(CompletionResponse {
content: Some(full_output),
tool_calls: None,
})
}
}
/// Run `claude -p` with stream-json output inside a PTY.
///
/// The PTY makes isatty() return true. The `-p` flag gives us
/// single-shot non-interactive mode with structured output.
fn run_pty_session(
user_message: &str,
cwd: &str,
cancelled: Arc<AtomicBool>,
token_tx: tokio::sync::mpsc::UnboundedSender<String>,
) -> Result<(), String> {
let pty_system = native_pty_system();
let pair = pty_system
.openpty(PtySize {
rows: 50,
cols: 200,
pixel_width: 0,
pixel_height: 0,
})
.map_err(|e| format!("Failed to open PTY: {e}"))?;
let mut cmd = CommandBuilder::new("claude");
cmd.arg("-p");
cmd.arg(user_message);
cmd.arg("--output-format");
cmd.arg("stream-json");
cmd.arg("--verbose");
cmd.cwd(cwd);
// Keep TERM reasonable but disable color
cmd.env("NO_COLOR", "1");
eprintln!("[pty-debug] Spawning: claude -p \"{}\" --output-format stream-json --verbose", user_message);
let mut child = pair
.slave
.spawn_command(cmd)
.map_err(|e| format!("Failed to spawn claude: {e}"))?;
eprintln!("[pty-debug] Process spawned, pid: {:?}", child.process_id());
drop(pair.slave);
let reader = pair
.master
.try_clone_reader()
.map_err(|e| format!("Failed to clone PTY reader: {e}"))?;
// We don't need to write anything — -p mode takes prompt as arg
drop(pair.master);
// Read NDJSON lines from stdout
let (line_tx, line_rx) = std::sync::mpsc::channel::<Option<String>>();
std::thread::spawn(move || {
let buf_reader = BufReader::new(reader);
eprintln!("[pty-debug] Reader thread started");
for line in buf_reader.lines() {
match line {
Ok(l) => {
eprintln!("[pty-debug] raw line: {}", l);
if line_tx.send(Some(l)).is_err() {
break;
}
}
Err(e) => {
eprintln!("[pty-debug] read error: {e}");
let _ = line_tx.send(None);
break;
}
}
}
eprintln!("[pty-debug] Reader thread done");
let _ = line_tx.send(None);
});
let mut got_result = false;
loop {
if cancelled.load(Ordering::Relaxed) {
let _ = child.kill();
return Err("Cancelled".to_string());
}
match line_rx.recv_timeout(std::time::Duration::from_millis(500)) {
Ok(Some(line)) => {
let trimmed = line.trim();
if trimmed.is_empty() {
continue;
}
eprintln!("[pty-debug] processing: {}...", &trimmed[..trimmed.len().min(120)]);
// Try to parse as JSON
if let Ok(json) = serde_json::from_str::<serde_json::Value>(trimmed) {
if let Some(event_type) = json.get("type").and_then(|t| t.as_str()) {
match event_type {
// Streaming deltas (when --include-partial-messages is used)
"stream_event" => {
if let Some(event) = json.get("event") {
handle_stream_event(event, &token_tx);
}
}
// Complete assistant message
"assistant" => {
if let Some(message) = json.get("message") {
if let Some(content) = message.get("content").and_then(|c| c.as_array()) {
for block in content {
if let Some(text) = block.get("text").and_then(|t| t.as_str()) {
let _ = token_tx.send(text.to_string());
}
}
}
}
}
// Final result with usage stats
"result" => {
if let Some(cost) = json.get("total_cost_usd").and_then(|c| c.as_f64()) {
let _ = token_tx.send(format!("\n\n---\n_Cost: ${cost:.4}_\n"));
}
if let Some(usage) = json.get("usage") {
let input = usage.get("input_tokens").and_then(|t| t.as_u64()).unwrap_or(0);
let output = usage.get("output_tokens").and_then(|t| t.as_u64()).unwrap_or(0);
let cached = usage.get("cache_read_input_tokens").and_then(|t| t.as_u64()).unwrap_or(0);
let _ = token_tx.send(format!("_Tokens: {input} in / {output} out / {cached} cached_\n"));
}
got_result = true;
}
// System init — log billing info
"system" => {
let api_source = json.get("apiKeySource").and_then(|s| s.as_str()).unwrap_or("unknown");
let model = json.get("model").and_then(|s| s.as_str()).unwrap_or("unknown");
let _ = token_tx.send(format!("_[{model} | apiKey: {api_source}]_\n\n"));
}
// Rate limit info
"rate_limit_event" => {
if let Some(info) = json.get("rate_limit_info") {
let status = info.get("status").and_then(|s| s.as_str()).unwrap_or("unknown");
let limit_type = info.get("rateLimitType").and_then(|s| s.as_str()).unwrap_or("unknown");
let _ = token_tx.send(format!("_[rate limit: {status} ({limit_type})]_\n\n"));
}
}
_ => {}
}
}
}
// Ignore non-JSON lines (terminal escape sequences)
if got_result {
break;
}
}
Ok(None) => break, // EOF
Err(std::sync::mpsc::RecvTimeoutError::Timeout) => {
// Check if child has exited
if let Ok(Some(_status)) = child.try_wait() {
// Drain remaining lines
while let Ok(Some(line)) = line_rx.try_recv() {
let trimmed = line.trim();
if let Ok(json) = serde_json::from_str::<serde_json::Value>(trimmed) {
if let Some(event) = json
.get("type")
.filter(|t| t.as_str() == Some("stream_event"))
.and_then(|_| json.get("event"))
{
handle_stream_event(event, &token_tx);
}
}
}
break;
}
}
Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => break,
}
// Don't set got_result here — just let the process finish naturally
let _ = got_result;
}
let _ = child.kill();
Ok(())
}
/// Extract text from a stream event and send to the token channel.
fn handle_stream_event(
event: &serde_json::Value,
token_tx: &tokio::sync::mpsc::UnboundedSender<String>,
) {
let event_type = event.get("type").and_then(|t| t.as_str()).unwrap_or("");
match event_type {
// Text content streaming
"content_block_delta" => {
if let Some(delta) = event.get("delta") {
let delta_type = delta.get("type").and_then(|t| t.as_str()).unwrap_or("");
match delta_type {
"text_delta" => {
if let Some(text) = delta.get("text").and_then(|t| t.as_str()) {
let _ = token_tx.send(text.to_string());
}
}
"thinking_delta" => {
if let Some(thinking) = delta.get("thinking").and_then(|t| t.as_str()) {
let _ = token_tx.send(format!("[thinking] {thinking}"));
}
}
_ => {}
}
}
}
// Message complete — log usage info
"message_delta" => {
if let Some(usage) = event.get("usage") {
let output_tokens = usage
.get("output_tokens")
.and_then(|t| t.as_u64())
.unwrap_or(0);
let _ = token_tx.send(format!("\n[tokens: {output_tokens} output]\n"));
}
}
// Log errors
"error" => {
if let Some(error) = event.get("error") {
let msg = error
.get("message")
.and_then(|m| m.as_str())
.unwrap_or("unknown error");
let _ = token_tx.send(format!("\n[error: {msg}]\n"));
}
}
_ => {}
}
}

View File

@@ -1,2 +1,3 @@
pub mod anthropic;
pub mod claude_code;
pub mod ollama;