diff --git a/server/src/agents/mod.rs b/server/src/agents/mod.rs index 3c1373c..6b644a3 100644 --- a/server/src/agents/mod.rs +++ b/server/src/agents/mod.rs @@ -3,9 +3,10 @@ pub mod lifecycle; pub mod merge; mod pool; mod pty; +pub mod token_usage; use crate::config::AgentConfig; -use serde::Serialize; +use serde::{Deserialize, Serialize}; pub use lifecycle::{ close_bug_to_archive, feature_branch_has_unmerged_changes, move_story_to_archived, @@ -136,6 +137,45 @@ pub struct CompletionReport { pub gate_output: String, } +/// Token usage from a Claude Code session's `result` event. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct TokenUsage { + pub input_tokens: u64, + pub output_tokens: u64, + pub cache_creation_input_tokens: u64, + pub cache_read_input_tokens: u64, + pub total_cost_usd: f64, +} + +impl TokenUsage { + /// Parse token usage from a Claude Code `result` JSON event. + pub fn from_result_event(json: &serde_json::Value) -> Option { + let usage = json.get("usage")?; + Some(Self { + input_tokens: usage + .get("input_tokens") + .and_then(|v| v.as_u64()) + .unwrap_or(0), + output_tokens: usage + .get("output_tokens") + .and_then(|v| v.as_u64()) + .unwrap_or(0), + cache_creation_input_tokens: usage + .get("cache_creation_input_tokens") + .and_then(|v| v.as_u64()) + .unwrap_or(0), + cache_read_input_tokens: usage + .get("cache_read_input_tokens") + .and_then(|v| v.as_u64()) + .unwrap_or(0), + total_cost_usd: json + .get("total_cost_usd") + .and_then(|v| v.as_f64()) + .unwrap_or(0.0), + }) + } +} + #[derive(Debug, Serialize, Clone)] pub struct AgentInfo { pub story_id: String, diff --git a/server/src/agents/pool.rs b/server/src/agents/pool.rs index 057391b..137a7f0 100644 --- a/server/src/agents/pool.rs +++ b/server/src/agents/pool.rs @@ -500,7 +500,24 @@ impl AgentPool { ) .await { - Ok(session_id) => { + Ok(pty_result) => { + // Persist token usage if the agent reported it. + if let Some(ref usage) = pty_result.token_usage + && let Ok(agents) = agents_ref.lock() + && let Some(agent) = agents.get(&key_clone) + && let Some(ref pr) = agent.project_root + { + let record = super::token_usage::build_record( + &sid, &aname, usage.clone(), + ); + if let Err(e) = super::token_usage::append_record(pr, &record) { + slog_error!( + "[agents] Failed to persist token usage for \ + {sid}:{aname}: {e}" + ); + } + } + // Server-owned completion: run acceptance gates automatically // when the agent process exits normally. run_server_owned_completion( @@ -508,7 +525,7 @@ impl AgentPool { port_for_task, &sid, &aname, - session_id, + pty_result.session_id, watcher_tx_clone.clone(), ) .await; diff --git a/server/src/agents/pty.rs b/server/src/agents/pty.rs index 4702f4e..72c76e0 100644 --- a/server/src/agents/pty.rs +++ b/server/src/agents/pty.rs @@ -5,11 +5,17 @@ use std::sync::{Arc, Mutex}; use portable_pty::{ChildKiller, CommandBuilder, PtySize, native_pty_system}; use tokio::sync::broadcast; -use super::AgentEvent; +use super::{AgentEvent, TokenUsage}; use crate::agent_log::AgentLogWriter; use crate::slog; use crate::slog_warn; +/// Result from a PTY agent session, containing the session ID and token usage. +pub(super) struct PtyResult { + pub session_id: Option, + pub token_usage: Option, +} + fn composite_key(story_id: &str, agent_name: &str) -> String { format!("{story_id}:{agent_name}") } @@ -41,7 +47,7 @@ pub(super) async fn run_agent_pty_streaming( log_writer: Option>>, inactivity_timeout_secs: u64, child_killers: Arc>>>, -) -> Result, String> { +) -> Result { let sid = story_id.to_string(); let aname = agent_name.to_string(); let cmd = command.to_string(); @@ -156,7 +162,7 @@ fn run_agent_pty_blocking( log_writer: Option<&Mutex>, inactivity_timeout_secs: u64, child_killers: &Arc>>>, -) -> Result, String> { +) -> Result { let pty_system = native_pty_system(); let pair = pty_system @@ -251,6 +257,7 @@ fn run_agent_pty_blocking( }; let mut session_id: Option = None; + let mut token_usage: Option = None; loop { let recv_result = match timeout_dur { @@ -334,7 +341,21 @@ fn run_agent_pty_blocking( // Complete assistant events are skipped for content extraction // because thinking and text already arrived via stream_event. // The raw JSON is still forwarded as AgentJson below. - "assistant" | "user" | "result" => {} + "assistant" | "user" => {} + "result" => { + // Extract token usage from the result event. + if let Some(usage) = TokenUsage::from_result_event(&json) { + slog!( + "[agent:{story_id}:{agent_name}] Token usage: in={} out={} cache_create={} cache_read={} cost=${:.4}", + usage.input_tokens, + usage.output_tokens, + usage.cache_creation_input_tokens, + usage.cache_read_input_tokens, + usage.total_cost_usd, + ); + token_usage = Some(usage); + } + } _ => {} } @@ -359,7 +380,10 @@ fn run_agent_pty_blocking( session_id ); - Ok(session_id) + Ok(PtyResult { + session_id, + token_usage, + }) } #[cfg(test)] diff --git a/server/src/agents/token_usage.rs b/server/src/agents/token_usage.rs new file mode 100644 index 0000000..86a8831 --- /dev/null +++ b/server/src/agents/token_usage.rs @@ -0,0 +1,194 @@ +use std::fs; +use std::path::Path; + +use chrono::Utc; +use serde::{Deserialize, Serialize}; + +use super::TokenUsage; + +/// A single token usage record persisted to disk. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct TokenUsageRecord { + pub story_id: String, + pub agent_name: String, + pub timestamp: String, + pub usage: TokenUsage, +} + +/// Append a token usage record to the persistent JSONL file. +/// +/// Each line is a self-contained JSON object, making appends atomic and +/// reads simple. The file lives at `.story_kit/token_usage.jsonl`. +pub fn append_record(project_root: &Path, record: &TokenUsageRecord) -> Result<(), String> { + let path = token_usage_path(project_root); + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .map_err(|e| format!("Failed to create token_usage directory: {e}"))?; + } + let mut line = + serde_json::to_string(record).map_err(|e| format!("Failed to serialize record: {e}"))?; + line.push('\n'); + use std::io::Write; + let file = fs::OpenOptions::new() + .create(true) + .append(true) + .open(&path) + .map_err(|e| format!("Failed to open token_usage file: {e}"))?; + let mut writer = std::io::BufWriter::new(file); + writer + .write_all(line.as_bytes()) + .map_err(|e| format!("Failed to write token_usage record: {e}"))?; + writer + .flush() + .map_err(|e| format!("Failed to flush token_usage file: {e}"))?; + Ok(()) +} + +/// Read all token usage records from the persistent file. +pub fn read_all(project_root: &Path) -> Result, String> { + let path = token_usage_path(project_root); + if !path.exists() { + return Ok(Vec::new()); + } + let content = + fs::read_to_string(&path).map_err(|e| format!("Failed to read token_usage file: {e}"))?; + let mut records = Vec::new(); + for line in content.lines() { + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + match serde_json::from_str::(trimmed) { + Ok(record) => records.push(record), + Err(e) => { + crate::slog_warn!("[token_usage] Skipping malformed line: {e}"); + } + } + } + Ok(records) +} + +/// Build a `TokenUsageRecord` from the parts available at completion time. +pub fn build_record(story_id: &str, agent_name: &str, usage: TokenUsage) -> TokenUsageRecord { + TokenUsageRecord { + story_id: story_id.to_string(), + agent_name: agent_name.to_string(), + timestamp: Utc::now().to_rfc3339(), + usage, + } +} + +fn token_usage_path(project_root: &Path) -> std::path::PathBuf { + project_root.join(".story_kit").join("token_usage.jsonl") +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + fn sample_usage() -> TokenUsage { + TokenUsage { + input_tokens: 100, + output_tokens: 200, + cache_creation_input_tokens: 5000, + cache_read_input_tokens: 10000, + total_cost_usd: 1.57, + } + } + + #[test] + fn append_and_read_roundtrip() { + let dir = TempDir::new().unwrap(); + let root = dir.path(); + + let record = build_record("42_story_foo", "coder-1", sample_usage()); + append_record(root, &record).unwrap(); + + let records = read_all(root).unwrap(); + assert_eq!(records.len(), 1); + assert_eq!(records[0].story_id, "42_story_foo"); + assert_eq!(records[0].agent_name, "coder-1"); + assert_eq!(records[0].usage, sample_usage()); + } + + #[test] + fn multiple_appends_accumulate() { + let dir = TempDir::new().unwrap(); + let root = dir.path(); + + let r1 = build_record("s1", "coder-1", sample_usage()); + let r2 = build_record("s2", "coder-2", sample_usage()); + append_record(root, &r1).unwrap(); + append_record(root, &r2).unwrap(); + + let records = read_all(root).unwrap(); + assert_eq!(records.len(), 2); + assert_eq!(records[0].story_id, "s1"); + assert_eq!(records[1].story_id, "s2"); + } + + #[test] + fn read_empty_returns_empty() { + let dir = TempDir::new().unwrap(); + let records = read_all(dir.path()).unwrap(); + assert!(records.is_empty()); + } + + #[test] + fn malformed_lines_are_skipped() { + let dir = TempDir::new().unwrap(); + let root = dir.path(); + let path = root.join(".story_kit").join("token_usage.jsonl"); + fs::create_dir_all(path.parent().unwrap()).unwrap(); + fs::write(&path, "not json\n{\"bad\":true}\n").unwrap(); + + let records = read_all(root).unwrap(); + assert!(records.is_empty()); + } + + #[test] + fn token_usage_from_result_event() { + let json = serde_json::json!({ + "type": "result", + "total_cost_usd": 1.57, + "usage": { + "input_tokens": 7, + "output_tokens": 475, + "cache_creation_input_tokens": 185020, + "cache_read_input_tokens": 810585 + } + }); + + let usage = TokenUsage::from_result_event(&json).unwrap(); + assert_eq!(usage.input_tokens, 7); + assert_eq!(usage.output_tokens, 475); + assert_eq!(usage.cache_creation_input_tokens, 185020); + assert_eq!(usage.cache_read_input_tokens, 810585); + assert!((usage.total_cost_usd - 1.57).abs() < f64::EPSILON); + } + + #[test] + fn token_usage_from_result_event_missing_usage() { + let json = serde_json::json!({"type": "result"}); + assert!(TokenUsage::from_result_event(&json).is_none()); + } + + #[test] + fn token_usage_from_result_event_partial_fields() { + let json = serde_json::json!({ + "type": "result", + "total_cost_usd": 0.5, + "usage": { + "input_tokens": 10, + "output_tokens": 20 + } + }); + + let usage = TokenUsage::from_result_event(&json).unwrap(); + assert_eq!(usage.input_tokens, 10); + assert_eq!(usage.output_tokens, 20); + assert_eq!(usage.cache_creation_input_tokens, 0); + assert_eq!(usage.cache_read_input_tokens, 0); + } +} diff --git a/server/src/http/mcp.rs b/server/src/http/mcp.rs index cfb889e..dac41c9 100644 --- a/server/src/http/mcp.rs +++ b/server/src/http/mcp.rs @@ -962,6 +962,19 @@ fn handle_tools_list(id: Option) -> JsonRpcResponse { }, "required": ["tool_name", "input"] } + }, + { + "name": "get_token_usage", + "description": "Return per-agent token usage records from the persistent log. Shows input tokens, output tokens, cache tokens, and cost in USD for each agent session. Optionally filter by story_id.", + "inputSchema": { + "type": "object", + "properties": { + "story_id": { + "type": "string", + "description": "Optional: filter records to a specific story (e.g. '42_my_story')" + } + } + } } ] }), @@ -1036,6 +1049,8 @@ async fn handle_tools_call( "rebuild_and_restart" => tool_rebuild_and_restart(ctx).await, // Permission bridge (Claude Code → frontend dialog) "prompt_permission" => tool_prompt_permission(&args, ctx).await, + // Token usage + "get_token_usage" => tool_get_token_usage(&args, ctx), _ => Err(format!("Unknown tool: {tool_name}")), }; @@ -2483,6 +2498,51 @@ async fn tool_prompt_permission(args: &Value, ctx: &AppContext) -> Result Result { + let root = ctx.state.get_project_root()?; + let filter_story = args.get("story_id").and_then(|v| v.as_str()); + + let all_records = crate::agents::token_usage::read_all(&root)?; + let records: Vec<_> = all_records + .into_iter() + .filter(|r| filter_story.is_none_or(|s| r.story_id == s)) + .collect(); + + let total_cost: f64 = records.iter().map(|r| r.usage.total_cost_usd).sum(); + let total_input: u64 = records.iter().map(|r| r.usage.input_tokens).sum(); + let total_output: u64 = records.iter().map(|r| r.usage.output_tokens).sum(); + let total_cache_create: u64 = records + .iter() + .map(|r| r.usage.cache_creation_input_tokens) + .sum(); + let total_cache_read: u64 = records + .iter() + .map(|r| r.usage.cache_read_input_tokens) + .sum(); + + serde_json::to_string_pretty(&json!({ + "records": records.iter().map(|r| json!({ + "story_id": r.story_id, + "agent_name": r.agent_name, + "timestamp": r.timestamp, + "input_tokens": r.usage.input_tokens, + "output_tokens": r.usage.output_tokens, + "cache_creation_input_tokens": r.usage.cache_creation_input_tokens, + "cache_read_input_tokens": r.usage.cache_read_input_tokens, + "total_cost_usd": r.usage.total_cost_usd, + })).collect::>(), + "totals": { + "records": records.len(), + "input_tokens": total_input, + "output_tokens": total_output, + "cache_creation_input_tokens": total_cache_create, + "cache_read_input_tokens": total_cache_read, + "total_cost_usd": total_cost, + } + })) + .map_err(|e| format!("Serialization error: {e}")) +} + #[cfg(test)] mod tests { use super::*; @@ -2592,7 +2652,8 @@ mod tests { assert!(names.contains(&"prompt_permission")); assert!(names.contains(&"get_pipeline_status")); assert!(names.contains(&"rebuild_and_restart")); - assert_eq!(tools.len(), 39); + assert!(names.contains(&"get_token_usage")); + assert_eq!(tools.len(), 40); } #[test] @@ -3729,6 +3790,72 @@ mod tests { let _ = result; } + // ── tool_get_token_usage tests ──────────────────────────────── + + #[test] + fn tool_get_token_usage_empty_returns_zero_totals() { + let tmp = tempfile::tempdir().unwrap(); + let ctx = test_ctx(tmp.path()); + let result = tool_get_token_usage(&json!({}), &ctx).unwrap(); + let parsed: Value = serde_json::from_str(&result).unwrap(); + assert_eq!(parsed["records"].as_array().unwrap().len(), 0); + assert_eq!(parsed["totals"]["records"], 0); + assert_eq!(parsed["totals"]["total_cost_usd"], 0.0); + } + + #[test] + fn tool_get_token_usage_returns_written_records() { + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + let ctx = test_ctx(root); + + let usage = crate::agents::TokenUsage { + input_tokens: 100, + output_tokens: 200, + cache_creation_input_tokens: 5000, + cache_read_input_tokens: 10000, + total_cost_usd: 1.57, + }; + let record = + crate::agents::token_usage::build_record("42_story_foo", "coder-1", usage); + crate::agents::token_usage::append_record(root, &record).unwrap(); + + let result = tool_get_token_usage(&json!({}), &ctx).unwrap(); + let parsed: Value = serde_json::from_str(&result).unwrap(); + assert_eq!(parsed["records"].as_array().unwrap().len(), 1); + assert_eq!(parsed["records"][0]["story_id"], "42_story_foo"); + assert_eq!(parsed["records"][0]["agent_name"], "coder-1"); + assert_eq!(parsed["records"][0]["input_tokens"], 100); + assert_eq!(parsed["totals"]["records"], 1); + assert!((parsed["totals"]["total_cost_usd"].as_f64().unwrap() - 1.57).abs() < f64::EPSILON); + } + + #[test] + fn tool_get_token_usage_filters_by_story_id() { + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + let ctx = test_ctx(root); + + let usage = crate::agents::TokenUsage { + input_tokens: 50, + output_tokens: 60, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + total_cost_usd: 0.5, + }; + let r1 = crate::agents::token_usage::build_record("10_story_a", "coder-1", usage.clone()); + let r2 = crate::agents::token_usage::build_record("20_story_b", "coder-2", usage); + crate::agents::token_usage::append_record(root, &r1).unwrap(); + crate::agents::token_usage::append_record(root, &r2).unwrap(); + + let result = + tool_get_token_usage(&json!({"story_id": "10_story_a"}), &ctx).unwrap(); + let parsed: Value = serde_json::from_str(&result).unwrap(); + assert_eq!(parsed["records"].as_array().unwrap().len(), 1); + assert_eq!(parsed["records"][0]["story_id"], "10_story_a"); + assert_eq!(parsed["totals"]["records"], 1); + } + // ── tool_list_worktrees tests ───────────────────────────────── #[test]