story-kit: merge 296_story_track_per_agent_token_usage_for_cost_visibility_and_optimisation

This commit is contained in:
Dave
2026-03-19 09:53:32 +00:00
parent 6c413e1fc7
commit 9cdb0d4ea8
5 changed files with 411 additions and 9 deletions

View File

@@ -3,9 +3,10 @@ pub mod lifecycle;
pub mod merge; pub mod merge;
mod pool; mod pool;
mod pty; mod pty;
pub mod token_usage;
use crate::config::AgentConfig; use crate::config::AgentConfig;
use serde::Serialize; use serde::{Deserialize, Serialize};
pub use lifecycle::{ pub use lifecycle::{
close_bug_to_archive, feature_branch_has_unmerged_changes, move_story_to_archived, close_bug_to_archive, feature_branch_has_unmerged_changes, move_story_to_archived,
@@ -136,6 +137,45 @@ pub struct CompletionReport {
pub gate_output: String, pub gate_output: String,
} }
/// Token usage from a Claude Code session's `result` event.
///
/// Field values mirror the keys of the event's `usage` object, except
/// `total_cost_usd`, which is read from the top level of the event.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct TokenUsage {
    /// Value of `usage.input_tokens` from the result event.
    pub input_tokens: u64,
    /// Value of `usage.output_tokens` from the result event.
    pub output_tokens: u64,
    /// Value of `usage.cache_creation_input_tokens` (prompt-cache writes).
    pub cache_creation_input_tokens: u64,
    /// Value of `usage.cache_read_input_tokens` (prompt-cache reads).
    pub cache_read_input_tokens: u64,
    /// Top-level `total_cost_usd` reported for the session, in USD.
    pub total_cost_usd: f64,
}
impl TokenUsage {
    /// Parse token usage from a Claude Code `result` JSON event.
    ///
    /// Returns `None` when the event carries no `usage` object. Counters
    /// that are missing (or not representable as `u64`) default to zero,
    /// and a missing top-level `total_cost_usd` defaults to `0.0`.
    pub fn from_result_event(json: &serde_json::Value) -> Option<Self> {
        let usage = json.get("usage")?;
        // Treat absent or non-numeric counters as zero instead of
        // failing the whole parse.
        let count = |key: &str| -> u64 {
            usage.get(key).and_then(serde_json::Value::as_u64).unwrap_or(0)
        };
        Some(Self {
            input_tokens: count("input_tokens"),
            output_tokens: count("output_tokens"),
            cache_creation_input_tokens: count("cache_creation_input_tokens"),
            cache_read_input_tokens: count("cache_read_input_tokens"),
            // Cost lives on the event itself, not inside `usage`.
            total_cost_usd: json
                .get("total_cost_usd")
                .and_then(serde_json::Value::as_f64)
                .unwrap_or(0.0),
        })
    }
}
#[derive(Debug, Serialize, Clone)] #[derive(Debug, Serialize, Clone)]
pub struct AgentInfo { pub struct AgentInfo {
pub story_id: String, pub story_id: String,

View File

@@ -500,7 +500,24 @@ impl AgentPool {
) )
.await .await
{ {
Ok(session_id) => { Ok(pty_result) => {
// Persist token usage if the agent reported it.
if let Some(ref usage) = pty_result.token_usage
&& let Ok(agents) = agents_ref.lock()
&& let Some(agent) = agents.get(&key_clone)
&& let Some(ref pr) = agent.project_root
{
let record = super::token_usage::build_record(
&sid, &aname, usage.clone(),
);
if let Err(e) = super::token_usage::append_record(pr, &record) {
slog_error!(
"[agents] Failed to persist token usage for \
{sid}:{aname}: {e}"
);
}
}
// Server-owned completion: run acceptance gates automatically // Server-owned completion: run acceptance gates automatically
// when the agent process exits normally. // when the agent process exits normally.
run_server_owned_completion( run_server_owned_completion(
@@ -508,7 +525,7 @@ impl AgentPool {
port_for_task, port_for_task,
&sid, &sid,
&aname, &aname,
session_id, pty_result.session_id,
watcher_tx_clone.clone(), watcher_tx_clone.clone(),
) )
.await; .await;

View File

@@ -5,11 +5,17 @@ use std::sync::{Arc, Mutex};
use portable_pty::{ChildKiller, CommandBuilder, PtySize, native_pty_system}; use portable_pty::{ChildKiller, CommandBuilder, PtySize, native_pty_system};
use tokio::sync::broadcast; use tokio::sync::broadcast;
use super::AgentEvent; use super::{AgentEvent, TokenUsage};
use crate::agent_log::AgentLogWriter; use crate::agent_log::AgentLogWriter;
use crate::slog; use crate::slog;
use crate::slog_warn; use crate::slog_warn;
/// Result from a PTY agent session, containing the session ID and token usage.
pub(super) struct PtyResult {
    /// Session ID captured from the agent's event stream, if one appeared.
    pub session_id: Option<String>,
    /// Token usage parsed from the agent's `result` event, if one arrived.
    pub token_usage: Option<TokenUsage>,
}
fn composite_key(story_id: &str, agent_name: &str) -> String { fn composite_key(story_id: &str, agent_name: &str) -> String {
format!("{story_id}:{agent_name}") format!("{story_id}:{agent_name}")
} }
@@ -41,7 +47,7 @@ pub(super) async fn run_agent_pty_streaming(
log_writer: Option<Arc<Mutex<AgentLogWriter>>>, log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
inactivity_timeout_secs: u64, inactivity_timeout_secs: u64,
child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>, child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
) -> Result<Option<String>, String> { ) -> Result<PtyResult, String> {
let sid = story_id.to_string(); let sid = story_id.to_string();
let aname = agent_name.to_string(); let aname = agent_name.to_string();
let cmd = command.to_string(); let cmd = command.to_string();
@@ -156,7 +162,7 @@ fn run_agent_pty_blocking(
log_writer: Option<&Mutex<AgentLogWriter>>, log_writer: Option<&Mutex<AgentLogWriter>>,
inactivity_timeout_secs: u64, inactivity_timeout_secs: u64,
child_killers: &Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>, child_killers: &Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
) -> Result<Option<String>, String> { ) -> Result<PtyResult, String> {
let pty_system = native_pty_system(); let pty_system = native_pty_system();
let pair = pty_system let pair = pty_system
@@ -251,6 +257,7 @@ fn run_agent_pty_blocking(
}; };
let mut session_id: Option<String> = None; let mut session_id: Option<String> = None;
let mut token_usage: Option<TokenUsage> = None;
loop { loop {
let recv_result = match timeout_dur { let recv_result = match timeout_dur {
@@ -334,7 +341,21 @@ fn run_agent_pty_blocking(
// Complete assistant events are skipped for content extraction // Complete assistant events are skipped for content extraction
// because thinking and text already arrived via stream_event. // because thinking and text already arrived via stream_event.
// The raw JSON is still forwarded as AgentJson below. // The raw JSON is still forwarded as AgentJson below.
"assistant" | "user" | "result" => {} "assistant" | "user" => {}
"result" => {
// Extract token usage from the result event.
if let Some(usage) = TokenUsage::from_result_event(&json) {
slog!(
"[agent:{story_id}:{agent_name}] Token usage: in={} out={} cache_create={} cache_read={} cost=${:.4}",
usage.input_tokens,
usage.output_tokens,
usage.cache_creation_input_tokens,
usage.cache_read_input_tokens,
usage.total_cost_usd,
);
token_usage = Some(usage);
}
}
_ => {} _ => {}
} }
@@ -359,7 +380,10 @@ fn run_agent_pty_blocking(
session_id session_id
); );
Ok(session_id) Ok(PtyResult {
session_id,
token_usage,
})
} }
#[cfg(test)] #[cfg(test)]

View File

@@ -0,0 +1,194 @@
use std::fs;
use std::path::Path;
use chrono::Utc;
use serde::{Deserialize, Serialize};
use super::TokenUsage;
/// A single token usage record persisted to disk.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct TokenUsageRecord {
pub story_id: String,
pub agent_name: String,
pub timestamp: String,
pub usage: TokenUsage,
}
/// Append a token usage record to the persistent JSONL file.
///
/// Each line is a self-contained JSON object, making appends atomic and
/// reads simple. The file lives at `.story_kit/token_usage.jsonl`.
pub fn append_record(project_root: &Path, record: &TokenUsageRecord) -> Result<(), String> {
    use std::io::Write;

    let path = token_usage_path(project_root);
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent)
            .map_err(|e| format!("Failed to create token_usage directory: {e}"))?;
    }

    // Serialize before touching the file so an encoding failure leaves
    // the log untouched.
    let json = serde_json::to_string(record)
        .map_err(|e| format!("Failed to serialize record: {e}"))?;
    let line = format!("{json}\n");

    let file = fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(&path)
        .map_err(|e| format!("Failed to open token_usage file: {e}"))?;
    let mut writer = std::io::BufWriter::new(file);
    writer
        .write_all(line.as_bytes())
        .map_err(|e| format!("Failed to write token_usage record: {e}"))?;
    // Flush explicitly: BufWriter's Drop swallows flush errors.
    writer
        .flush()
        .map_err(|e| format!("Failed to flush token_usage file: {e}"))
}
/// Read all token usage records from the persistent file.
///
/// A missing file is treated as an empty history. Malformed or blank
/// lines are skipped (with a warning) so one bad entry cannot poison
/// the whole log.
pub fn read_all(project_root: &Path) -> Result<Vec<TokenUsageRecord>, String> {
    let path = token_usage_path(project_root);
    if !path.exists() {
        return Ok(Vec::new());
    }

    let content = fs::read_to_string(&path)
        .map_err(|e| format!("Failed to read token_usage file: {e}"))?;

    let records = content
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .filter_map(|line| match serde_json::from_str::<TokenUsageRecord>(line) {
            Ok(record) => Some(record),
            Err(e) => {
                crate::slog_warn!("[token_usage] Skipping malformed line: {e}");
                None
            }
        })
        .collect();

    Ok(records)
}
/// Build a `TokenUsageRecord` from the parts available at completion time.
///
/// The timestamp is captured as "now" in RFC 3339 UTC.
pub fn build_record(story_id: &str, agent_name: &str, usage: TokenUsage) -> TokenUsageRecord {
    let timestamp = Utc::now().to_rfc3339();
    TokenUsageRecord {
        story_id: story_id.to_owned(),
        agent_name: agent_name.to_owned(),
        timestamp,
        usage,
    }
}
/// Location of the persistent token usage log:
/// `<project_root>/.story_kit/token_usage.jsonl`.
fn token_usage_path(project_root: &Path) -> std::path::PathBuf {
    let mut path = project_root.to_path_buf();
    path.push(".story_kit");
    path.push("token_usage.jsonl");
    path
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Fixed usage fixture shared by the persistence tests below.
    fn sample_usage() -> TokenUsage {
        TokenUsage {
            input_tokens: 100,
            output_tokens: 200,
            cache_creation_input_tokens: 5000,
            cache_read_input_tokens: 10000,
            total_cost_usd: 1.57,
        }
    }

    /// A record written with `append_record` comes back intact via `read_all`.
    #[test]
    fn append_and_read_roundtrip() {
        let dir = TempDir::new().unwrap();
        let root = dir.path();
        let record = build_record("42_story_foo", "coder-1", sample_usage());
        append_record(root, &record).unwrap();
        let records = read_all(root).unwrap();
        assert_eq!(records.len(), 1);
        assert_eq!(records[0].story_id, "42_story_foo");
        assert_eq!(records[0].agent_name, "coder-1");
        assert_eq!(records[0].usage, sample_usage());
    }

    /// Successive appends add lines rather than overwriting, and
    /// `read_all` preserves the on-disk order.
    #[test]
    fn multiple_appends_accumulate() {
        let dir = TempDir::new().unwrap();
        let root = dir.path();
        let r1 = build_record("s1", "coder-1", sample_usage());
        let r2 = build_record("s2", "coder-2", sample_usage());
        append_record(root, &r1).unwrap();
        append_record(root, &r2).unwrap();
        let records = read_all(root).unwrap();
        assert_eq!(records.len(), 2);
        assert_eq!(records[0].story_id, "s1");
        assert_eq!(records[1].story_id, "s2");
    }

    /// A project with no token_usage file yields an empty history, not an error.
    #[test]
    fn read_empty_returns_empty() {
        let dir = TempDir::new().unwrap();
        let records = read_all(dir.path()).unwrap();
        assert!(records.is_empty());
    }

    /// Lines that do not deserialize as `TokenUsageRecord` (including
    /// valid-but-wrong-shape JSON) are skipped instead of failing the read.
    #[test]
    fn malformed_lines_are_skipped() {
        let dir = TempDir::new().unwrap();
        let root = dir.path();
        let path = root.join(".story_kit").join("token_usage.jsonl");
        fs::create_dir_all(path.parent().unwrap()).unwrap();
        fs::write(&path, "not json\n{\"bad\":true}\n").unwrap();
        let records = read_all(root).unwrap();
        assert!(records.is_empty());
    }

    /// Happy path: all four counters plus the top-level cost are
    /// extracted from a `result` event.
    #[test]
    fn token_usage_from_result_event() {
        let json = serde_json::json!({
            "type": "result",
            "total_cost_usd": 1.57,
            "usage": {
                "input_tokens": 7,
                "output_tokens": 475,
                "cache_creation_input_tokens": 185020,
                "cache_read_input_tokens": 810585
            }
        });
        let usage = TokenUsage::from_result_event(&json).unwrap();
        assert_eq!(usage.input_tokens, 7);
        assert_eq!(usage.output_tokens, 475);
        assert_eq!(usage.cache_creation_input_tokens, 185020);
        assert_eq!(usage.cache_read_input_tokens, 810585);
        assert!((usage.total_cost_usd - 1.57).abs() < f64::EPSILON);
    }

    /// A result event without a `usage` object parses to `None`.
    #[test]
    fn token_usage_from_result_event_missing_usage() {
        let json = serde_json::json!({"type": "result"});
        assert!(TokenUsage::from_result_event(&json).is_none());
    }

    /// Counters absent from `usage` default to zero rather than erroring.
    #[test]
    fn token_usage_from_result_event_partial_fields() {
        let json = serde_json::json!({
            "type": "result",
            "total_cost_usd": 0.5,
            "usage": {
                "input_tokens": 10,
                "output_tokens": 20
            }
        });
        let usage = TokenUsage::from_result_event(&json).unwrap();
        assert_eq!(usage.input_tokens, 10);
        assert_eq!(usage.output_tokens, 20);
        assert_eq!(usage.cache_creation_input_tokens, 0);
        assert_eq!(usage.cache_read_input_tokens, 0);
    }
}

View File

@@ -962,6 +962,19 @@ fn handle_tools_list(id: Option<Value>) -> JsonRpcResponse {
}, },
"required": ["tool_name", "input"] "required": ["tool_name", "input"]
} }
},
{
"name": "get_token_usage",
"description": "Return per-agent token usage records from the persistent log. Shows input tokens, output tokens, cache tokens, and cost in USD for each agent session. Optionally filter by story_id.",
"inputSchema": {
"type": "object",
"properties": {
"story_id": {
"type": "string",
"description": "Optional: filter records to a specific story (e.g. '42_my_story')"
}
}
}
} }
] ]
}), }),
@@ -1036,6 +1049,8 @@ async fn handle_tools_call(
"rebuild_and_restart" => tool_rebuild_and_restart(ctx).await, "rebuild_and_restart" => tool_rebuild_and_restart(ctx).await,
// Permission bridge (Claude Code → frontend dialog) // Permission bridge (Claude Code → frontend dialog)
"prompt_permission" => tool_prompt_permission(&args, ctx).await, "prompt_permission" => tool_prompt_permission(&args, ctx).await,
// Token usage
"get_token_usage" => tool_get_token_usage(&args, ctx),
_ => Err(format!("Unknown tool: {tool_name}")), _ => Err(format!("Unknown tool: {tool_name}")),
}; };
@@ -2483,6 +2498,51 @@ async fn tool_prompt_permission(args: &Value, ctx: &AppContext) -> Result<String
} }
} }
/// MCP tool: report per-agent token usage records plus aggregate totals.
///
/// Reads the persistent JSONL log under the project root. When the
/// optional `story_id` argument is present, only that story's records
/// (and totals) are returned.
fn tool_get_token_usage(args: &Value, ctx: &AppContext) -> Result<String, String> {
    let root = ctx.state.get_project_root()?;
    let filter_story = args.get("story_id").and_then(|v| v.as_str());

    // Load everything, then keep only the requested story (or all of them).
    let records: Vec<_> = crate::agents::token_usage::read_all(&root)?
        .into_iter()
        .filter(|r| filter_story.map_or(true, |s| r.story_id == s))
        .collect();

    // Accumulate all five totals in a single pass over the records.
    let mut total_input: u64 = 0;
    let mut total_output: u64 = 0;
    let mut total_cache_create: u64 = 0;
    let mut total_cache_read: u64 = 0;
    let mut total_cost: f64 = 0.0;
    for r in &records {
        total_input += r.usage.input_tokens;
        total_output += r.usage.output_tokens;
        total_cache_create += r.usage.cache_creation_input_tokens;
        total_cache_read += r.usage.cache_read_input_tokens;
        total_cost += r.usage.total_cost_usd;
    }

    let record_objects: Vec<_> = records
        .iter()
        .map(|r| {
            json!({
                "story_id": r.story_id,
                "agent_name": r.agent_name,
                "timestamp": r.timestamp,
                "input_tokens": r.usage.input_tokens,
                "output_tokens": r.usage.output_tokens,
                "cache_creation_input_tokens": r.usage.cache_creation_input_tokens,
                "cache_read_input_tokens": r.usage.cache_read_input_tokens,
                "total_cost_usd": r.usage.total_cost_usd,
            })
        })
        .collect();

    serde_json::to_string_pretty(&json!({
        "records": record_objects,
        "totals": {
            "records": records.len(),
            "input_tokens": total_input,
            "output_tokens": total_output,
            "cache_creation_input_tokens": total_cache_create,
            "cache_read_input_tokens": total_cache_read,
            "total_cost_usd": total_cost,
        }
    }))
    .map_err(|e| format!("Serialization error: {e}"))
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@@ -2592,7 +2652,8 @@ mod tests {
assert!(names.contains(&"prompt_permission")); assert!(names.contains(&"prompt_permission"));
assert!(names.contains(&"get_pipeline_status")); assert!(names.contains(&"get_pipeline_status"));
assert!(names.contains(&"rebuild_and_restart")); assert!(names.contains(&"rebuild_and_restart"));
assert_eq!(tools.len(), 39); assert!(names.contains(&"get_token_usage"));
assert_eq!(tools.len(), 40);
} }
#[test] #[test]
@@ -3729,6 +3790,72 @@ mod tests {
let _ = result; let _ = result;
} }
// ── tool_get_token_usage tests ────────────────────────────────
#[test]
fn tool_get_token_usage_empty_returns_zero_totals() {
    // With no token_usage file on disk the tool reports an empty record
    // list and all-zero totals.
    let dir = tempfile::tempdir().unwrap();
    let ctx = test_ctx(dir.path());

    let output = tool_get_token_usage(&json!({}), &ctx).unwrap();
    let parsed: Value = serde_json::from_str(&output).unwrap();

    assert_eq!(parsed["records"].as_array().unwrap().len(), 0);
    assert_eq!(parsed["totals"]["records"], 0);
    assert_eq!(parsed["totals"]["total_cost_usd"], 0.0);
}
#[test]
fn tool_get_token_usage_returns_written_records() {
    // Write one record to disk, then confirm the tool surfaces it with
    // matching fields and totals.
    let dir = tempfile::tempdir().unwrap();
    let project_root = dir.path();
    let ctx = test_ctx(project_root);

    let usage = crate::agents::TokenUsage {
        input_tokens: 100,
        output_tokens: 200,
        cache_creation_input_tokens: 5000,
        cache_read_input_tokens: 10000,
        total_cost_usd: 1.57,
    };
    let record = crate::agents::token_usage::build_record("42_story_foo", "coder-1", usage);
    crate::agents::token_usage::append_record(project_root, &record).unwrap();

    let output = tool_get_token_usage(&json!({}), &ctx).unwrap();
    let parsed: Value = serde_json::from_str(&output).unwrap();

    let records = parsed["records"].as_array().unwrap();
    assert_eq!(records.len(), 1);
    assert_eq!(records[0]["story_id"], "42_story_foo");
    assert_eq!(records[0]["agent_name"], "coder-1");
    assert_eq!(records[0]["input_tokens"], 100);
    assert_eq!(parsed["totals"]["records"], 1);
    let cost = parsed["totals"]["total_cost_usd"].as_f64().unwrap();
    assert!((cost - 1.57).abs() < f64::EPSILON);
}
#[test]
fn tool_get_token_usage_filters_by_story_id() {
    // With two stories on disk, passing story_id narrows the output to one.
    let dir = tempfile::tempdir().unwrap();
    let project_root = dir.path();
    let ctx = test_ctx(project_root);

    let usage = crate::agents::TokenUsage {
        input_tokens: 50,
        output_tokens: 60,
        cache_creation_input_tokens: 0,
        cache_read_input_tokens: 0,
        total_cost_usd: 0.5,
    };
    for (story, agent) in [("10_story_a", "coder-1"), ("20_story_b", "coder-2")] {
        let record = crate::agents::token_usage::build_record(story, agent, usage.clone());
        crate::agents::token_usage::append_record(project_root, &record).unwrap();
    }

    let output = tool_get_token_usage(&json!({"story_id": "10_story_a"}), &ctx).unwrap();
    let parsed: Value = serde_json::from_str(&output).unwrap();

    assert_eq!(parsed["records"].as_array().unwrap().len(), 1);
    assert_eq!(parsed["records"][0]["story_id"], "10_story_a");
    assert_eq!(parsed["totals"]["records"], 1);
}
// ── tool_list_worktrees tests ───────────────────────────────── // ── tool_list_worktrees tests ─────────────────────────────────
#[test] #[test]