story-kit: merge 296_story_track_per_agent_token_usage_for_cost_visibility_and_optimisation
This commit is contained in:
@@ -3,9 +3,10 @@ pub mod lifecycle;
|
||||
pub mod merge;
|
||||
mod pool;
|
||||
mod pty;
|
||||
pub mod token_usage;
|
||||
|
||||
use crate::config::AgentConfig;
|
||||
use serde::Serialize;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub use lifecycle::{
|
||||
close_bug_to_archive, feature_branch_has_unmerged_changes, move_story_to_archived,
|
||||
@@ -136,6 +137,45 @@ pub struct CompletionReport {
|
||||
pub gate_output: String,
|
||||
}
|
||||
|
||||
/// Token usage from a Claude Code session's `result` event.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct TokenUsage {
|
||||
pub input_tokens: u64,
|
||||
pub output_tokens: u64,
|
||||
pub cache_creation_input_tokens: u64,
|
||||
pub cache_read_input_tokens: u64,
|
||||
pub total_cost_usd: f64,
|
||||
}
|
||||
|
||||
impl TokenUsage {
|
||||
/// Parse token usage from a Claude Code `result` JSON event.
|
||||
pub fn from_result_event(json: &serde_json::Value) -> Option<Self> {
|
||||
let usage = json.get("usage")?;
|
||||
Some(Self {
|
||||
input_tokens: usage
|
||||
.get("input_tokens")
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(0),
|
||||
output_tokens: usage
|
||||
.get("output_tokens")
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(0),
|
||||
cache_creation_input_tokens: usage
|
||||
.get("cache_creation_input_tokens")
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(0),
|
||||
cache_read_input_tokens: usage
|
||||
.get("cache_read_input_tokens")
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(0),
|
||||
total_cost_usd: json
|
||||
.get("total_cost_usd")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
pub struct AgentInfo {
|
||||
pub story_id: String,
|
||||
|
||||
@@ -500,7 +500,24 @@ impl AgentPool {
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(session_id) => {
|
||||
Ok(pty_result) => {
|
||||
// Persist token usage if the agent reported it.
|
||||
if let Some(ref usage) = pty_result.token_usage
|
||||
&& let Ok(agents) = agents_ref.lock()
|
||||
&& let Some(agent) = agents.get(&key_clone)
|
||||
&& let Some(ref pr) = agent.project_root
|
||||
{
|
||||
let record = super::token_usage::build_record(
|
||||
&sid, &aname, usage.clone(),
|
||||
);
|
||||
if let Err(e) = super::token_usage::append_record(pr, &record) {
|
||||
slog_error!(
|
||||
"[agents] Failed to persist token usage for \
|
||||
{sid}:{aname}: {e}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Server-owned completion: run acceptance gates automatically
|
||||
// when the agent process exits normally.
|
||||
run_server_owned_completion(
|
||||
@@ -508,7 +525,7 @@ impl AgentPool {
|
||||
port_for_task,
|
||||
&sid,
|
||||
&aname,
|
||||
session_id,
|
||||
pty_result.session_id,
|
||||
watcher_tx_clone.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -5,11 +5,17 @@ use std::sync::{Arc, Mutex};
|
||||
use portable_pty::{ChildKiller, CommandBuilder, PtySize, native_pty_system};
|
||||
use tokio::sync::broadcast;
|
||||
|
||||
use super::AgentEvent;
|
||||
use super::{AgentEvent, TokenUsage};
|
||||
use crate::agent_log::AgentLogWriter;
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
/// Result from a PTY agent session, containing the session ID and token usage.
|
||||
pub(super) struct PtyResult {
|
||||
pub session_id: Option<String>,
|
||||
pub token_usage: Option<TokenUsage>,
|
||||
}
|
||||
|
||||
fn composite_key(story_id: &str, agent_name: &str) -> String {
|
||||
format!("{story_id}:{agent_name}")
|
||||
}
|
||||
@@ -41,7 +47,7 @@ pub(super) async fn run_agent_pty_streaming(
|
||||
log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
|
||||
inactivity_timeout_secs: u64,
|
||||
child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
|
||||
) -> Result<Option<String>, String> {
|
||||
) -> Result<PtyResult, String> {
|
||||
let sid = story_id.to_string();
|
||||
let aname = agent_name.to_string();
|
||||
let cmd = command.to_string();
|
||||
@@ -156,7 +162,7 @@ fn run_agent_pty_blocking(
|
||||
log_writer: Option<&Mutex<AgentLogWriter>>,
|
||||
inactivity_timeout_secs: u64,
|
||||
child_killers: &Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
|
||||
) -> Result<Option<String>, String> {
|
||||
) -> Result<PtyResult, String> {
|
||||
let pty_system = native_pty_system();
|
||||
|
||||
let pair = pty_system
|
||||
@@ -251,6 +257,7 @@ fn run_agent_pty_blocking(
|
||||
};
|
||||
|
||||
let mut session_id: Option<String> = None;
|
||||
let mut token_usage: Option<TokenUsage> = None;
|
||||
|
||||
loop {
|
||||
let recv_result = match timeout_dur {
|
||||
@@ -334,7 +341,21 @@ fn run_agent_pty_blocking(
|
||||
// Complete assistant events are skipped for content extraction
|
||||
// because thinking and text already arrived via stream_event.
|
||||
// The raw JSON is still forwarded as AgentJson below.
|
||||
"assistant" | "user" | "result" => {}
|
||||
"assistant" | "user" => {}
|
||||
"result" => {
|
||||
// Extract token usage from the result event.
|
||||
if let Some(usage) = TokenUsage::from_result_event(&json) {
|
||||
slog!(
|
||||
"[agent:{story_id}:{agent_name}] Token usage: in={} out={} cache_create={} cache_read={} cost=${:.4}",
|
||||
usage.input_tokens,
|
||||
usage.output_tokens,
|
||||
usage.cache_creation_input_tokens,
|
||||
usage.cache_read_input_tokens,
|
||||
usage.total_cost_usd,
|
||||
);
|
||||
token_usage = Some(usage);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
@@ -359,7 +380,10 @@ fn run_agent_pty_blocking(
|
||||
session_id
|
||||
);
|
||||
|
||||
Ok(session_id)
|
||||
Ok(PtyResult {
|
||||
session_id,
|
||||
token_usage,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
194
server/src/agents/token_usage.rs
Normal file
194
server/src/agents/token_usage.rs
Normal file
@@ -0,0 +1,194 @@
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use chrono::Utc;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::TokenUsage;
|
||||
|
||||
/// A single token usage record persisted to disk.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct TokenUsageRecord {
|
||||
pub story_id: String,
|
||||
pub agent_name: String,
|
||||
pub timestamp: String,
|
||||
pub usage: TokenUsage,
|
||||
}
|
||||
|
||||
/// Append a token usage record to the persistent JSONL file.
|
||||
///
|
||||
/// Each line is a self-contained JSON object, making appends atomic and
|
||||
/// reads simple. The file lives at `.story_kit/token_usage.jsonl`.
|
||||
pub fn append_record(project_root: &Path, record: &TokenUsageRecord) -> Result<(), String> {
|
||||
let path = token_usage_path(project_root);
|
||||
if let Some(parent) = path.parent() {
|
||||
fs::create_dir_all(parent)
|
||||
.map_err(|e| format!("Failed to create token_usage directory: {e}"))?;
|
||||
}
|
||||
let mut line =
|
||||
serde_json::to_string(record).map_err(|e| format!("Failed to serialize record: {e}"))?;
|
||||
line.push('\n');
|
||||
use std::io::Write;
|
||||
let file = fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(&path)
|
||||
.map_err(|e| format!("Failed to open token_usage file: {e}"))?;
|
||||
let mut writer = std::io::BufWriter::new(file);
|
||||
writer
|
||||
.write_all(line.as_bytes())
|
||||
.map_err(|e| format!("Failed to write token_usage record: {e}"))?;
|
||||
writer
|
||||
.flush()
|
||||
.map_err(|e| format!("Failed to flush token_usage file: {e}"))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Read all token usage records from the persistent file.
|
||||
pub fn read_all(project_root: &Path) -> Result<Vec<TokenUsageRecord>, String> {
|
||||
let path = token_usage_path(project_root);
|
||||
if !path.exists() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
let content =
|
||||
fs::read_to_string(&path).map_err(|e| format!("Failed to read token_usage file: {e}"))?;
|
||||
let mut records = Vec::new();
|
||||
for line in content.lines() {
|
||||
let trimmed = line.trim();
|
||||
if trimmed.is_empty() {
|
||||
continue;
|
||||
}
|
||||
match serde_json::from_str::<TokenUsageRecord>(trimmed) {
|
||||
Ok(record) => records.push(record),
|
||||
Err(e) => {
|
||||
crate::slog_warn!("[token_usage] Skipping malformed line: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(records)
|
||||
}
|
||||
|
||||
/// Build a `TokenUsageRecord` from the parts available at completion time.
|
||||
pub fn build_record(story_id: &str, agent_name: &str, usage: TokenUsage) -> TokenUsageRecord {
|
||||
TokenUsageRecord {
|
||||
story_id: story_id.to_string(),
|
||||
agent_name: agent_name.to_string(),
|
||||
timestamp: Utc::now().to_rfc3339(),
|
||||
usage,
|
||||
}
|
||||
}
|
||||
|
||||
fn token_usage_path(project_root: &Path) -> std::path::PathBuf {
|
||||
project_root.join(".story_kit").join("token_usage.jsonl")
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use tempfile::TempDir;
|
||||
|
||||
fn sample_usage() -> TokenUsage {
|
||||
TokenUsage {
|
||||
input_tokens: 100,
|
||||
output_tokens: 200,
|
||||
cache_creation_input_tokens: 5000,
|
||||
cache_read_input_tokens: 10000,
|
||||
total_cost_usd: 1.57,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn append_and_read_roundtrip() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
let root = dir.path();
|
||||
|
||||
let record = build_record("42_story_foo", "coder-1", sample_usage());
|
||||
append_record(root, &record).unwrap();
|
||||
|
||||
let records = read_all(root).unwrap();
|
||||
assert_eq!(records.len(), 1);
|
||||
assert_eq!(records[0].story_id, "42_story_foo");
|
||||
assert_eq!(records[0].agent_name, "coder-1");
|
||||
assert_eq!(records[0].usage, sample_usage());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multiple_appends_accumulate() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
let root = dir.path();
|
||||
|
||||
let r1 = build_record("s1", "coder-1", sample_usage());
|
||||
let r2 = build_record("s2", "coder-2", sample_usage());
|
||||
append_record(root, &r1).unwrap();
|
||||
append_record(root, &r2).unwrap();
|
||||
|
||||
let records = read_all(root).unwrap();
|
||||
assert_eq!(records.len(), 2);
|
||||
assert_eq!(records[0].story_id, "s1");
|
||||
assert_eq!(records[1].story_id, "s2");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn read_empty_returns_empty() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
let records = read_all(dir.path()).unwrap();
|
||||
assert!(records.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn malformed_lines_are_skipped() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
let root = dir.path();
|
||||
let path = root.join(".story_kit").join("token_usage.jsonl");
|
||||
fs::create_dir_all(path.parent().unwrap()).unwrap();
|
||||
fs::write(&path, "not json\n{\"bad\":true}\n").unwrap();
|
||||
|
||||
let records = read_all(root).unwrap();
|
||||
assert!(records.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn token_usage_from_result_event() {
|
||||
let json = serde_json::json!({
|
||||
"type": "result",
|
||||
"total_cost_usd": 1.57,
|
||||
"usage": {
|
||||
"input_tokens": 7,
|
||||
"output_tokens": 475,
|
||||
"cache_creation_input_tokens": 185020,
|
||||
"cache_read_input_tokens": 810585
|
||||
}
|
||||
});
|
||||
|
||||
let usage = TokenUsage::from_result_event(&json).unwrap();
|
||||
assert_eq!(usage.input_tokens, 7);
|
||||
assert_eq!(usage.output_tokens, 475);
|
||||
assert_eq!(usage.cache_creation_input_tokens, 185020);
|
||||
assert_eq!(usage.cache_read_input_tokens, 810585);
|
||||
assert!((usage.total_cost_usd - 1.57).abs() < f64::EPSILON);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn token_usage_from_result_event_missing_usage() {
|
||||
let json = serde_json::json!({"type": "result"});
|
||||
assert!(TokenUsage::from_result_event(&json).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn token_usage_from_result_event_partial_fields() {
|
||||
let json = serde_json::json!({
|
||||
"type": "result",
|
||||
"total_cost_usd": 0.5,
|
||||
"usage": {
|
||||
"input_tokens": 10,
|
||||
"output_tokens": 20
|
||||
}
|
||||
});
|
||||
|
||||
let usage = TokenUsage::from_result_event(&json).unwrap();
|
||||
assert_eq!(usage.input_tokens, 10);
|
||||
assert_eq!(usage.output_tokens, 20);
|
||||
assert_eq!(usage.cache_creation_input_tokens, 0);
|
||||
assert_eq!(usage.cache_read_input_tokens, 0);
|
||||
}
|
||||
}
|
||||
@@ -962,6 +962,19 @@ fn handle_tools_list(id: Option<Value>) -> JsonRpcResponse {
|
||||
},
|
||||
"required": ["tool_name", "input"]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "get_token_usage",
|
||||
"description": "Return per-agent token usage records from the persistent log. Shows input tokens, output tokens, cache tokens, and cost in USD for each agent session. Optionally filter by story_id.",
|
||||
"inputSchema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"story_id": {
|
||||
"type": "string",
|
||||
"description": "Optional: filter records to a specific story (e.g. '42_my_story')"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}),
|
||||
@@ -1036,6 +1049,8 @@ async fn handle_tools_call(
|
||||
"rebuild_and_restart" => tool_rebuild_and_restart(ctx).await,
|
||||
// Permission bridge (Claude Code → frontend dialog)
|
||||
"prompt_permission" => tool_prompt_permission(&args, ctx).await,
|
||||
// Token usage
|
||||
"get_token_usage" => tool_get_token_usage(&args, ctx),
|
||||
_ => Err(format!("Unknown tool: {tool_name}")),
|
||||
};
|
||||
|
||||
@@ -2483,6 +2498,51 @@ async fn tool_prompt_permission(args: &Value, ctx: &AppContext) -> Result<String
|
||||
}
|
||||
}
|
||||
|
||||
fn tool_get_token_usage(args: &Value, ctx: &AppContext) -> Result<String, String> {
|
||||
let root = ctx.state.get_project_root()?;
|
||||
let filter_story = args.get("story_id").and_then(|v| v.as_str());
|
||||
|
||||
let all_records = crate::agents::token_usage::read_all(&root)?;
|
||||
let records: Vec<_> = all_records
|
||||
.into_iter()
|
||||
.filter(|r| filter_story.is_none_or(|s| r.story_id == s))
|
||||
.collect();
|
||||
|
||||
let total_cost: f64 = records.iter().map(|r| r.usage.total_cost_usd).sum();
|
||||
let total_input: u64 = records.iter().map(|r| r.usage.input_tokens).sum();
|
||||
let total_output: u64 = records.iter().map(|r| r.usage.output_tokens).sum();
|
||||
let total_cache_create: u64 = records
|
||||
.iter()
|
||||
.map(|r| r.usage.cache_creation_input_tokens)
|
||||
.sum();
|
||||
let total_cache_read: u64 = records
|
||||
.iter()
|
||||
.map(|r| r.usage.cache_read_input_tokens)
|
||||
.sum();
|
||||
|
||||
serde_json::to_string_pretty(&json!({
|
||||
"records": records.iter().map(|r| json!({
|
||||
"story_id": r.story_id,
|
||||
"agent_name": r.agent_name,
|
||||
"timestamp": r.timestamp,
|
||||
"input_tokens": r.usage.input_tokens,
|
||||
"output_tokens": r.usage.output_tokens,
|
||||
"cache_creation_input_tokens": r.usage.cache_creation_input_tokens,
|
||||
"cache_read_input_tokens": r.usage.cache_read_input_tokens,
|
||||
"total_cost_usd": r.usage.total_cost_usd,
|
||||
})).collect::<Vec<_>>(),
|
||||
"totals": {
|
||||
"records": records.len(),
|
||||
"input_tokens": total_input,
|
||||
"output_tokens": total_output,
|
||||
"cache_creation_input_tokens": total_cache_create,
|
||||
"cache_read_input_tokens": total_cache_read,
|
||||
"total_cost_usd": total_cost,
|
||||
}
|
||||
}))
|
||||
.map_err(|e| format!("Serialization error: {e}"))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -2592,7 +2652,8 @@ mod tests {
|
||||
assert!(names.contains(&"prompt_permission"));
|
||||
assert!(names.contains(&"get_pipeline_status"));
|
||||
assert!(names.contains(&"rebuild_and_restart"));
|
||||
assert_eq!(tools.len(), 39);
|
||||
assert!(names.contains(&"get_token_usage"));
|
||||
assert_eq!(tools.len(), 40);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -3729,6 +3790,72 @@ mod tests {
|
||||
let _ = result;
|
||||
}
|
||||
|
||||
// ── tool_get_token_usage tests ────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn tool_get_token_usage_empty_returns_zero_totals() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let ctx = test_ctx(tmp.path());
|
||||
let result = tool_get_token_usage(&json!({}), &ctx).unwrap();
|
||||
let parsed: Value = serde_json::from_str(&result).unwrap();
|
||||
assert_eq!(parsed["records"].as_array().unwrap().len(), 0);
|
||||
assert_eq!(parsed["totals"]["records"], 0);
|
||||
assert_eq!(parsed["totals"]["total_cost_usd"], 0.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tool_get_token_usage_returns_written_records() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let root = tmp.path();
|
||||
let ctx = test_ctx(root);
|
||||
|
||||
let usage = crate::agents::TokenUsage {
|
||||
input_tokens: 100,
|
||||
output_tokens: 200,
|
||||
cache_creation_input_tokens: 5000,
|
||||
cache_read_input_tokens: 10000,
|
||||
total_cost_usd: 1.57,
|
||||
};
|
||||
let record =
|
||||
crate::agents::token_usage::build_record("42_story_foo", "coder-1", usage);
|
||||
crate::agents::token_usage::append_record(root, &record).unwrap();
|
||||
|
||||
let result = tool_get_token_usage(&json!({}), &ctx).unwrap();
|
||||
let parsed: Value = serde_json::from_str(&result).unwrap();
|
||||
assert_eq!(parsed["records"].as_array().unwrap().len(), 1);
|
||||
assert_eq!(parsed["records"][0]["story_id"], "42_story_foo");
|
||||
assert_eq!(parsed["records"][0]["agent_name"], "coder-1");
|
||||
assert_eq!(parsed["records"][0]["input_tokens"], 100);
|
||||
assert_eq!(parsed["totals"]["records"], 1);
|
||||
assert!((parsed["totals"]["total_cost_usd"].as_f64().unwrap() - 1.57).abs() < f64::EPSILON);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tool_get_token_usage_filters_by_story_id() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let root = tmp.path();
|
||||
let ctx = test_ctx(root);
|
||||
|
||||
let usage = crate::agents::TokenUsage {
|
||||
input_tokens: 50,
|
||||
output_tokens: 60,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 0,
|
||||
total_cost_usd: 0.5,
|
||||
};
|
||||
let r1 = crate::agents::token_usage::build_record("10_story_a", "coder-1", usage.clone());
|
||||
let r2 = crate::agents::token_usage::build_record("20_story_b", "coder-2", usage);
|
||||
crate::agents::token_usage::append_record(root, &r1).unwrap();
|
||||
crate::agents::token_usage::append_record(root, &r2).unwrap();
|
||||
|
||||
let result =
|
||||
tool_get_token_usage(&json!({"story_id": "10_story_a"}), &ctx).unwrap();
|
||||
let parsed: Value = serde_json::from_str(&result).unwrap();
|
||||
assert_eq!(parsed["records"].as_array().unwrap().len(), 1);
|
||||
assert_eq!(parsed["records"][0]["story_id"], "10_story_a");
|
||||
assert_eq!(parsed["totals"]["records"], 1);
|
||||
}
|
||||
|
||||
// ── tool_list_worktrees tests ─────────────────────────────────
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user