story-kit: merge 296_story_track_per_agent_token_usage_for_cost_visibility_and_optimisation

This commit is contained in:
Dave
2026-03-19 09:53:32 +00:00
parent 6c413e1fc7
commit 9cdb0d4ea8
5 changed files with 411 additions and 9 deletions

View File

@@ -5,11 +5,17 @@ use std::sync::{Arc, Mutex};
use portable_pty::{ChildKiller, CommandBuilder, PtySize, native_pty_system};
use tokio::sync::broadcast;
use super::AgentEvent;
use super::{AgentEvent, TokenUsage};
use crate::agent_log::AgentLogWriter;
use crate::slog;
use crate::slog_warn;
/// Outcome of a completed PTY agent session.
///
/// Bundles the values that the PTY runner hands back to its caller on
/// success, so the session identifier and the usage figures travel together.
pub(super) struct PtyResult {
    /// Identifier of the agent session, or `None` if none was captured.
    pub session_id: Option<String>,
    /// Token usage reported for the session, or `None` if none was captured.
    pub token_usage: Option<TokenUsage>,
}
/// Builds the lookup key for a story/agent pair.
///
/// The key is the story id and the agent name separated by a single `:`,
/// i.e. `"<story_id>:<agent_name>"`. Neither part is escaped or validated.
fn composite_key(story_id: &str, agent_name: &str) -> String {
    let parts = [story_id, agent_name];
    parts.join(":")
}
@@ -41,7 +47,7 @@ pub(super) async fn run_agent_pty_streaming(
log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
inactivity_timeout_secs: u64,
child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
) -> Result<Option<String>, String> {
) -> Result<PtyResult, String> {
let sid = story_id.to_string();
let aname = agent_name.to_string();
let cmd = command.to_string();
@@ -156,7 +162,7 @@ fn run_agent_pty_blocking(
log_writer: Option<&Mutex<AgentLogWriter>>,
inactivity_timeout_secs: u64,
child_killers: &Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
) -> Result<Option<String>, String> {
) -> Result<PtyResult, String> {
let pty_system = native_pty_system();
let pair = pty_system
@@ -251,6 +257,7 @@ fn run_agent_pty_blocking(
};
let mut session_id: Option<String> = None;
let mut token_usage: Option<TokenUsage> = None;
loop {
let recv_result = match timeout_dur {
@@ -334,7 +341,21 @@ fn run_agent_pty_blocking(
// Complete assistant events are skipped for content extraction
// because thinking and text already arrived via stream_event.
// The raw JSON is still forwarded as AgentJson below.
"assistant" | "user" | "result" => {}
"assistant" | "user" => {}
"result" => {
// Extract token usage from the result event.
if let Some(usage) = TokenUsage::from_result_event(&json) {
slog!(
"[agent:{story_id}:{agent_name}] Token usage: in={} out={} cache_create={} cache_read={} cost=${:.4}",
usage.input_tokens,
usage.output_tokens,
usage.cache_creation_input_tokens,
usage.cache_read_input_tokens,
usage.total_cost_usd,
);
token_usage = Some(usage);
}
}
_ => {}
}
@@ -359,7 +380,10 @@ fn run_agent_pty_blocking(
session_id
);
Ok(session_id)
Ok(PtyResult {
session_id,
token_usage,
})
}
#[cfg(test)]