story-kit: merge 296_story_track_per_agent_token_usage_for_cost_visibility_and_optimisation
This commit is contained in:
@@ -5,11 +5,17 @@ use std::sync::{Arc, Mutex};
|
||||
use portable_pty::{ChildKiller, CommandBuilder, PtySize, native_pty_system};
|
||||
use tokio::sync::broadcast;
|
||||
|
||||
use super::AgentEvent;
|
||||
use super::{AgentEvent, TokenUsage};
|
||||
use crate::agent_log::AgentLogWriter;
|
||||
use crate::slog;
|
||||
use crate::slog_warn;
|
||||
|
||||
/// Result from a PTY agent session, containing the session ID and token usage.
///
/// Both fields are optional; either may be `None` for a session that still completed with `Ok`.
|
||||
pub(super) struct PtyResult {
|
||||
/// Session ID for the run, or `None` when none was recorded.
// NOTE(review): the code that assigns the session ID is not visible in this view — confirm where it is captured.
pub session_id: Option<String>,
|
||||
/// Token usage taken from a `result` event via `TokenUsage::from_result_event`.
/// Stays `None` when no `result` event yielded usage data.
pub token_usage: Option<TokenUsage>,
|
||||
}
|
||||
|
||||
/// Joins `story_id` and `agent_name` with a `:` separator into a newly allocated `String`.
///
/// No escaping is applied, so a `:` inside either argument appears verbatim in the result.
// NOTE(review): presumably this is the key used for per-agent maps such as `child_killers` — confirm against callers.
fn composite_key(story_id: &str, agent_name: &str) -> String {
|
||||
format!("{story_id}:{agent_name}")
|
||||
}
|
||||
@@ -41,7 +47,7 @@ pub(super) async fn run_agent_pty_streaming(
|
||||
log_writer: Option<Arc<Mutex<AgentLogWriter>>>,
|
||||
inactivity_timeout_secs: u64,
|
||||
child_killers: Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
|
||||
) -> Result<Option<String>, String> {
|
||||
) -> Result<PtyResult, String> {
|
||||
let sid = story_id.to_string();
|
||||
let aname = agent_name.to_string();
|
||||
let cmd = command.to_string();
|
||||
@@ -156,7 +162,7 @@ fn run_agent_pty_blocking(
|
||||
log_writer: Option<&Mutex<AgentLogWriter>>,
|
||||
inactivity_timeout_secs: u64,
|
||||
child_killers: &Arc<Mutex<HashMap<String, Box<dyn ChildKiller + Send + Sync>>>>,
|
||||
) -> Result<Option<String>, String> {
|
||||
) -> Result<PtyResult, String> {
|
||||
let pty_system = native_pty_system();
|
||||
|
||||
let pair = pty_system
|
||||
@@ -251,6 +257,7 @@ fn run_agent_pty_blocking(
|
||||
};
|
||||
|
||||
let mut session_id: Option<String> = None;
|
||||
let mut token_usage: Option<TokenUsage> = None;
|
||||
|
||||
loop {
|
||||
let recv_result = match timeout_dur {
|
||||
@@ -334,7 +341,21 @@ fn run_agent_pty_blocking(
|
||||
// Complete assistant events are skipped for content extraction
|
||||
// because thinking and text already arrived via stream_event.
|
||||
// The raw JSON is still forwarded as AgentJson below.
|
||||
"assistant" | "user" | "result" => {}
|
||||
"assistant" | "user" => {}
|
||||
"result" => {
|
||||
// Extract token usage from the result event.
|
||||
if let Some(usage) = TokenUsage::from_result_event(&json) {
|
||||
slog!(
|
||||
"[agent:{story_id}:{agent_name}] Token usage: in={} out={} cache_create={} cache_read={} cost=${:.4}",
|
||||
usage.input_tokens,
|
||||
usage.output_tokens,
|
||||
usage.cache_creation_input_tokens,
|
||||
usage.cache_read_input_tokens,
|
||||
usage.total_cost_usd,
|
||||
);
|
||||
token_usage = Some(usage);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
@@ -359,7 +380,10 @@ fn run_agent_pty_blocking(
|
||||
session_id
|
||||
);
|
||||
|
||||
Ok(session_id)
|
||||
Ok(PtyResult {
|
||||
session_id,
|
||||
token_usage,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
Reference in New Issue
Block a user