//! Chat orchestration: multi-turn tool loop, side questions, and cancellation.
use crate::io::onboarding;
use crate::llm::chat::tools::{execute_tool, get_tool_definitions};
use crate::llm::prompts::{ONBOARDING_PROMPT, SYSTEM_PROMPT};
use crate::llm::providers::claude_code::ClaudeCodeResult;
use crate::llm::types::{Message, Role, ToolDefinition};
use crate::slog;
use crate::state::SessionState;
use crate::store::StoreOps;
use chrono::Utc;
use serde::Deserialize;
const MAX_TURNS: usize = 30;
const KEY_ANTHROPIC_API_KEY: &str = "anthropic_api_key";
/// Configuration for the LLM provider used during a chat session.
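///
/// A minimal sketch of the JSON this deserializes from (values are
/// illustrative placeholders, not a real configuration):
///
/// ```ignore
/// let cfg: ProviderConfig = serde_json::from_str(
///     r#"{"provider":"ollama","model":"llama3","base_url":null,"enable_tools":true,"session_id":null}"#,
/// ).expect("valid provider config JSON");
/// assert_eq!(cfg.provider, "ollama");
/// ```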
#[derive(Deserialize, Clone)]
pub struct ProviderConfig {
pub provider: String,
pub model: String,
pub base_url: Option<String>,
pub enable_tools: Option<bool>,
/// Claude Code session ID for conversation resumption.
pub session_id: Option<String>,
}
/// Result of a chat call, including messages and optional metadata.
#[allow(dead_code)]
#[derive(Debug)]
pub struct ChatResult {
pub messages: Vec<Message>,
/// Session ID returned by Claude Code for resumption.
pub session_id: Option<String>,
}
/// Prepend an ISO-8601 UTC timestamp to the content of the last user message.
///
/// Only the most-recent user message is annotated so that prior turns in the
/// conversation history are not re-stamped on every call. Assistant, system,
/// and tool messages are left untouched.
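///
/// A minimal illustration of the stamping (mirrors the unit tests below):
///
/// ```ignore
/// let mut messages = vec![Message {
///     role: Role::User,
///     content: "hello".to_string(),
///     tool_calls: None,
///     tool_call_id: None,
/// }];
/// inject_received_at(&mut messages, "2026-04-28T10:30:00Z");
/// assert_eq!(messages[0].content, "[2026-04-28T10:30:00Z] hello");
/// ```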
fn inject_received_at(messages: &mut [Message], ts: &str) {
if let Some(msg) = messages.iter_mut().rev().find(|m| m.role == Role::User) {
msg.content = format!("[{ts}] {}", msg.content);
}
}
fn get_anthropic_api_key_impl(store: &dyn StoreOps) -> Result<String, String> {
match store.get(KEY_ANTHROPIC_API_KEY) {
Some(value) => {
if let Some(key) = value.as_str() {
if key.is_empty() {
Err("Anthropic API key is empty. Please set your API key.".to_string())
} else {
Ok(key.to_string())
}
} else {
Err("Stored API key is not a string".to_string())
}
}
None => Err("Anthropic API key not found. Please set your API key.".to_string()),
}
}
/// Returns the list of available Ollama models from the given base URL.
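///
/// Falls back to `http://localhost:11434` when no base URL is given:
///
/// ```ignore
/// let models = get_ollama_models(None).await?;
/// ```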
pub async fn get_ollama_models(base_url: Option<String>) -> Result<Vec<String>, String> {
use crate::llm::providers::ollama::OllamaProvider;
let url = base_url.unwrap_or_else(|| "http://localhost:11434".to_string());
OllamaProvider::get_models(&url).await
}
/// Build a prompt for Claude Code that includes prior conversation history.
///
/// When a Claude Code session cannot be resumed (no session_id), we embed
/// the prior messages as a structured preamble so the LLM retains context.
/// If the history contains no prior messages (i.e. only the current user
/// message), the content is returned as-is with no preamble.
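///
/// Sketch of the produced preamble for a two-turn history (the format
/// matches the implementation below and the unit tests at the bottom of
/// this file):
///
/// ```text
/// <conversation_history>
/// [User]: What is Rust?
/// [Assistant]: Rust is a systems language.
/// </conversation_history>
///
/// Tell me more
/// ```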
fn build_claude_code_context_prompt(messages: &[Message], latest_user_content: &str) -> String {
    // Collect prior messages (everything except the trailing user message).
    let prior = &messages[..messages.len().saturating_sub(1)];
if prior.is_empty() {
return latest_user_content.to_string();
}
let mut parts = Vec::new();
parts.push("<conversation_history>".to_string());
    for msg in prior {
let label = match msg.role {
Role::User => "User",
Role::Assistant => "Assistant",
Role::Tool => "Tool",
Role::System => continue,
};
parts.push(format!("[{}]: {}", label, msg.content));
}
parts.push("</conversation_history>".to_string());
parts.push(String::new());
parts.push(latest_user_content.to_string());
parts.join("\n")
}
/// Sends a signal to cancel the active chat for the given session.
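///
/// Illustrative usage (the receiving side observes `true` on the shared
/// watch channel, as exercised in the tests below):
///
/// ```ignore
/// cancel_chat(&state)?;
/// assert!(*state.cancel_rx.borrow());
/// ```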
pub fn cancel_chat(state: &SessionState) -> Result<(), String> {
state.cancel_tx.send(true).map_err(|e| e.to_string())?;
Ok(())
}
/// Run a multi-turn chat with tool calling against the configured provider.
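///
/// The four callbacks receive, in order: the updated message history, raw
/// text tokens, thinking/reasoning text, and tool-activity names. A sketch
/// of a call site (callbacks here are mostly no-ops, as in the tests below):
///
/// ```ignore
/// let result = chat(
///     messages,
///     config,
///     &state,
///     &store,
///     |_history| {},
///     |token| print!("{token}"),
///     |_thinking| {},
///     |_tool_name| {},
/// )
/// .await?;
/// ```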
#[allow(clippy::too_many_arguments)]
pub async fn chat<F, U, T, A>(
mut messages: Vec<Message>,
config: ProviderConfig,
state: &SessionState,
store: &dyn StoreOps,
mut on_update: F,
mut on_token: U,
mut on_thinking: T,
mut on_activity: A,
) -> Result<ChatResult, String>
where
F: FnMut(&[Message]) + Send,
U: FnMut(&str) + Send,
T: FnMut(&str) + Send,
A: FnMut(&str) + Send,
{
use crate::llm::providers::anthropic::AnthropicProvider;
use crate::llm::providers::ollama::OllamaProvider;
// Stamp the current user message with the wall-clock time at the transport
// boundary (i.e. when this function is entered, which is immediately after
// the WebSocket frame is received — before any LLM invocation).
let received_at = Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string();
inject_received_at(&mut messages, &received_at);
let _ = state.cancel_tx.send(false);
let mut cancel_rx = state.cancel_rx.clone();
cancel_rx.borrow_and_update();
let base_url = config
.base_url
.clone()
.unwrap_or_else(|| "http://localhost:11434".to_string());
slog!("[chat] provider={} model={}", config.provider, config.model);
let is_claude_code = config.provider == "claude-code";
let is_claude = !is_claude_code && config.model.starts_with("claude-");
if !is_claude_code && !is_claude && config.provider.as_str() != "ollama" {
return Err(format!("Unsupported provider: {}", config.provider));
}
// Claude Code provider: bypasses our tool loop entirely.
// Claude Code has its own agent loop, tools, and context management.
// We pipe the user message in, stream text tokens for live display, and
// collect the structured messages (assistant turns + tool results) from
// the stream-json output for the final message history.
if is_claude_code {
use crate::llm::providers::claude_code::ClaudeCodeProvider;
let latest_user_content = messages
.iter()
.rev()
.find(|m| m.role == Role::User)
.map(|m| m.content.clone())
.ok_or_else(|| "No user message found".to_string())?;
// When resuming with a session_id, Claude Code loads its own transcript
// from disk — the latest user message is sufficient. Without a
// session_id (e.g. after a page refresh) the prior conversation context
// would be lost because Claude Code only receives a single prompt
// string. In that case, prepend the conversation history so the LLM
// retains full context even though the session cannot be resumed.
let user_message = if config.session_id.is_some() {
latest_user_content
} else {
build_claude_code_context_prompt(&messages, &latest_user_content)
};
let project_root = state
.get_project_root()
.unwrap_or_else(|_| std::path::PathBuf::from("."));
let provider = ClaudeCodeProvider::new();
let ClaudeCodeResult {
messages: cc_messages,
session_id,
} = provider
.chat_stream(
&user_message,
&project_root.to_string_lossy(),
config.session_id.as_deref(),
None,
&mut cancel_rx,
|token| on_token(token),
|thinking| on_thinking(thinking),
|tool_name| on_activity(tool_name),
)
.await
.map_err(|e| format!("Claude Code Error: {e}"))?;
// Build the final message history: user messages + Claude Code's turns.
// If the session produced no structured messages (e.g. empty response),
// fall back to an empty assistant message so the UI stops loading.
let mut result = messages.clone();
if cc_messages.is_empty() {
result.push(Message {
role: Role::Assistant,
content: String::new(),
tool_calls: None,
tool_call_id: None,
});
} else {
result.extend(cc_messages);
}
on_update(&result);
return Ok(ChatResult {
messages: result,
session_id,
});
}
let tool_defs = get_tool_definitions();
let tools: &[ToolDefinition] = if config.enable_tools.unwrap_or(true) {
tool_defs.as_slice()
} else {
&[]
};
let mut current_history = messages.clone();
// Build the system prompt — append onboarding instructions when the
// project's spec files still contain scaffold placeholders.
let system_content = {
let mut content = SYSTEM_PROMPT.to_string();
if let Ok(root) = state.get_project_root() {
let status = onboarding::check_onboarding_status(&root);
if status.needs_onboarding() {
content.push_str("\n\n");
content.push_str(ONBOARDING_PROMPT);
}
}
content
};
current_history.insert(
0,
Message {
role: Role::System,
content: system_content,
tool_calls: None,
tool_call_id: None,
},
);
current_history.insert(
1,
Message {
role: Role::System,
content: "REMINDER: Distinguish between showing examples (use code blocks in chat) vs implementing changes (use write_file tool). Keywords like 'show me', 'example', 'how does' = chat response. Keywords like 'create', 'add', 'implement', 'fix' = use tools."
.to_string(),
tool_calls: None,
tool_call_id: None,
},
);
let mut new_messages: Vec<Message> = Vec::new();
let mut turn_count = 0;
loop {
if *cancel_rx.borrow() {
return Err("Chat cancelled by user".to_string());
}
if turn_count >= MAX_TURNS {
return Err("Max conversation turns reached.".to_string());
}
turn_count += 1;
let response = if is_claude {
let api_key = get_anthropic_api_key_impl(store)?;
let anthropic_provider = AnthropicProvider::new(api_key);
anthropic_provider
.chat_stream(
&config.model,
&current_history,
tools,
&mut cancel_rx,
|token| on_token(token),
|tool_name| on_activity(tool_name),
)
.await
.map_err(|e| format!("Anthropic Error: {e}"))?
} else {
let ollama_provider = OllamaProvider::new(base_url.clone());
ollama_provider
.chat_stream(
&config.model,
&current_history,
tools,
&mut cancel_rx,
|token| on_token(token),
)
.await
.map_err(|e| format!("Ollama Error: {e}"))?
};
if let Some(tool_calls) = response.tool_calls {
let assistant_msg = Message {
role: Role::Assistant,
content: response.content.unwrap_or_default(),
tool_calls: Some(tool_calls.clone()),
tool_call_id: None,
};
current_history.push(assistant_msg.clone());
new_messages.push(assistant_msg);
on_update(&current_history[2..]);
for call in tool_calls {
if *cancel_rx.borrow() {
return Err("Chat cancelled before tool execution".to_string());
}
let output = execute_tool(&call, state).await;
let tool_msg = Message {
role: Role::Tool,
content: output,
tool_calls: None,
tool_call_id: call.id,
};
current_history.push(tool_msg.clone());
new_messages.push(tool_msg);
on_update(&current_history[2..]);
}
} else {
let assistant_msg = Message {
role: Role::Assistant,
content: response.content.unwrap_or_default(),
tool_calls: None,
tool_call_id: None,
};
new_messages.push(assistant_msg.clone());
current_history.push(assistant_msg);
on_update(&current_history[2..]);
break;
}
}
Ok(ChatResult {
messages: current_history[2..].to_vec(),
session_id: None,
})
}
/// Answer a one-off side question using the existing conversation as context.
///
/// Unlike `chat`, this function:
/// - Does NOT perform tool calls.
/// - Does NOT modify the main conversation history.
/// - Does NOT touch the shared cancel signal.
/// - Performs a single LLM call and returns the response text.
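///
/// Sketch of a call site (the callback just streams tokens):
///
/// ```ignore
/// let answer = side_question(
///     context_messages,
///     "What does this error mean?".to_string(),
///     config,
///     &store,
///     |token| print!("{token}"),
/// )
/// .await?;
/// ```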
pub async fn side_question<U>(
context_messages: Vec<Message>,
question: String,
config: ProviderConfig,
store: &dyn StoreOps,
mut on_token: U,
) -> Result<String, String>
where
U: FnMut(&str) + Send,
{
use crate::llm::providers::anthropic::AnthropicProvider;
use crate::llm::providers::ollama::OllamaProvider;
// Use a local cancel channel that is never cancelled, so the side question
// runs to completion independently of any main chat cancel signal.
// Keep `_cancel_tx` alive for the duration of the function so the channel
// stays open and `changed()` inside the providers does not spuriously fire.
let (_cancel_tx, cancel_rx) = tokio::sync::watch::channel(false);
let mut cancel_rx = cancel_rx;
cancel_rx.borrow_and_update();
let base_url = config
.base_url
.clone()
.unwrap_or_else(|| "http://localhost:11434".to_string());
let is_claude_code = config.provider == "claude-code";
let is_claude = !is_claude_code && config.model.starts_with("claude-");
// Build a minimal history: existing context + the side question.
let mut history = context_messages;
history.push(Message {
role: Role::User,
content: question,
tool_calls: None,
tool_call_id: None,
});
// No tools for side questions.
let tools: &[ToolDefinition] = &[];
let response = if is_claude {
let api_key = get_anthropic_api_key_impl(store)?;
let provider = AnthropicProvider::new(api_key);
provider
.chat_stream(
&config.model,
&history,
tools,
&mut cancel_rx,
|token| on_token(token),
|_tool_name| {},
)
.await
.map_err(|e| format!("Anthropic Error: {e}"))?
} else if is_claude_code {
return Err("Claude Code provider does not support side questions".to_string());
} else {
let provider = OllamaProvider::new(base_url);
provider
.chat_stream(&config.model, &history, tools, &mut cancel_rx, |token| {
on_token(token)
})
.await
.map_err(|e| format!("Ollama Error: {e}"))?
};
Ok(response.content.unwrap_or_default())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::llm::types::Message;
use crate::state::SessionState;
use crate::store::StoreOps;
use std::collections::HashMap;
use std::sync::Mutex;
// ---------------------------------------------------------------------------
// Minimal in-memory StoreOps mock
// ---------------------------------------------------------------------------
struct MockStore {
data: Mutex<HashMap<String, serde_json::Value>>,
save_should_fail: bool,
}
impl MockStore {
fn new() -> Self {
Self {
data: Mutex::new(HashMap::new()),
save_should_fail: false,
}
}
}
impl StoreOps for MockStore {
fn get(&self, key: &str) -> Option<serde_json::Value> {
self.data.lock().ok().and_then(|m| m.get(key).cloned())
}
fn set(&self, key: &str, value: serde_json::Value) {
if let Ok(mut m) = self.data.lock() {
m.insert(key.to_string(), value);
}
}
fn delete(&self, key: &str) {
if let Ok(mut m) = self.data.lock() {
m.remove(key);
}
}
fn save(&self) -> Result<(), String> {
if self.save_should_fail {
Err("mock save error".to_string())
} else {
Ok(())
}
}
}
// ---------------------------------------------------------------------------
// inject_received_at
// ---------------------------------------------------------------------------
#[test]
fn inject_timestamp_into_last_user_message() {
let mut messages = vec![Message {
role: Role::User,
content: "hello".to_string(),
tool_calls: None,
tool_call_id: None,
}];
inject_received_at(&mut messages, "2026-04-28T10:30:00Z");
assert_eq!(messages[0].content, "[2026-04-28T10:30:00Z] hello");
}
#[test]
fn inject_timestamp_only_last_user_message() {
let mut messages = vec![
Message {
role: Role::User,
content: "first".to_string(),
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::Assistant,
content: "reply".to_string(),
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::User,
content: "second".to_string(),
tool_calls: None,
tool_call_id: None,
},
];
inject_received_at(&mut messages, "2026-04-28T10:30:00Z");
// Only the last user message is stamped.
assert_eq!(messages[0].content, "first");
assert_eq!(messages[1].content, "reply");
assert_eq!(messages[2].content, "[2026-04-28T10:30:00Z] second");
}
#[test]
fn inject_timestamp_skips_assistant_messages() {
let mut messages = vec![Message {
role: Role::Assistant,
content: "bot reply".to_string(),
tool_calls: None,
tool_call_id: None,
}];
inject_received_at(&mut messages, "2026-04-28T10:30:00Z");
// No user message — nothing changes.
assert_eq!(messages[0].content, "bot reply");
}
#[test]
fn inject_timestamp_does_not_stamp_system_messages() {
let mut messages = vec![
Message {
role: Role::System,
content: "sys".to_string(),
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::User,
content: "hello".to_string(),
tool_calls: None,
tool_call_id: None,
},
];
inject_received_at(&mut messages, "2026-04-28T10:30:00Z");
assert_eq!(messages[0].content, "sys");
assert_eq!(messages[1].content, "[2026-04-28T10:30:00Z] hello");
}
// ---------------------------------------------------------------------------
// cancel_chat
// ---------------------------------------------------------------------------
#[test]
fn cancel_chat_sends_true_to_channel() {
let state = SessionState::default();
let result = cancel_chat(&state);
assert!(result.is_ok());
assert!(*state.cancel_rx.borrow());
}
// ---------------------------------------------------------------------------
// chat — unsupported provider early return (no network calls)
// ---------------------------------------------------------------------------
#[tokio::test]
async fn chat_rejects_unknown_provider() {
let state = SessionState::default();
let store = MockStore::new();
let config = ProviderConfig {
provider: "unsupported-provider".to_string(),
model: "some-model".to_string(),
base_url: None,
enable_tools: None,
session_id: None,
};
let messages = vec![Message {
role: Role::User,
content: "hello".to_string(),
tool_calls: None,
tool_call_id: None,
}];
let result = chat(
messages,
config,
&state,
&store,
|_| {},
|_| {},
|_| {},
|_| {},
)
.await;
assert!(result.is_err());
assert!(
result
.unwrap_err()
.contains("Unsupported provider: unsupported-provider")
);
}
// ---------------------------------------------------------------------------
// chat — ollama path exercises system prompt insertion + tool setup
// (connection to a non-existent port fails fast)
// ---------------------------------------------------------------------------
#[tokio::test]
async fn chat_ollama_bad_url_fails_with_ollama_error() {
let state = SessionState::default();
let store = MockStore::new();
let config = ProviderConfig {
provider: "ollama".to_string(),
model: "llama3".to_string(),
// Port 1 is reserved / closed — connection fails immediately.
base_url: Some("http://127.0.0.1:1".to_string()),
enable_tools: Some(false),
session_id: None,
};
let messages = vec![Message {
role: Role::User,
content: "ping".to_string(),
tool_calls: None,
tool_call_id: None,
}];
let result = chat(
messages,
config,
&state,
&store,
|_| {},
|_| {},
|_| {},
|_| {},
)
.await;
assert!(result.is_err());
let err = result.unwrap_err();
assert!(err.contains("Ollama Error:"), "unexpected error: {err}");
}
// ---------------------------------------------------------------------------
// chat — Anthropic model prefix detection (fails without API key)
// ---------------------------------------------------------------------------
#[tokio::test]
async fn chat_claude_model_without_api_key_returns_error() {
let state = SessionState::default();
// No API key in store → should fail with "API key not found"
let store = MockStore::new();
let config = ProviderConfig {
provider: "anthropic".to_string(),
model: "claude-3-haiku-20240307".to_string(),
base_url: None,
enable_tools: Some(false),
session_id: None,
};
let messages = vec![Message {
role: Role::User,
content: "hello".to_string(),
tool_calls: None,
tool_call_id: None,
}];
let result = chat(
messages,
config,
&state,
&store,
|_| {},
|_| {},
|_| {},
|_| {},
)
.await;
assert!(result.is_err());
let err = result.unwrap_err();
assert!(
err.contains("API key"),
"expected API key error, got: {err}"
);
}
// ---------------------------------------------------------------------------
// build_claude_code_context_prompt (Bug 245)
// ---------------------------------------------------------------------------
#[test]
fn context_prompt_single_message_returns_content_as_is() {
let messages = vec![Message {
role: Role::User,
content: "hello".to_string(),
tool_calls: None,
tool_call_id: None,
}];
let result = build_claude_code_context_prompt(&messages, "hello");
assert_eq!(result, "hello");
}
#[test]
fn context_prompt_includes_prior_conversation() {
let messages = vec![
Message {
role: Role::User,
content: "What is Rust?".to_string(),
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::Assistant,
content: "Rust is a systems language.".to_string(),
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::User,
content: "Tell me more".to_string(),
tool_calls: None,
tool_call_id: None,
},
];
let result = build_claude_code_context_prompt(&messages, "Tell me more");
assert!(
result.contains("<conversation_history>"),
"should have history preamble"
);
assert!(
result.contains("[User]: What is Rust?"),
"should include prior user message"
);
assert!(
result.contains("[Assistant]: Rust is a systems language."),
"should include prior assistant message"
);
assert!(
result.contains("</conversation_history>"),
"should close history block"
);
assert!(
result.ends_with("Tell me more"),
"should end with latest user message"
);
}
#[test]
fn context_prompt_skips_system_messages() {
let messages = vec![
Message {
role: Role::System,
content: "You are a helpful assistant.".to_string(),
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::User,
content: "hi".to_string(),
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::Assistant,
content: "hello".to_string(),
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::User,
content: "bye".to_string(),
tool_calls: None,
tool_call_id: None,
},
];
let result = build_claude_code_context_prompt(&messages, "bye");
assert!(
!result.contains("helpful assistant"),
"should not include system messages"
);
assert!(result.contains("[User]: hi"));
assert!(result.contains("[Assistant]: hello"));
}
}