//! Chat orchestration: multi-turn tool loop, side questions, and cancellation.
use crate::io::onboarding;
use crate::llm::chat::tools::{execute_tool, get_tool_definitions};
use crate::llm::prompts::{ONBOARDING_PROMPT, SYSTEM_PROMPT};
use crate::llm::providers::claude_code::ClaudeCodeResult;
use crate::llm::types::{Message, Role, ToolDefinition};
use crate::slog;
use crate::state::SessionState;
use crate::store::StoreOps;
use chrono::Utc;
use serde::Deserialize;
const MAX_TURNS: usize = 30;
const KEY_ANTHROPIC_API_KEY: &str = "anthropic_api_key";
/// Configuration for the LLM provider used during a chat session.
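///
/// A minimal sketch of the JSON this deserializes from (values are
/// illustrative placeholders, not a real configuration):
///
/// ```ignore
/// let cfg: ProviderConfig = serde_json::from_str(
///     r#"{"provider":"ollama","model":"llama3","base_url":null,"enable_tools":true,"session_id":null}"#,
/// ).expect("valid provider config JSON");
/// assert_eq!(cfg.provider, "ollama");
/// ```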
#[derive(Deserialize, Clone)]
pub struct ProviderConfig {
pub provider: String,
pub model: String,
pub base_url: Option<String>,
pub enable_tools: Option<bool>,
/// Claude Code session ID for conversation resumption.
pub session_id: Option<String>,
}
/// Result of a chat call, including messages and optional metadata.
#[allow(dead_code)]
#[derive(Debug)]
pub struct ChatResult {
pub messages: Vec<Message>,
/// Session ID returned by Claude Code for resumption.
pub session_id: Option<String>,
}
/// Prepend an ISO-8601 UTC timestamp to the content of the last user message.
///
/// Only the most-recent user message is annotated so that prior turns in the
/// conversation history are not re-stamped on every call. Assistant, system,
/// and tool messages are left untouched.
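///
/// A minimal illustration of the stamping (mirrors the unit tests below):
///
/// ```ignore
/// let mut messages = vec![Message {
///     role: Role::User,
///     content: "hello".to_string(),
///     tool_calls: None,
///     tool_call_id: None,
/// }];
/// inject_received_at(&mut messages, "2026-04-28T10:30:00Z");
/// assert_eq!(messages[0].content, "[2026-04-28T10:30:00Z] hello");
/// ```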
fn inject_received_at(messages: &mut [Message], ts: &str) {
if let Some(msg) = messages.iter_mut().rev().find(|m| m.role == Role::User) {
msg.content = format!("[{ts}] {}", msg.content);
}
}
fn get_anthropic_api_key_impl(store: &dyn StoreOps) -> Result<String, String> {
match store.get(KEY_ANTHROPIC_API_KEY) {
Some(value) => {
if let Some(key) = value.as_str() {
if key.is_empty() {
Err("Anthropic API key is empty. Please set your API key.".to_string())
} else {
Ok(key.to_string())
}
} else {
Err("Stored API key is not a string".to_string())
}
}
None => Err("Anthropic API key not found. Please set your API key.".to_string()),
}
}
/// Returns the list of available Ollama models from the given base URL.
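///
/// Falls back to `http://localhost:11434` when no base URL is given:
///
/// ```ignore
/// let models = get_ollama_models(None).await?;
/// ```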
pub async fn get_ollama_models(base_url: Option<String>) -> Result<Vec<String>, String> {
use crate::llm::providers::ollama::OllamaProvider;
let url = base_url.unwrap_or_else(|| "http://localhost:11434".to_string());
OllamaProvider::get_models(&url).await
}
/// Build a prompt for Claude Code that includes prior conversation history.
///
/// When a Claude Code session cannot be resumed (no session_id), we embed
/// the prior messages as a structured preamble so the LLM retains context.
/// If the history contains no prior messages (i.e. only the current user
/// message), the content is returned as-is with no preamble.
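///
/// Sketch of the produced preamble for a two-turn history (the format
/// matches the implementation below and the unit tests at the bottom of
/// this file):
///
/// ```text
/// <conversation_history>
/// [User]: What is Rust?
/// [Assistant]: Rust is a systems language.
/// </conversation_history>
///
/// Tell me more
/// ```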
fn build_claude_code_context_prompt(messages: &[Message], latest_user_content: &str) -> String {
    // Collect prior messages (everything except the trailing user message).
    let prior = &messages[..messages.len().saturating_sub(1)];
if prior.is_empty() {
return latest_user_content.to_string();
}
let mut parts = Vec::new();
parts.push("<conversation_history>".to_string());
    for msg in prior {
let label = match msg.role {
Role::User => "User",
Role::Assistant => "Assistant",
Role::Tool => "Tool",
Role::System => continue,
};
parts.push(format!("[{}]: {}", label, msg.content));
}
parts.push("</conversation_history>".to_string());
parts.push(String::new());
parts.push(latest_user_content.to_string());
parts.join("\n")
}
/// Sends a signal to cancel the active chat for the given session.
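///
/// Illustrative usage (the receiving side observes `true` on the shared
/// watch channel, as exercised in the tests below):
///
/// ```ignore
/// cancel_chat(&state)?;
/// assert!(*state.cancel_rx.borrow());
/// ```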
pub fn cancel_chat(state: &SessionState) -> Result<(), String> {
state.cancel_tx.send(true).map_err(|e| e.to_string())?;
Ok(())
}
/// Run a multi-turn chat with tool calling against the configured provider.
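///
/// The four callbacks receive, in order: the updated message history, raw
/// text tokens, thinking/reasoning text, and tool-activity names. A sketch
/// of a call site (callbacks here are mostly no-ops, as in the tests below):
///
/// ```ignore
/// let result = chat(
///     messages,
///     config,
///     &state,
///     &store,
///     |_history| {},
///     |token| print!("{token}"),
///     |_thinking| {},
///     |_tool_name| {},
/// )
/// .await?;
/// ```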
#[allow(clippy::too_many_arguments)]
pub async fn chat<F, U, T, A>(
mut messages: Vec<Message>,
config: ProviderConfig,
state: &SessionState,
store: &dyn StoreOps,
mut on_update: F,
mut on_token: U,
mut on_thinking: T,
mut on_activity: A,
) -> Result<ChatResult, String>
where
F: FnMut(&[Message]) + Send,
U: FnMut(&str) + Send,
T: FnMut(&str) + Send,
A: FnMut(&str) + Send,
{
use crate::llm::providers::anthropic::AnthropicProvider;
use crate::llm::providers::ollama::OllamaProvider;
// Stamp the current user message with the wall-clock time at the transport
// boundary (i.e. when this function is entered, which is immediately after
// the WebSocket frame is received — before any LLM invocation).
let received_at = Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string();
inject_received_at(&mut messages, &received_at);
let _ = state.cancel_tx.send(false);
let mut cancel_rx = state.cancel_rx.clone();
cancel_rx.borrow_and_update();
let base_url = config
.base_url
.clone()
.unwrap_or_else(|| "http://localhost:11434".to_string());
slog!("[chat] provider={} model={}", config.provider, config.model);
let is_claude_code = config.provider == "claude-code";
let is_claude = !is_claude_code && config.model.starts_with("claude-");
if !is_claude_code && !is_claude && config.provider.as_str() != "ollama" {
return Err(format!("Unsupported provider: {}", config.provider));
}
// Claude Code provider: bypasses our tool loop entirely.
// Claude Code has its own agent loop, tools, and context management.
// We pipe the user message in, stream text tokens for live display, and
// collect the structured messages (assistant turns + tool results) from
// the stream-json output for the final message history.
if is_claude_code {
use crate::llm::providers::claude_code::ClaudeCodeProvider;
let latest_user_content = messages
.iter()
.rev()
.find(|m| m.role == Role::User)
.map(|m| m.content.clone())
.ok_or_else(|| "No user message found".to_string())?;
// When resuming with a session_id, Claude Code loads its own transcript
// from disk — the latest user message is sufficient. Without a
// session_id (e.g. after a page refresh) the prior conversation context
// would be lost because Claude Code only receives a single prompt
// string. In that case, prepend the conversation history so the LLM
// retains full context even though the session cannot be resumed.
let user_message = if config.session_id.is_some() {
latest_user_content
} else {
build_claude_code_context_prompt(&messages, &latest_user_content)
};
let project_root = state
.get_project_root()
.unwrap_or_else(|_| std::path::PathBuf::from("."));
let provider = ClaudeCodeProvider::new();
let ClaudeCodeResult {
messages: cc_messages,
session_id,
} = provider
.chat_stream(
&user_message,
&project_root.to_string_lossy(),
config.session_id.as_deref(),
None,
&mut cancel_rx,
|token| on_token(token),
|thinking| on_thinking(thinking),
|tool_name| on_activity(tool_name),
)
.await
.map_err(|e| format!("Claude Code Error: {e}"))?;
// Build the final message history: user messages + Claude Code's turns.
// If the session produced no structured messages (e.g. empty response),
// fall back to an empty assistant message so the UI stops loading.
let mut result = messages.clone();
if cc_messages.is_empty() {
result.push(Message {
role: Role::Assistant,
content: String::new(),
tool_calls: None,
tool_call_id: None,
});
} else {
result.extend(cc_messages);
}
on_update(&result);
return Ok(ChatResult {
messages: result,
session_id,
});
}
let tool_defs = get_tool_definitions();
let tools: &[ToolDefinition] = if config.enable_tools.unwrap_or(true) {
tool_defs.as_slice()
} else {
&[]
};
let mut current_history = messages.clone();
// Build the system prompt — append onboarding instructions when the
// project's spec files still contain scaffold placeholders.
let system_content = {
let mut content = SYSTEM_PROMPT.to_string();
if let Ok(root) = state.get_project_root() {
let status = onboarding::check_onboarding_status(&root);
if status.needs_onboarding() {
content.push_str("\n\n");
content.push_str(ONBOARDING_PROMPT);
}
}
content
};
current_history.insert(
0,
Message {
role: Role::System,
content: system_content,
tool_calls: None,
tool_call_id: None,
},
);
current_history.insert(
1,
Message {
role: Role::System,
content: "REMINDER: Distinguish between showing examples (use code blocks in chat) vs implementing changes (use write_file tool). Keywords like 'show me', 'example', 'how does' = chat response. Keywords like 'create', 'add', 'implement', 'fix' = use tools."
.to_string(),
tool_calls: None,
tool_call_id: None,
},
);
let mut new_messages: Vec<Message> = Vec::new();
let mut turn_count = 0;
loop {
if *cancel_rx.borrow() {
return Err("Chat cancelled by user".to_string());
}
if turn_count >= MAX_TURNS {
return Err("Max conversation turns reached.".to_string());
}
turn_count += 1;
let response = if is_claude {
let api_key = get_anthropic_api_key_impl(store)?;
let anthropic_provider = AnthropicProvider::new(api_key);
anthropic_provider
.chat_stream(
&config.model,
&current_history,
tools,
&mut cancel_rx,
|token| on_token(token),
|tool_name| on_activity(tool_name),
)
.await
.map_err(|e| format!("Anthropic Error: {e}"))?
} else {
let ollama_provider = OllamaProvider::new(base_url.clone());
ollama_provider
.chat_stream(
&config.model,
&current_history,
tools,
&mut cancel_rx,
|token| on_token(token),
)
.await
.map_err(|e| format!("Ollama Error: {e}"))?
};
if let Some(tool_calls) = response.tool_calls {
let assistant_msg = Message {
role: Role::Assistant,
content: response.content.unwrap_or_default(),
tool_calls: Some(tool_calls.clone()),
tool_call_id: None,
};
current_history.push(assistant_msg.clone());
new_messages.push(assistant_msg);
on_update(&current_history[2..]);
for call in tool_calls {
if *cancel_rx.borrow() {
return Err("Chat cancelled before tool execution".to_string());
}
let output = execute_tool(&call, state).await;
let tool_msg = Message {
role: Role::Tool,
content: output,
tool_calls: None,
tool_call_id: call.id,
};
current_history.push(tool_msg.clone());
new_messages.push(tool_msg);
on_update(&current_history[2..]);
}
} else {
let assistant_msg = Message {
role: Role::Assistant,
content: response.content.unwrap_or_default(),
tool_calls: None,
tool_call_id: None,
};
new_messages.push(assistant_msg.clone());
current_history.push(assistant_msg);
on_update(&current_history[2..]);
break;
}
}
Ok(ChatResult {
messages: current_history[2..].to_vec(),
session_id: None,
})
}
/// Answer a one-off side question using the existing conversation as context.
///
/// Unlike `chat`, this function:
/// - Does NOT perform tool calls.
/// - Does NOT modify the main conversation history.
/// - Does NOT touch the shared cancel signal.
/// - Performs a single LLM call and returns the response text.
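///
/// Sketch of a call site (the callback just streams tokens):
///
/// ```ignore
/// let answer = side_question(
///     context_messages,
///     "What does this error mean?".to_string(),
///     config,
///     &store,
///     |token| print!("{token}"),
/// )
/// .await?;
/// ```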
pub async fn side_question<U>(
context_messages: Vec<Message>,
question: String,
config: ProviderConfig,
store: &dyn StoreOps,
mut on_token: U,
) -> Result<String, String>
where
U: FnMut(&str) + Send,
{
use crate::llm::providers::anthropic::AnthropicProvider;
use crate::llm::providers::ollama::OllamaProvider;
// Use a local cancel channel that is never cancelled, so the side question
// runs to completion independently of any main chat cancel signal.
// Keep `_cancel_tx` alive for the duration of the function so the channel
// stays open and `changed()` inside the providers does not spuriously fire.
let (_cancel_tx, cancel_rx) = tokio::sync::watch::channel(false);
let mut cancel_rx = cancel_rx;
cancel_rx.borrow_and_update();
let base_url = config
.base_url
.clone()
.unwrap_or_else(|| "http://localhost:11434".to_string());
let is_claude_code = config.provider == "claude-code";
let is_claude = !is_claude_code && config.model.starts_with("claude-");
// Build a minimal history: existing context + the side question.
let mut history = context_messages;
history.push(Message {
role: Role::User,
content: question,
tool_calls: None,
tool_call_id: None,
});
// No tools for side questions.
let tools: &[ToolDefinition] = &[];
let response = if is_claude {
let api_key = get_anthropic_api_key_impl(store)?;
let provider = AnthropicProvider::new(api_key);
provider
.chat_stream(
&config.model,
&history,
tools,
&mut cancel_rx,
|token| on_token(token),
|_tool_name| {},
)
.await
.map_err(|e| format!("Anthropic Error: {e}"))?
} else if is_claude_code {
return Err("Claude Code provider does not support side questions".to_string());
} else {
let provider = OllamaProvider::new(base_url);
provider
.chat_stream(&config.model, &history, tools, &mut cancel_rx, |token| {
on_token(token)
})
.await
.map_err(|e| format!("Ollama Error: {e}"))?
};
Ok(response.content.unwrap_or_default())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::llm::types::Message;
use crate::state::SessionState;
use crate::store::StoreOps;
use std::collections::HashMap;
use std::sync::Mutex;
// ---------------------------------------------------------------------------
// Minimal in-memory StoreOps mock
// ---------------------------------------------------------------------------
struct MockStore {
data: Mutex<HashMap<String, serde_json::Value>>,
save_should_fail: bool,
}
impl MockStore {
fn new() -> Self {
Self {
data: Mutex::new(HashMap::new()),
save_should_fail: false,
}
}
}
impl StoreOps for MockStore {
fn get(&self, key: &str) -> Option<serde_json::Value> {
self.data.lock().ok().and_then(|m| m.get(key).cloned())
}
fn set(&self, key: &str, value: serde_json::Value) {
if let Ok(mut m) = self.data.lock() {
m.insert(key.to_string(), value);
}
}
fn delete(&self, key: &str) {
if let Ok(mut m) = self.data.lock() {
m.remove(key);
}
}
fn save(&self) -> Result<(), String> {
if self.save_should_fail {
Err("mock save error".to_string())
} else {
Ok(())
}
}
}
// ---------------------------------------------------------------------------
// inject_received_at
// ---------------------------------------------------------------------------
#[test]
fn inject_timestamp_into_last_user_message() {
let mut messages = vec![Message {
role: Role::User,
content: "hello".to_string(),
tool_calls: None,
tool_call_id: None,
}];
inject_received_at(&mut messages, "2026-04-28T10:30:00Z");
assert_eq!(messages[0].content, "[2026-04-28T10:30:00Z] hello");
}
#[test]
fn inject_timestamp_only_last_user_message() {
let mut messages = vec![
Message {
role: Role::User,
content: "first".to_string(),
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::Assistant,
content: "reply".to_string(),
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::User,
content: "second".to_string(),
tool_calls: None,
tool_call_id: None,
},
];
inject_received_at(&mut messages, "2026-04-28T10:30:00Z");
// Only the last user message is stamped.
assert_eq!(messages[0].content, "first");
assert_eq!(messages[1].content, "reply");
assert_eq!(messages[2].content, "[2026-04-28T10:30:00Z] second");
}
#[test]
fn inject_timestamp_skips_assistant_messages() {
let mut messages = vec![Message {
role: Role::Assistant,
content: "bot reply".to_string(),
tool_calls: None,
tool_call_id: None,
}];
inject_received_at(&mut messages, "2026-04-28T10:30:00Z");
// No user message — nothing changes.
assert_eq!(messages[0].content, "bot reply");
}
#[test]
fn inject_timestamp_does_not_stamp_system_messages() {
let mut messages = vec![
Message {
role: Role::System,
content: "sys".to_string(),
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::User,
content: "hello".to_string(),
tool_calls: None,
tool_call_id: None,
},
];
inject_received_at(&mut messages, "2026-04-28T10:30:00Z");
assert_eq!(messages[0].content, "sys");
assert_eq!(messages[1].content, "[2026-04-28T10:30:00Z] hello");
}
// ---------------------------------------------------------------------------
// cancel_chat
// ---------------------------------------------------------------------------
#[test]
fn cancel_chat_sends_true_to_channel() {
let state = SessionState::default();
let result = cancel_chat(&state);
assert!(result.is_ok());
assert!(*state.cancel_rx.borrow());
}
// ---------------------------------------------------------------------------
// chat — unsupported provider early return (no network calls)
// ---------------------------------------------------------------------------
#[tokio::test]
async fn chat_rejects_unknown_provider() {
let state = SessionState::default();
let store = MockStore::new();
let config = ProviderConfig {
provider: "unsupported-provider".to_string(),
model: "some-model".to_string(),
base_url: None,
enable_tools: None,
session_id: None,
};
let messages = vec![Message {
role: Role::User,
content: "hello".to_string(),
tool_calls: None,
tool_call_id: None,
}];
let result = chat(
messages,
config,
&state,
&store,
|_| {},
|_| {},
|_| {},
|_| {},
)
.await;
assert!(result.is_err());
assert!(
result
.unwrap_err()
.contains("Unsupported provider: unsupported-provider")
);
}
// ---------------------------------------------------------------------------
// chat — ollama path exercises system prompt insertion + tool setup
// (connection to a non-existent port fails fast)
// ---------------------------------------------------------------------------
#[tokio::test]
async fn chat_ollama_bad_url_fails_with_ollama_error() {
let state = SessionState::default();
let store = MockStore::new();
let config = ProviderConfig {
provider: "ollama".to_string(),
model: "llama3".to_string(),
// Port 1 is reserved / closed — connection fails immediately.
base_url: Some("http://127.0.0.1:1".to_string()),
enable_tools: Some(false),
session_id: None,
};
let messages = vec![Message {
role: Role::User,
content: "ping".to_string(),
tool_calls: None,
tool_call_id: None,
}];
let result = chat(
messages,
config,
&state,
&store,
|_| {},
|_| {},
|_| {},
|_| {},
)
.await;
assert!(result.is_err());
let err = result.unwrap_err();
assert!(err.contains("Ollama Error:"), "unexpected error: {err}");
}
// ---------------------------------------------------------------------------
// chat — Anthropic model prefix detection (fails without API key)
// ---------------------------------------------------------------------------
#[tokio::test]
async fn chat_claude_model_without_api_key_returns_error() {
let state = SessionState::default();
// No API key in store → should fail with "API key not found"
let store = MockStore::new();
let config = ProviderConfig {
provider: "anthropic".to_string(),
model: "claude-3-haiku-20240307".to_string(),
base_url: None,
enable_tools: Some(false),
session_id: None,
};
let messages = vec![Message {
role: Role::User,
content: "hello".to_string(),
tool_calls: None,
tool_call_id: None,
}];
let result = chat(
messages,
config,
&state,
&store,
|_| {},
|_| {},
|_| {},
|_| {},
)
.await;
assert!(result.is_err());
let err = result.unwrap_err();
assert!(
err.contains("API key"),
"expected API key error, got: {err}"
);
}
// ---------------------------------------------------------------------------
// build_claude_code_context_prompt (Bug 245)
// ---------------------------------------------------------------------------
#[test]
fn context_prompt_single_message_returns_content_as_is() {
let messages = vec![Message {
role: Role::User,
content: "hello".to_string(),
tool_calls: None,
tool_call_id: None,
}];
let result = build_claude_code_context_prompt(&messages, "hello");
assert_eq!(result, "hello");
}
#[test]
fn context_prompt_includes_prior_conversation() {
let messages = vec![
Message {
role: Role::User,
content: "What is Rust?".to_string(),
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::Assistant,
content: "Rust is a systems language.".to_string(),
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::User,
content: "Tell me more".to_string(),
tool_calls: None,
tool_call_id: None,
},
];
let result = build_claude_code_context_prompt(&messages, "Tell me more");
assert!(
result.contains("<conversation_history>"),
"should have history preamble"
);
assert!(
result.contains("[User]: What is Rust?"),
"should include prior user message"
);
assert!(
result.contains("[Assistant]: Rust is a systems language."),
"should include prior assistant message"
);
assert!(
result.contains("</conversation_history>"),
"should close history block"
);
assert!(
result.ends_with("Tell me more"),
"should end with latest user message"
);
}
#[test]
fn context_prompt_skips_system_messages() {
let messages = vec![
Message {
role: Role::System,
content: "You are a helpful assistant.".to_string(),
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::User,
content: "hi".to_string(),
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::Assistant,
content: "hello".to_string(),
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::User,
content: "bye".to_string(),
tool_calls: None,
tool_call_id: None,
},
];
let result = build_claude_code_context_prompt(&messages, "bye");
assert!(
!result.contains("helpful assistant"),
"should not include system messages"
);
assert!(result.contains("[User]: hi"));
assert!(result.contains("[Assistant]: hello"));
}
}