Accept story 39: Persistent Claude Code Sessions in Web UI

Use `--resume <session_id>` with `claude -p` so the web UI claude-code-pty provider maintains full conversation context across messages, identical to a long-running terminal Claude Code session.

Changes:
- Capture session_id from the `claude -p` stream-json output (the first event that carries one, e.g. the system event)
- Pass `--resume` on subsequent messages in the same chat session
- Thread session_id through ProviderConfig, ChatResult, WsResponse
- Frontend stores sessionId per chat, clears on New Session
- Clear the CLAUDECODE env var (set to an empty string) to allow nested spawning from the server
- Wait up to 2 seconds for a clean process exit so the transcript is flushed to disk, then kill the process if it is still running

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-20 11:51:19 +00:00
parent cff7f5fe7f
commit cde75bd7fb
11 changed files with 9524 additions and 61 deletions
--- a/server/src/llm/providers/anthropic.rs
+++ b/server/src/llm/providers/anthropic.rs
@@ -305,6 +305,7 @@ impl AnthropicProvider {
            } else {
                Some(tool_calls)
            },
+            session_id: None,
        })
    }
 }
--- a/server/src/llm/providers/claude_code.rs
+++ b/server/src/llm/providers/claude_code.rs
@@ -11,6 +11,10 @@ use crate::llm::types::CompletionResponse;
 /// Spawns `claude -p` in a PTY so isatty() returns true (which may
 /// influence billing), while using `--output-format stream-json` to
 /// get clean, structured NDJSON output instead of TUI escape sequences.
+///
+/// Supports session resumption: if a `session_id` is provided, passes
+/// `--resume <id>` so Claude Code loads the prior conversation transcript
+/// from disk and continues with full context.
 pub struct ClaudeCodeProvider;

 impl ClaudeCodeProvider {
@@ -22,6 +26,7 @@ impl ClaudeCodeProvider {
        &self,
        user_message: &str,
        project_root: &str,
+        session_id: Option<&str>,
        cancel_rx: &mut watch::Receiver<bool>,
        mut on_token: F,
    ) -> Result<CompletionResponse, String>
@@ -30,6 +35,7 @@ impl ClaudeCodeProvider {
    {
        let message = user_message.to_string();
        let cwd = project_root.to_string();
+        let resume_id = session_id.map(|s| s.to_string());
        let cancelled = Arc::new(AtomicBool::new(false));
        let cancelled_clone = cancelled.clone();

@@ -44,9 +50,10 @@ impl ClaudeCodeProvider {
        });

        let (token_tx, mut token_rx) = tokio::sync::mpsc::unbounded_channel::<String>();
+        let (sid_tx, sid_rx) = tokio::sync::oneshot::channel::<String>();

        let pty_handle = tokio::task::spawn_blocking(move || {
-            run_pty_session(&message, &cwd, cancelled, token_tx)
+            run_pty_session(&message, &cwd, resume_id.as_deref(), cancelled, token_tx, sid_tx)
        });

        let mut full_output = String::new();
@@ -59,9 +66,12 @@ impl ClaudeCodeProvider {
            .await
            .map_err(|e| format!("PTY task panicked: {e}"))??;

+        let captured_session_id = sid_rx.await.ok();
+
        Ok(CompletionResponse {
            content: Some(full_output),
            tool_calls: None,
+            session_id: captured_session_id,
        })
    }
 }
@@ -73,8 +83,10 @@ impl ClaudeCodeProvider {
 fn run_pty_session(
    user_message: &str,
    cwd: &str,
+    resume_session_id: Option<&str>,
    cancelled: Arc<AtomicBool>,
    token_tx: tokio::sync::mpsc::UnboundedSender<String>,
+    sid_tx: tokio::sync::oneshot::Sender<String>,
 ) -> Result<(), String> {
    let pty_system = native_pty_system();

@@ -90,21 +102,36 @@ fn run_pty_session(
    let mut cmd = CommandBuilder::new("claude");
    cmd.arg("-p");
    cmd.arg(user_message);
+    if let Some(sid) = resume_session_id {
+        cmd.arg("--resume");
+        cmd.arg(sid);
+    }
    cmd.arg("--output-format");
    cmd.arg("stream-json");
    cmd.arg("--verbose");
    cmd.cwd(cwd);
    // Keep TERM reasonable but disable color
    cmd.env("NO_COLOR", "1");
+    // Allow nested spawning when the server itself runs inside Claude Code
+    cmd.env("CLAUDECODE", "");

-    eprintln!("[pty-debug] Spawning: claude -p \"{}\" --output-format stream-json --verbose", user_message);
+    eprintln!(
+        "[pty-debug] Spawning: claude -p \"{}\" {} --output-format stream-json --verbose",
+        user_message,
+        resume_session_id
+            .map(|s| format!("--resume {s}"))
+            .unwrap_or_default()
+    );

    let mut child = pair
        .slave
        .spawn_command(cmd)
        .map_err(|e| format!("Failed to spawn claude: {e}"))?;

-    eprintln!("[pty-debug] Process spawned, pid: {:?}", child.process_id());
+    eprintln!(
+        "[pty-debug] Process spawned, pid: {:?}",
+        child.process_id()
+    );
    drop(pair.slave);

    let reader = pair
@@ -141,6 +168,7 @@ fn run_pty_session(
    });

    let mut got_result = false;
+    let mut sid_tx = Some(sid_tx);

    loop {
        if cancelled.load(Ordering::Relaxed) {
@@ -155,59 +183,106 @@ fn run_pty_session(
                    continue;
                }

-                eprintln!("[pty-debug] processing: {}...", &trimmed[..trimmed.len().min(120)]);
+                eprintln!(
+                    "[pty-debug] processing: {}...",
+                    &trimmed[..trimmed.len().min(120)]
+                );

                // Try to parse as JSON
                if let Ok(json) = serde_json::from_str::<serde_json::Value>(trimmed)
-                    && let Some(event_type) = json.get("type").and_then(|t| t.as_str()) {
-                        match event_type {
-                            // Streaming deltas (when --include-partial-messages is used)
-                            "stream_event" => {
-                                if let Some(event) = json.get("event") {
-                                    handle_stream_event(event, &token_tx);
-                                }
-                            }
-                            // Complete assistant message
-                            "assistant" => {
-                                if let Some(message) = json.get("message")
-                                    && let Some(content) = message.get("content").and_then(|c| c.as_array()) {
-                                        for block in content {
-                                            if let Some(text) = block.get("text").and_then(|t| t.as_str()) {
-                                                let _ = token_tx.send(text.to_string());
-                                            }
-                                        }
-                                    }
-                            }
-                            // Final result with usage stats
-                            "result" => {
-                                if let Some(cost) = json.get("total_cost_usd").and_then(|c| c.as_f64()) {
-                                    let _ = token_tx.send(format!("\n\n---\n_Cost: ${cost:.4}_\n"));
-                                }
-                                if let Some(usage) = json.get("usage") {
-                                    let input = usage.get("input_tokens").and_then(|t| t.as_u64()).unwrap_or(0);
-                                    let output = usage.get("output_tokens").and_then(|t| t.as_u64()).unwrap_or(0);
-                                    let cached = usage.get("cache_read_input_tokens").and_then(|t| t.as_u64()).unwrap_or(0);
-                                    let _ = token_tx.send(format!("_Tokens: {input} in / {output} out / {cached} cached_\n"));
-                                }
-                                got_result = true;
-                            }
-                            // System init — log billing info
-                            "system" => {
-                                let api_source = json.get("apiKeySource").and_then(|s| s.as_str()).unwrap_or("unknown");
-                                let model = json.get("model").and_then(|s| s.as_str()).unwrap_or("unknown");
-                                let _ = token_tx.send(format!("_[{model} | apiKey: {api_source}]_\n\n"));
-                            }
-                            // Rate limit info
-                            "rate_limit_event" => {
-                                if let Some(info) = json.get("rate_limit_info") {
-                                    let status = info.get("status").and_then(|s| s.as_str()).unwrap_or("unknown");
-                                    let limit_type = info.get("rateLimitType").and_then(|s| s.as_str()).unwrap_or("unknown");
-                                    let _ = token_tx.send(format!("_[rate limit: {status} ({limit_type})]_\n\n"));
-                                }
-                            }
-                            _ => {}
+                    && let Some(event_type) = json.get("type").and_then(|t| t.as_str())
+                {
+                    // Capture session_id from any event that has it
+                    if let Some(tx) = sid_tx.take() {
+                        if let Some(sid) = json.get("session_id").and_then(|s| s.as_str()) {
+                            let _ = tx.send(sid.to_string());
+                        } else {
+                            // Put it back if this event didn't have a session_id
+                            sid_tx = Some(tx);
                        }
                    }
+
+                    match event_type {
+                        // Streaming deltas (when --include-partial-messages is used)
+                        "stream_event" => {
+                            if let Some(event) = json.get("event") {
+                                handle_stream_event(event, &token_tx);
+                            }
+                        }
+                        // Complete assistant message
+                        "assistant" => {
+                            if let Some(message) = json.get("message")
+                                && let Some(content) =
+                                    message.get("content").and_then(|c| c.as_array())
+                            {
+                                for block in content {
+                                    if let Some(text) =
+                                        block.get("text").and_then(|t| t.as_str())
+                                    {
+                                        let _ = token_tx.send(text.to_string());
+                                    }
+                                }
+                            }
+                        }
+                        // Final result with usage stats
+                        "result" => {
+                            if let Some(cost) =
+                                json.get("total_cost_usd").and_then(|c| c.as_f64())
+                            {
+                                let _ =
+                                    token_tx.send(format!("\n\n---\n_Cost: ${cost:.4}_\n"));
+                            }
+                            if let Some(usage) = json.get("usage") {
+                                let input = usage
+                                    .get("input_tokens")
+                                    .and_then(|t| t.as_u64())
+                                    .unwrap_or(0);
+                                let output = usage
+                                    .get("output_tokens")
+                                    .and_then(|t| t.as_u64())
+                                    .unwrap_or(0);
+                                let cached = usage
+                                    .get("cache_read_input_tokens")
+                                    .and_then(|t| t.as_u64())
+                                    .unwrap_or(0);
+                                let _ = token_tx.send(format!(
+                                    "_Tokens: {input} in / {output} out / {cached} cached_\n"
+                                ));
+                            }
+                            got_result = true;
+                        }
+                        // System init — log billing info
+                        "system" => {
+                            let api_source = json
+                                .get("apiKeySource")
+                                .and_then(|s| s.as_str())
+                                .unwrap_or("unknown");
+                            let model = json
+                                .get("model")
+                                .and_then(|s| s.as_str())
+                                .unwrap_or("unknown");
+                            let _ = token_tx
+                                .send(format!("_[{model} | apiKey: {api_source}]_\n\n"));
+                        }
+                        // Rate limit info
+                        "rate_limit_event" => {
+                            if let Some(info) = json.get("rate_limit_info") {
+                                let status = info
+                                    .get("status")
+                                    .and_then(|s| s.as_str())
+                                    .unwrap_or("unknown");
+                                let limit_type = info
+                                    .get("rateLimitType")
+                                    .and_then(|s| s.as_str())
+                                    .unwrap_or("unknown");
+                                let _ = token_tx.send(format!(
+                                    "_[rate limit: {status} ({limit_type})]_\n\n"
+                                ));
+                            }
+                        }
+                        _ => {}
+                    }
+                }
                // Ignore non-JSON lines (terminal escape sequences)

                if got_result {
@@ -226,9 +301,9 @@ fn run_pty_session(
                                .get("type")
                                .filter(|t| t.as_str() == Some("stream_event"))
                                .and_then(|_| json.get("event"))
-                            {
-                                handle_stream_event(event, &token_tx);
-                            }
+                        {
+                            handle_stream_event(event, &token_tx);
+                        }
                    }
                    break;
                }
@@ -240,7 +315,23 @@ fn run_pty_session(
        let _ = got_result;
    }

-    let _ = child.kill();
+    // Wait briefly for Claude Code to flush its session transcript to disk.
+    // The `result` event means the API response is done, but the process
+    // still needs to write the conversation to the JSONL session file.
+    match child.try_wait() {
+        Ok(Some(_)) => {} // Already exited
+        _ => {
+            // Give it up to 2 seconds to exit cleanly
+            for _ in 0..20 {
+                std::thread::sleep(std::time::Duration::from_millis(100));
+                if let Ok(Some(_)) = child.try_wait() {
+                    break;
+                }
+            }
+            // If still running after 2s, kill it
+            let _ = child.kill();
+        }
+    }
    Ok(())
 }

@@ -263,7 +354,9 @@ fn handle_stream_event(
                        }
                    }
                    "thinking_delta" => {
-                        if let Some(thinking) = delta.get("thinking").and_then(|t| t.as_str()) {
+                        if let Some(thinking) =
+                            delta.get("thinking").and_then(|t| t.as_str())
+                        {
                            let _ = token_tx.send(format!("[thinking] {thinking}"));
                        }
                    }
--- a/server/src/llm/providers/ollama.rs
+++ b/server/src/llm/providers/ollama.rs
@@ -178,6 +178,7 @@ impl OllamaProvider {
                Some(accumulated_content)
            },
            tool_calls: final_tool_calls,
+            session_id: None,
        })
    }
 }