diff --git a/.living_spec/specs/functional/UI_UX.md b/.living_spec/specs/functional/UI_UX.md index c269cde..79390e7 100644 --- a/.living_spec/specs/functional/UI_UX.md +++ b/.living_spec/specs/functional/UI_UX.md @@ -338,3 +338,69 @@ Provide a clear, accessible way for users to start a new session by clearing the - "Clear Chat" (direct but less friendly) - "Start Over" (conversational) - Icon: 🔄 or ⊕ (plus in circle) + +## Context Window Usage Display + +### Problem +Users have no visibility into how much of the model's context window they're using. This leads to: +- Unexpected quality degradation when context limit is reached +- Uncertainty about when to start a new session +- Inability to gauge conversation length + +### Solution: Real-time Context Usage Indicator +Display a persistent indicator showing current token usage vs. model's context window limit. + +### Requirements + +1. **Visual Indicator:** Always visible in header area +2. **Real-time Updates:** Updates as messages are added +3. **Model-Aware:** Shows correct limit based on selected model +4. **Color Coding:** Visual warning as limit approaches + - Green/default: 0-74% usage + - Yellow/warning: 75-89% usage + - Red/danger: 90-100% usage +5. **Clear Format:** "2.5K / 8K tokens (31%)" or similar +6. 
**Token Estimation:** Approximate token count for all messages + +### Implementation Notes + +**Token Estimation:** +- Use simple approximation: 1 token ≈ 4 characters +- Or integrate `gpt-tokenizer` for more accuracy +- Count: system prompts + user messages + assistant responses + tool outputs + tool calls + +**Model Context Windows:** +- llama3.1, llama3.2: 8K tokens +- qwen2.5-coder: 32K tokens +- deepseek-coder: 16K tokens +- Default/unknown: 8K tokens + +**Calculation:** +```tsx +const estimateTokens = (text: string): number => { + return Math.ceil(text.length / 4); +}; + +const calculateContextUsage = (messages: Message[], systemPrompt: string) => { + let total = estimateTokens(systemPrompt); + messages.forEach(msg => { + total += estimateTokens(msg.content); + if (msg.tool_calls) { + total += estimateTokens(JSON.stringify(msg.tool_calls)); + } + }); + return total; +}; +``` + +**UI Placement:** +- Header area, near model selector +- Non-intrusive but always visible +- Optional tooltip with breakdown on hover + +### Edge Cases +- Empty conversation: Show "0 / 8K" +- During streaming: Include partial content +- After clearing: Reset to 0 +- Model change: Update context window limit + diff --git a/.living_spec/stories/17_display_remaining_context.md b/.living_spec/stories/17_display_remaining_context.md deleted file mode 100644 index e69de29..0000000 diff --git a/.living_spec/stories/archive/17_display_remaining_context.md b/.living_spec/stories/archive/17_display_remaining_context.md new file mode 100644 index 0000000..1de6b8b --- /dev/null +++ b/.living_spec/stories/archive/17_display_remaining_context.md @@ -0,0 +1,82 @@ +# Story 17: Display Context Window Usage + +## User Story +As a user, I want to see how much of the model's context window I'm currently using, so that I know when I'm approaching the limit and should start a new session to avoid losing conversation quality. 
+ +## Acceptance Criteria +- [x] A visual indicator shows the current context usage (e.g., "2.5K / 8K tokens" or percentage) +- [x] The indicator is always visible in the UI (header area recommended) +- [x] The display updates in real-time as messages are added +- [x] Different models show their appropriate context window size (e.g., 8K for llama3.1, 128K for larger models) +- [x] The indicator changes color or style when approaching the limit (e.g., yellow at 75%, red at 90%) +- [x] Hovering over the indicator shows more details (tokens per message breakdown - optional) +- [x] The calculation includes system prompts, user messages, assistant responses, and tool outputs +- [x] Token counting is reasonably accurate (doesn't need to be perfect, estimate is fine) + +## Out of Scope +- Exact token counting (approximation is acceptable) +- Automatic session clearing when limit reached +- Per-message token counts in the UI +- Token usage history or analytics +- Different tokenizers for different models (use one estimation method) +- Backend token tracking from Ollama (estimate on frontend) + +## Technical Notes + +### Token Estimation +- Simple approximation: 1 token ≈ 4 characters (English text) +- Or use a basic tokenizer library like `gpt-tokenizer` or `tiktoken` (JS port) +- Count all message content: system prompts + user messages + assistant responses + tool outputs +- Include tool call JSON in the count + +### Context Window Sizes +Common model context windows: +- llama3.1, llama3.2: 8K tokens (8,192) +- qwen2.5-coder: 32K tokens +- deepseek-coder: 16K tokens +- Default/unknown: 8K tokens + +### Implementation Approach +```tsx +// Simple character-based estimation +const estimateTokens = (text: string): number => { + return Math.ceil(text.length / 4); +}; + +const calculateTotalTokens = (messages: Message[]): number => { + let total = 0; + // Add system prompt tokens (from backend) + total += estimateTokens(SYSTEM_PROMPT); + + // Add all message tokens + for 
(const msg of messages) { + total += estimateTokens(msg.content); + if (msg.tool_calls) { + total += estimateTokens(JSON.stringify(msg.tool_calls)); + } + } + + return total; +}; +``` + +### UI Placement +- Header area, right side near model selector +- Format: "2.5K / 8K tokens (31%)" +- Color coding: + - Green/default: 0-74% + - Yellow/warning: 75-89% + - Red/danger: 90-100% + +## Design Considerations +- Keep it subtle and non-intrusive +- Should be informative but not alarming +- Consider a small progress bar or circular indicator +- Example: "📊 2,450 / 8,192 (30%)" +- Or icon-based: "🟢 30% context" + +## Future Enhancements (Not in this story) +- Backend token counting from Ollama (if available) +- Per-message token display on hover +- "Summarize and continue" feature to compress history +- Export/archive conversation before clearing \ No newline at end of file diff --git a/src/components/Chat.tsx b/src/components/Chat.tsx index d16dfbd..eb01566 100644 --- a/src/components/Chat.tsx +++ b/src/components/Chat.tsx @@ -7,624 +7,690 @@ import { oneDark } from "react-syntax-highlighter/dist/esm/styles/prism"; import type { Message, ProviderConfig } from "../types"; interface ChatProps { - projectPath: string; - onCloseProject: () => void; + projectPath: string; + onCloseProject: () => void; } export function Chat({ projectPath, onCloseProject }: ChatProps) { - const [messages, setMessages] = useState([]); - const [input, setInput] = useState(""); - const [loading, setLoading] = useState(false); - const [model, setModel] = useState("llama3.1"); // Default local model - const [enableTools, setEnableTools] = useState(true); - const [availableModels, setAvailableModels] = useState([]); - const [streamingContent, setStreamingContent] = useState(""); - const messagesEndRef = useRef(null); - const inputRef = useRef(null); + const [messages, setMessages] = useState([]); + const [input, setInput] = useState(""); + const [loading, setLoading] = useState(false); + const 
[model, setModel] = useState("llama3.1"); // Default local model + const [enableTools, setEnableTools] = useState(true); + const [availableModels, setAvailableModels] = useState([]); + const [streamingContent, setStreamingContent] = useState(""); + const messagesEndRef = useRef(null); + const inputRef = useRef(null); - useEffect(() => { - invoke("get_ollama_models") - .then(async (models) => { - if (models.length > 0) { - setAvailableModels(models); + // Token estimation and context window tracking + const estimateTokens = (text: string): number => { + return Math.ceil(text.length / 4); + }; - // Check backend store for saved model - try { - const savedModel = await invoke( - "get_model_preference", - ); - if (savedModel && models.includes(savedModel)) { - setModel(savedModel); - } else if (!models.includes(model)) { - setModel(models[0]); - } - } catch (e) { - console.error(e); - } - } - }) - .catch((err) => console.error(err)); - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [model]); + const getContextWindowSize = (modelName: string): number => { + if (modelName.includes("llama3")) return 8192; + if (modelName.includes("qwen2.5")) return 32768; + if (modelName.includes("deepseek")) return 16384; + return 8192; // Default + }; - useEffect(() => { - const unlistenUpdatePromise = listen("chat:update", (event) => { - setMessages(event.payload); - setStreamingContent(""); // Clear streaming content when final update arrives - }); + const calculateContextUsage = (): { + used: number; + total: number; + percentage: number; + } => { + let totalTokens = 0; - const unlistenTokenPromise = listen("chat:token", (event) => { - setStreamingContent((prev) => prev + event.payload); - }); + // System prompts (approximate) + totalTokens += 200; - return () => { - unlistenUpdatePromise.then((unlisten) => unlisten()); - unlistenTokenPromise.then((unlisten) => unlisten()); - }; - }, []); + // All messages + for (const msg of messages) { + totalTokens += 
estimateTokens(msg.content); + if (msg.tool_calls) { + totalTokens += estimateTokens(JSON.stringify(msg.tool_calls)); + } + } - const scrollToBottom = () => { - messagesEndRef.current?.scrollIntoView({ behavior: "smooth" }); - }; + // Streaming content + if (streamingContent) { + totalTokens += estimateTokens(streamingContent); + } - // biome-ignore lint/correctness/useExhaustiveDependencies: We intentionally trigger on messages/streamingContent changes - useEffect(scrollToBottom, [messages, streamingContent]); + const contextWindow = getContextWindowSize(model); + const percentage = Math.round((totalTokens / contextWindow) * 100); - useEffect(() => { - inputRef.current?.focus(); - }, []); + return { + used: totalTokens, + total: contextWindow, + percentage, + }; + }; - const sendMessage = async () => { - if (!input.trim() || loading) return; + const contextUsage = calculateContextUsage(); - const userMsg: Message = { role: "user", content: input }; - const newHistory = [...messages, userMsg]; + const getContextEmoji = (percentage: number): string => { + if (percentage >= 90) return "🔴"; + if (percentage >= 75) return "🟡"; + return "🟢"; + }; - setMessages(newHistory); - setInput(""); - setLoading(true); - setStreamingContent(""); // Clear any previous streaming content + useEffect(() => { + invoke("get_ollama_models") + .then(async (models) => { + if (models.length > 0) { + setAvailableModels(models); - try { - const config: ProviderConfig = { - provider: "ollama", - model: model, - base_url: "http://localhost:11434", - enable_tools: enableTools, - }; + // Check backend store for saved model + try { + const savedModel = await invoke( + "get_model_preference", + ); + if (savedModel && models.includes(savedModel)) { + setModel(savedModel); + } else if (!models.includes(model)) { + setModel(models[0]); + } + } catch (e) { + console.error(e); + } + } + }) + .catch((err) => console.error(err)); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [model]); - 
// Invoke backend chat command - // We rely on 'chat:update' events to update the state in real-time - await invoke("chat", { - messages: newHistory, - config: config, - }); - } catch (e) { - console.error(e); - setMessages((prev) => [ - ...prev, - { role: "assistant", content: `**Error:** ${e}` }, - ]); - } finally { - setLoading(false); - } - }; + useEffect(() => { + const unlistenUpdatePromise = listen("chat:update", (event) => { + setMessages(event.payload); + setStreamingContent(""); // Clear streaming content when final update arrives + }); - const clearSession = () => { - const confirmed = window.confirm( - "Are you sure? This will clear all messages and reset the conversation context.", - ); - if (confirmed) { - setMessages([]); - setStreamingContent(""); - setLoading(false); - // TODO: Add backend call to clear context when implemented - // invoke("clear_session").catch(console.error); - } - }; + const unlistenTokenPromise = listen("chat:token", (event) => { + setStreamingContent((prev) => prev + event.payload); + }); - return ( -
- {/* Sticky Header */} -
- {/* Project Info */} -
-
- {projectPath} -
- -
+ return () => { + unlistenUpdatePromise.then((unlisten) => unlisten()); + unlistenTokenPromise.then((unlisten) => unlisten()); + }; + }, []); - {/* Model Controls */} -
- - {availableModels.length > 0 ? ( - - ) : ( - { - const newModel = e.target.value; - setModel(newModel); - invoke("set_model_preference", { model: newModel }).catch( - console.error, - ); - }} - placeholder="Model" - style={{ - padding: "6px 12px", - borderRadius: "99px", - border: "none", - fontSize: "0.9em", - background: "#2f2f2f", - color: "#ececec", - outline: "none", - }} - /> - )} - -
-
+ const scrollToBottom = () => { + messagesEndRef.current?.scrollIntoView({ behavior: "smooth" }); + }; - {/* Messages Area */} -
-
- {messages.map((msg, idx) => ( -
-
- {msg.role === "user" ? ( - msg.content - ) : msg.role === "tool" ? ( -
- - â–¶ - - Tool Output - {msg.tool_call_id && ` (${msg.tool_call_id})`} - - -
-											{msg.content}
-										
-
- ) : ( -
- { - const match = /language-(\w+)/.exec(className || ""); - const isInline = !className; - return !isInline && match ? ( - - {String(children).replace(/\n$/, "")} - - ) : ( - - {children} - - ); - }, - }} - > - {msg.content} - -
- )} + // biome-ignore lint/correctness/useExhaustiveDependencies: We intentionally trigger on messages/streamingContent changes + useEffect(scrollToBottom, [messages, streamingContent]); - {/* Show Tool Calls if present */} - {msg.tool_calls && ( -
- {msg.tool_calls.map((tc, i) => { - // Parse arguments to extract key info - let argsSummary = ""; - try { - const args = JSON.parse(tc.function.arguments); - const firstKey = Object.keys(args)[0]; - if (firstKey && args[firstKey]) { - argsSummary = String(args[firstKey]); - // Truncate if too long - if (argsSummary.length > 50) { - argsSummary = `${argsSummary.substring(0, 47)}...`; - } - } - } catch (_e) { - // If parsing fails, just show empty - } + useEffect(() => { + inputRef.current?.focus(); + }, []); - return ( -
- â–¶ - - {tc.function.name} - {argsSummary && `(${argsSummary})`} - -
- ); - })} -
- )} -
-
- ))} - {loading && streamingContent && ( -
-
- { - const match = /language-(\w+)/.exec(className || ""); - const isInline = !className; - return !isInline && match ? ( - - {String(children).replace(/\n$/, "")} - - ) : ( - - {children} - - ); - }, - }} - > - {streamingContent} - -
-
- )} - {loading && !streamingContent && ( -
- Thinking... -
- )} -
-
-
+ const sendMessage = async () => { + if (!input.trim() || loading) return; - {/* Input Area */} -
-
- setInput(e.target.value)} - onKeyDown={(e) => e.key === "Enter" && sendMessage()} - placeholder="Send a message..." - style={{ - width: "100%", - padding: "14px 20px", - paddingRight: "50px", // space for button - borderRadius: "24px", - border: "1px solid #333", - outline: "none", - fontSize: "1rem", - fontWeight: "500", - background: "#2f2f2f", - color: "#ececec", - boxShadow: "0 2px 6px rgba(0,0,0,0.02)", - }} - /> - -
-
-
- ); + const userMsg: Message = { role: "user", content: input }; + const newHistory = [...messages, userMsg]; + + setMessages(newHistory); + setInput(""); + setLoading(true); + setStreamingContent(""); // Clear any previous streaming content + + try { + const config: ProviderConfig = { + provider: "ollama", + model: model, + base_url: "http://localhost:11434", + enable_tools: enableTools, + }; + + // Invoke backend chat command + // We rely on 'chat:update' events to update the state in real-time + await invoke("chat", { + messages: newHistory, + config: config, + }); + } catch (e) { + console.error(e); + setMessages((prev) => [ + ...prev, + { role: "assistant", content: `**Error:** ${e}` }, + ]); + } finally { + setLoading(false); + } + }; + + const clearSession = () => { + const confirmed = window.confirm( + "Are you sure? This will clear all messages and reset the conversation context.", + ); + if (confirmed) { + setMessages([]); + setStreamingContent(""); + setLoading(false); + // TODO: Add backend call to clear context when implemented + // invoke("clear_session").catch(console.error); + } + }; + + return ( +
+ {/* Sticky Header */} +
+ {/* Project Info */} +
+
+ {projectPath} +
+ +
+ + {/* Model Controls */} +
+ {/* Context Usage Indicator */} +
+ {getContextEmoji(contextUsage.percentage)} {contextUsage.percentage} + % +
+ + + {availableModels.length > 0 ? ( + + ) : ( + { + const newModel = e.target.value; + setModel(newModel); + invoke("set_model_preference", { model: newModel }).catch( + console.error, + ); + }} + placeholder="Model" + style={{ + padding: "6px 12px", + borderRadius: "99px", + border: "none", + fontSize: "0.9em", + background: "#2f2f2f", + color: "#ececec", + outline: "none", + }} + /> + )} + +
+
+ + {/* Messages Area */} +
+
+ {messages.map((msg, idx) => ( +
+
+ {msg.role === "user" ? ( + msg.content + ) : msg.role === "tool" ? ( +
+ + ▶ + + Tool Output + {msg.tool_call_id && ` (${msg.tool_call_id})`} + + + 
+                      {msg.content}
+                    
+
+ ) : ( +
+ { + const match = /language-(\w+)/.exec(className || ""); + const isInline = !className; + return !isInline && match ? ( + + {String(children).replace(/\n$/, "")} + + ) : ( + + {children} + + ); + }, + }} + > + {msg.content} + +
+ )} + + {/* Show Tool Calls if present */} + {msg.tool_calls && ( +
+ {msg.tool_calls.map((tc, i) => { + // Parse arguments to extract key info + let argsSummary = ""; + try { + const args = JSON.parse(tc.function.arguments); + const firstKey = Object.keys(args)[0]; + if (firstKey && args[firstKey]) { + argsSummary = String(args[firstKey]); + // Truncate if too long + if (argsSummary.length > 50) { + argsSummary = `${argsSummary.substring(0, 47)}...`; + } + } + } catch (_e) { + // If parsing fails, just show empty + } + + return ( +
+ ▶ + + {tc.function.name} + {argsSummary && `(${argsSummary})`} + + 
+ ); + })} +
+ )} +
+
+ ))} + {loading && streamingContent && ( +
+
+ { + const match = /language-(\w+)/.exec(className || ""); + const isInline = !className; + return !isInline && match ? ( + + {String(children).replace(/\n$/, "")} + + ) : ( + + {children} + + ); + }, + }} + > + {streamingContent} + +
+
+ )} + {loading && !streamingContent && ( +
+ Thinking... +
+ )} +
+
+
+ + {/* Input Area */} +
+
+ setInput(e.target.value)} + onKeyDown={(e) => e.key === "Enter" && sendMessage()} + placeholder="Send a message..." + style={{ + width: "100%", + padding: "14px 20px", + paddingRight: "50px", // space for button + borderRadius: "24px", + border: "1px solid #333", + outline: "none", + fontSize: "1rem", + fontWeight: "500", + background: "#2f2f2f", + color: "#ececec", + boxShadow: "0 2px 6px rgba(0,0,0,0.02)", + }} + /> + +
+
+
+ ); }