From 4c887d93b599f29b25efb2dd71e47f6017f30614 Mon Sep 17 00:00:00 2001 From: Dave Date: Tue, 17 Feb 2026 13:34:32 +0000 Subject: [PATCH] Refocus workflow on TDD and reorganize stories --- .story_kit/README.md | 33 ++-- .story_kit/specs/00_CONTEXT.md | 12 +- .story_kit/specs/README.md | 17 -- .../specs/functional/AGENT_CAPABILITIES.md | 48 ------ .story_kit/specs/functional/AI_INTEGRATION.md | 150 ------------------ .story_kit/specs/functional/PERSISTENCE.md | 37 ----- .story_kit/specs/functional/PERSONA.md | 48 ------ .../specs/functional/PROJECT_MANAGEMENT.md | 38 ----- .story_kit/specs/tech/MODEL_SELECTION.md | 139 ---------------- .story_kit/specs/tech/STACK.md | 7 +- .../01_project_selection.md | 0 .../02_core_agent_tools.md | 0 .../{archive => archived}/03_llm_ollama.md | 0 .../04_ollama_model_detection.md | 0 .../05_persist_project_selection.md | 0 .../06_fix_ui_responsiveness.md | 0 .../07_ui_polish_sticky_header.md | 0 .../08_collapsible_tool_outputs.md | 0 .../09_remove_scroll_bars.md | 0 .../09_system_prompt_persona.md | 0 .../10_persist_model_selection.md | 0 .../11_make_text_not_centred.md | 0 .../12_be_able_to_use_claude.md | 0 .../{archive => archived}/13_stop_button.md | 0 .../14_put_cursor_in_chat_box_on_startup.md | 0 .../15_new_session_cancellation.md | 0 .../17_display_remaining_context.md | 0 .../18_streaming_responses.md | 0 .../20_start_new_session.md | 0 .../22_smart_autoscroll.md | 0 .../23_alphabetize_llm_dropdown.md | 0 .../24_tauri_to_browser_ui.md | 0 .../25_auto_scaffold_story_kit.md | 0 .../upcoming/26_define_tdd_workflow_rules.md | 16 ++ .../stories/upcoming/27_enforce_test_first.md | 14 ++ .../28_require_unit_and_integration_tests.md | 12 ++ .../29_block_acceptance_on_test_failures.md | 14 ++ .../30_protect_against_test_deletion.md | 13 ++ .../31_standardize_acceptance_test_layout.md | 14 ++ .../32_add_coverage_regression_guardrail.md | 13 ++ .../upcoming/33_test_result_summary_ui.md | 14 ++ .../34_backfill_tests_high_coverage.md | 
14 ++ 42 files changed, 155 insertions(+), 498 deletions(-) delete mode 100644 .story_kit/specs/README.md delete mode 100644 .story_kit/specs/functional/AGENT_CAPABILITIES.md delete mode 100644 .story_kit/specs/functional/AI_INTEGRATION.md delete mode 100644 .story_kit/specs/functional/PERSISTENCE.md delete mode 100644 .story_kit/specs/functional/PERSONA.md delete mode 100644 .story_kit/specs/functional/PROJECT_MANAGEMENT.md delete mode 100644 .story_kit/specs/tech/MODEL_SELECTION.md rename .story_kit/stories/{archive => archived}/01_project_selection.md (100%) rename .story_kit/stories/{archive => archived}/02_core_agent_tools.md (100%) rename .story_kit/stories/{archive => archived}/03_llm_ollama.md (100%) rename .story_kit/stories/{archive => archived}/04_ollama_model_detection.md (100%) rename .story_kit/stories/{archive => archived}/05_persist_project_selection.md (100%) rename .story_kit/stories/{archive => archived}/06_fix_ui_responsiveness.md (100%) rename .story_kit/stories/{archive => archived}/07_ui_polish_sticky_header.md (100%) rename .story_kit/stories/{archive => archived}/08_collapsible_tool_outputs.md (100%) rename .story_kit/stories/{archive => archived}/09_remove_scroll_bars.md (100%) rename .story_kit/stories/{archive => archived}/09_system_prompt_persona.md (100%) rename .story_kit/stories/{archive => archived}/10_persist_model_selection.md (100%) rename .story_kit/stories/{archive => archived}/11_make_text_not_centred.md (100%) rename .story_kit/stories/{archive => archived}/12_be_able_to_use_claude.md (100%) rename .story_kit/stories/{archive => archived}/13_stop_button.md (100%) rename .story_kit/stories/{archive => archived}/14_put_cursor_in_chat_box_on_startup.md (100%) rename .story_kit/stories/{archive => archived}/15_new_session_cancellation.md (100%) rename .story_kit/stories/{archive => archived}/17_display_remaining_context.md (100%) rename .story_kit/stories/{archive => archived}/18_streaming_responses.md (100%) rename 
.story_kit/stories/{archive => archived}/20_start_new_session.md (100%) rename .story_kit/stories/{archive => archived}/22_smart_autoscroll.md (100%) rename .story_kit/stories/{archive => archived}/23_alphabetize_llm_dropdown.md (100%) rename .story_kit/stories/{archive => archived}/24_tauri_to_browser_ui.md (100%) rename .story_kit/stories/{archive => archived}/25_auto_scaffold_story_kit.md (100%) create mode 100644 .story_kit/stories/upcoming/26_define_tdd_workflow_rules.md create mode 100644 .story_kit/stories/upcoming/27_enforce_test_first.md create mode 100644 .story_kit/stories/upcoming/28_require_unit_and_integration_tests.md create mode 100644 .story_kit/stories/upcoming/29_block_acceptance_on_test_failures.md create mode 100644 .story_kit/stories/upcoming/30_protect_against_test_deletion.md create mode 100644 .story_kit/stories/upcoming/31_standardize_acceptance_test_layout.md create mode 100644 .story_kit/stories/upcoming/32_add_coverage_regression_guardrail.md create mode 100644 .story_kit/stories/upcoming/33_test_result_summary_ui.md create mode 100644 .story_kit/stories/upcoming/34_backfill_tests_high_coverage.md diff --git a/.story_kit/README.md b/.story_kit/README.md index b42bbdc..309e435 100644 --- a/.story_kit/README.md +++ b/.story_kit/README.md @@ -13,7 +13,7 @@ Instead of ephemeral chat prompts ("Fix this", "Add that"), we work through pers * **Specs** define the *Truth*. * **Code** defines the *Reality*. -**The Golden Rule:** You are not allowed to write code until the Spec reflects the new reality requested by the Story. +**The Golden Rule:** You are not allowed to write code until the Acceptance Criteria are captured in a test TODO file and the test plan is approved. --- @@ -25,7 +25,7 @@ When initializing a new project under this workflow, create the following struct project_root/ .story_kit |-- README.md # This document - ├── stories/ # The "Inbox" of feature requests. + ├── stories/ # Story workflow (upcoming/current/archived). 
├── specs/ # The "Brain" of the project. │ ├── README.md # Explains this workflow to future sessions. │ ├── 00_CONTEXT.md # High-level goals, domain definition, and glossary. @@ -45,31 +45,36 @@ When the user asks for a feature, follow this 4-step loop strictly: ### Step 1: The Story (Ingest) * **User Input:** "I want the robot to dance." -* **Action:** Create a file `stories/XX_robot_dance.md`. +* **Action:** Create a file in `stories/upcoming/` (e.g., `stories/upcoming/XX_robot_dance.md`). +* **Move to Current:** Once the story is validated and ready for coding, move it to `stories/current/`. +* **Create Test TODOs:** Create `tests/todo/story-XX.todo` with one comment per Acceptance Criterion (e.g., `// AC: story-XX#1 - ...`). * **Content:** * **User Story:** "As a user, I want..." * **Acceptance Criteria:** Bullet points of observable success. * **Out of scope:** Things that are out of scope so that the LLM doesn't go crazy +* **Story Quality (INVEST):** Stories should be Independent, Negotiable, Valuable, Estimable, Small, and Testable. * **Git:** Make a local feature branch for the story, named from the story (e.g., `feature/story-33-camera-format-auto-selection`). You must create and switch to the feature branch before making any edits. -### Step 2: The Spec (Digest) -* **Action:** Update the files in `specs/`. +### Step 2: Test Planning (TDD) +* **Action:** Define the test plan for the Story before any implementation. * **Logic:** - * Does `specs/functional/LOCOMOTION.md` exist? If no, create it. - * Add the "Dance" state to the state machine definition in the spec. - * Check `specs/tech/STACK.md`: Do we have an approved animation library? If no, propose adding one to the Stack or reject the feature. -* **Output:** Show the user the diff of the Spec. **Wait for approval.** + * Identify required unit tests and integration tests. + * Confirm test frameworks and commands from `specs/tech/STACK.md`. 
+ * Ensure Acceptance Criteria are testable and mapped to planned tests. + * Each Acceptance Criterion must appear as a single TODO comment in `tests/todo/story-XX.todo`. +* **Output:** Show the user the test plan. **Wait for approval.** ### Step 3: The Implementation (Code) * **Action:** Write the code to match the *Spec* (not just the Story). * **Constraint:** adhere strictly to `specs/tech/STACK.md` (e.g., if it says "No `unwrap()`", you must not use `unwrap()`). ### Step 4: Verification (Close) -* **Action:** Write a test case that maps directly to the Acceptance Criteria in the Story. +* **Action:** For each TODO comment, write a failing test (red), delete the comment, make the test pass (green), and refactor if needed. Keep only one failing test at a time. * **Action:** Run compilation and make sure it succeeds without errors. Consult `specs/tech/STACK.md` and run all required linters listed there (treat warnings as errors). Run tests and make sure they all pass before proceeding. Ask questions here if needed. -* **Action:** Do not accept stories yourself. Ask the user if they accept the story. If they agree, move the story file to `stories/archive/`. Tell the user they should commit (this gives them the chance to exclude files via .gitignore if necessary). +* **Action:** Do not accept stories yourself. Ask the user if they accept the story. If they agree, move the story file to `stories/archived/`. Tell the user they should commit (this gives them the chance to exclude files via .gitignore if necessary). +* **Move to Archived:** After acceptance, move the story from `stories/current/` to `stories/archived/`. * **Action:** When the user accepts: - 1. Move the story file to `stories/archive/` (e.g., `mv stories/XX_story_name.md stories/archive/`) + 1. Move the story file to `stories/archived/` (e.g., `mv stories/current/XX_story_name.md stories/archived/`) 2. Commit both changes to the feature branch 3. 
Perform the squash merge: `git merge --squash feature/story-name` 4. Commit to master with a comprehensive commit message @@ -79,12 +84,12 @@ When the user asks for a feature, follow this 4-step loop strictly: **CRITICAL - NO SUMMARY DOCUMENTS:** * **NEVER** create a separate summary document (e.g., `STORY_XX_SUMMARY.md`, `IMPLEMENTATION_NOTES.md`, etc.) * **NEVER** write terminal output to a markdown file for "documentation purposes" -* The `specs/` folder IS the documentation. Keep it updated after each story. +* Tests are the primary source of truth. Keep test coverage and Acceptance Criteria aligned after each story. * If you find yourself typing `cat << 'EOF' > SUMMARY.md` or similar, **STOP IMMEDIATELY**. * The only files that should exist after story completion: * Updated code in `src/` * Updated specs in `specs/` - * Archived story in `stories/archive/` + * Archived story in `stories/archived/` --- diff --git a/.story_kit/specs/00_CONTEXT.md b/.story_kit/specs/00_CONTEXT.md index 7fc1aa3..92ce75f 100644 --- a/.story_kit/specs/00_CONTEXT.md +++ b/.story_kit/specs/00_CONTEXT.md @@ -1,7 +1,7 @@ # Project Context ## High-Level Goal -To build a standalone **Agentic AI Code Assistant** application as a single Rust binary that serves a Vite/React web UI and exposes a WebSocket API. The assistant will facilitate a "Story-Driven Spec Workflow" (SDSW) for software development. Unlike a passive chat interface, this assistant acts as an **Agent**, capable of using tools to read the filesystem, execute shell commands, manage git repositories, and modify code directly to implement features. +To build a standalone **Agentic AI Code Assistant** application as a single Rust binary that serves a Vite/React web UI and exposes a WebSocket API. The assistant will facilitate a test-driven development (TDD) workflow first, with both unit and integration tests providing the primary guardrails for code changes. 
Once the single-threaded TDD workflow is stable and usable (including compatibility with lower-cost agents), the project will evolve to a multi-agent orchestration model using Git worktrees and supervisory roles to maximize throughput. Unlike a passive chat interface, this assistant acts as an **Agent**, capable of using tools to read the filesystem, execute shell commands, manage git repositories, and modify code directly to implement features. ## Core Features 1. **Chat Interface:** A conversational UI for the user to interact with the AI assistant. @@ -9,11 +9,11 @@ To build a standalone **Agentic AI Code Assistant** application as a single Rust * **Filesystem:** Read/Write access (scoped to the target project). * **Search:** High-performance file searching (ripgrep-style) and content retrieval. * **Shell Integration:** Ability to execute approved commands (e.g., `cargo`, `npm`, `git`) to run tests, linters, and version control. -3. **Workflow Management:** Specialized tools to manage the SDSW lifecycle: - * Ingesting stories. - * Updating specs. - * Implementing code. - * Verifying results (running tests). +3. **Workflow Management:** Specialized tools to manage a TDD-first lifecycle: + * Defining test requirements (unit + integration) before code changes. + * Implementing code via red-green-refactor. + * Enforcing test and quality gates before acceptance. + * Scaling later to multi-agent orchestration with Git worktrees and supervisory checks, after the single-threaded process is stable. 4. **LLM Integration:** Connection to an LLM backend to drive the intelligence and tool selection. * **Remote:** Support for major APIs (Anthropic Claude, Google Gemini, OpenAI, etc). * **Local:** Support for local inference via Ollama. 
diff --git a/.story_kit/specs/README.md b/.story_kit/specs/README.md deleted file mode 100644 index a32f9af..0000000 --- a/.story_kit/specs/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# Project Specs - -This folder contains the "Living Specification" for the project. It serves as the source of truth for all AI sessions. - -## Structure - -* **00_CONTEXT.md**: The high-level overview, goals, domain definition, and glossary. Start here. -* **tech/**: Implementation details, including the Tech Stack, Architecture, and Constraints. - * **STACK.md**: The technical "Constitution" (Languages, Libraries, Patterns). -* **functional/**: Domain logic and behavior descriptions, platform-agnostic. - * **01_CORE.md**: Core functional specifications. - -## Usage for LLMs - -1. **Always read 00_CONTEXT.md** and **tech/STACK.md** at the beginning of a session. -2. Before writing code, ensure the spec in this folder reflects the desired reality. -3. If a Story changes behavior, update the spec *first*, get approval, then write code. diff --git a/.story_kit/specs/functional/AGENT_CAPABILITIES.md b/.story_kit/specs/functional/AGENT_CAPABILITIES.md deleted file mode 100644 index fda2b2b..0000000 --- a/.story_kit/specs/functional/AGENT_CAPABILITIES.md +++ /dev/null @@ -1,48 +0,0 @@ -# Functional Spec: Agent Capabilities - -## Overview -The Agent interacts with the Target Project through a set of deterministic Tools. These tools are exposed as Tauri Commands to the frontend, which acts as the orchestrator for the LLM. - -## 1. Filesystem Tools -All filesystem operations are **strictly scoped** to the active `SessionState.project_root`. Attempting to access paths outside this root (e.g., `../foo`) must return an error. - -### `read_file` -* **Input:** `path: String` (Relative to project root) -* **Output:** `Result<String, AppError>` -* **Behavior:** Returns the full text content of the file. 
- -### `write_file` -* **Input:** `path: String`, `content: String` -* **Output:** `Result<(), AppError>` -* **Behavior:** Overwrites the file. Creates parent directories if they don't exist. - -### `list_directory` -* **Input:** `path: String` (Relative) -* **Output:** `Result<Vec<FileEntry>, AppError>` -* **Data Structure:** `FileEntry { name: String, kind: "file" | "dir" }` - -## 2. Search Tools -High-performance text search is critical for the Agent to "read" the codebase without dumping all files into context. - -### `search_files` -* **Input:** `query: String` (Regex or Literal), `glob: Option<String>` -* **Output:** `Result, AppError>` -* **Engine:** Rust `ignore` crate (WalkBuilder) + `grep_searcher`. -* **Constraints:** - * Must respect `.gitignore`. - * Limit results (e.g., top 100 matches) to prevent freezing. - -## 3. Shell Tools -The Agent needs to compile code, run tests, and manage git. - -### `exec_shell` -* **Input:** `command: String`, `args: Vec<String>` -* **Output:** `Result<CommandOutput, AppError>` -* **Data Structure:** `CommandOutput { stdout: String, stderr: String, exit_code: i32 }` -* **Security Policy:** - * **Allowlist:** `git`, `cargo`, `npm`, `yarn`, `pnpm`, `node`, `bun`, `ls`, `find`, `grep`, `mkdir`, `rm`, `mv`, `cp`, `touch`. - * **cwd:** Always executed in `SessionState.project_root`. - * **Timeout:** Hard limit (e.g., 30s) to prevent hanging processes. - -## Error Handling -All tools must return a standardized JSON error object to the frontend so the LLM knows *why* a tool failed (e.g., "File not found", "Permission denied"). diff --git a/.story_kit/specs/functional/AI_INTEGRATION.md b/.story_kit/specs/functional/AI_INTEGRATION.md deleted file mode 100644 index 10f3076..0000000 --- a/.story_kit/specs/functional/AI_INTEGRATION.md +++ /dev/null @@ -1,150 +0,0 @@ -# Functional Spec: AI Integration - -## 1. Provider Abstraction -The system uses a pluggable architecture for LLMs. The `ModelProvider` interface abstracts: -* **Generation:** Sending prompt + history + tools to the model. 
-* **Parsing:** Extracting text content vs. tool calls from the raw response. - -The system supports multiple LLM providers: -* **Ollama:** Local models running via Ollama server -* **Anthropic:** Claude models via Anthropic API (Story 12) - -Provider selection is **automatic** based on model name: -* Model starts with `claude-` → Anthropic provider -* Otherwise → Ollama provider - -## 2. Ollama Implementation -* **Endpoint:** `http://localhost:11434/api/chat` -* **JSON Protocol:** - * Request: `{ model: "name", messages: [...], stream: false, tools: [...] }` - * Response: Standard Ollama JSON with `message.tool_calls`. -* **Fallback:** If the specific local model doesn't support native tool calling, we may need a fallback system prompt approach, but for this story, we assume a tool-capable model (like `llama3.1` or `mistral-nemo`). - -## 3. Anthropic (Claude) Implementation - -### Endpoint -* **Base URL:** `https://api.anthropic.com/v1/messages` -* **Authentication:** Requires `x-api-key` header with Anthropic API key -* **API Version:** `anthropic-version: 2023-06-01` header required - -### API Protocol -* **Request Format:** - ```json - { - "model": "claude-3-5-sonnet-20241022", - "max_tokens": 4096, - "messages": [ - {"role": "user", "content": "Hello"}, - {"role": "assistant", "content": "Hi!"} - ], - "tools": [...], - "stream": true - } - ``` -* **Response Format (Streaming):** - * Server-Sent Events (SSE) - * Event types: `message_start`, `content_block_start`, `content_block_delta`, `content_block_stop`, `message_stop` - * Tool calls appear as `content_block` with `type: "tool_use"` - -### Tool Format Differences -Anthropic's tool format differs from Ollama/OpenAI: - -**Anthropic Tool Definition:** -```json -{ - "name": "read_file", - "description": "Reads a file", - "input_schema": { - "type": "object", - "properties": { - "path": {"type": "string"} - }, - "required": ["path"] - } -} -``` - -**Our Internal Format:** -```json -{ - "type": "function", - 
"function": { - "name": "read_file", - "description": "Reads a file", - "parameters": { - "type": "object", - "properties": { - "path": {"type": "string"} - }, - "required": ["path"] - } - } -} -``` - -The backend must convert between these formats. - -### Context Windows -* **claude-3-5-sonnet-20241022:** 200,000 tokens -* **claude-3-5-haiku-20241022:** 200,000 tokens - -### API Key Storage -* **Storage:** OS keychain (macOS Keychain, Windows Credential Manager, Linux Secret Service) -* **Crate:** `keyring` for cross-platform support -* **Service Name:** `living-spec-anthropic-api-key` -* **Username:** `default` -* **Retrieval:** On first use of Claude model, check keychain. If not found, prompt user. - -## 4. Chat Loop (Backend) -The `chat` command acts as the **Agent Loop**: -1. Frontend sends: `User Message`. -2. Backend appends to `SessionState.history`. -3. Backend calls `OllamaProvider`. -4. **If Text Response:** Return text to Frontend. -5. **If Tool Call:** - * Backend executes the Tool (using the Core Tools from Story #2). - * Backend appends `ToolResult` to history. - * Backend *re-prompts* Ollama with the new history (recursion). - * Repeat until Text Response or Max Turns reached. - -## 5. Model Selection UI - -### Unified Dropdown -The model selection dropdown combines both Ollama and Anthropic models in a single list, organized by provider: - -```html - -``` - -### Model List Sources -* **Ollama:** Fetched from `http://localhost:11434/api/tags` via `get_ollama_models` command -* **Anthropic:** Hardcoded list of supported Claude models (no API to fetch available models) - -### API Key Flow -1. User selects a Claude model from dropdown -2. Frontend sends chat request to backend -3. Backend detects `claude-` prefix in model name -4. Backend checks OS keychain for stored API key -5. 
If not found: - - Backend returns error: "Anthropic API key not found" - - Frontend shows dialog prompting for API key - - User enters key - - Frontend calls `set_anthropic_api_key` command - - Backend stores key in OS keychain - - User retries chat request -6. If found: Backend proceeds with Anthropic API request - -## 6. Frontend State -* **Settings:** Store `selected_model` (e.g., "claude-3-5-sonnet-20241022" or "llama3.1") -* **Provider Detection:** Auto-detected from model name (frontend doesn't need to track provider separately) -* **Chat:** Display the conversation. Tool calls should be visible as "System Events" (e.g., collapsed accordions). diff --git a/.story_kit/specs/functional/PERSISTENCE.md b/.story_kit/specs/functional/PERSISTENCE.md deleted file mode 100644 index c329b1b..0000000 --- a/.story_kit/specs/functional/PERSISTENCE.md +++ /dev/null @@ -1,37 +0,0 @@ -# Functional Spec: Persistence - -## 1. Scope -The application needs to persist user preferences and session state across restarts. -The primary use case is remembering the **Last Opened Project**. - -## 2. Storage Mechanism -* **Library:** `tauri-plugin-store` -* **File:** `store.json` (located in the App Data directory). -* **Keys:** - * `last_project_path`: String (Absolute path). - * (Future) `theme`: String. - * (Future) `recent_projects`: Array. - -## 3. Startup Logic -1. **Backend Init:** - * Load `store.json`. - * Read `last_project_path`. - * Verify path exists and is a directory. - * If valid: - * Update `SessionState`. - * Return "Project Loaded" status to Frontend on init. - * If invalid/missing: - * Clear key. - * Remain in `Idle` state. - -## 4. Frontend Logic -* **On Mount:** - * Call `get_current_project()` command. - * If returns path -> Show Workspace. - * If returns null -> Show Selection Screen. -* **On "Open Project":** - * After successful open, save path to store. -* **On "Close Project":** - * Clear `SessionState`. - * Remove `last_project_path` from store. 
- * Show Selection Screen. diff --git a/.story_kit/specs/functional/PERSONA.md b/.story_kit/specs/functional/PERSONA.md deleted file mode 100644 index 0acdb4c..0000000 --- a/.story_kit/specs/functional/PERSONA.md +++ /dev/null @@ -1,48 +0,0 @@ -# Functional Spec: Agent Persona & System Prompt - -## 1. Role Definition -The Agent acts as a **Senior Software Engineer** embedded within the user's local environment. -**Critical:** The Agent is NOT a chatbot that suggests code. It is an AUTONOMOUS AGENT that directly executes changes via tools. - -## 2. Directives -The System Prompt must enforce the following behaviors: -1. **Action Over Suggestion:** When asked to write, create, or modify code, the Agent MUST use tools (`write_file`, `read_file`, etc.) to directly implement the changes. It must NEVER respond with code suggestions or instructions for the user to follow. -2. **Tool First:** Do not guess code. Read files first using `read_file`. -3. **Proactive Execution:** When the user requests a feature or change: - * Read relevant files to understand context - * Write the actual code using `write_file` - * Verify the changes (e.g., run tests, check syntax) - * Report completion, not suggestions -4. **Conciseness:** Do not explain "I will now do X". Just do X (call the tool). -5. **Safety:** Never modify files outside the scope (though backend enforces this, the LLM should know). -6. **Format:** When writing code, write the *whole* file if the tool requires it, or handle partials if we upgrade the tool (currently `write_file` is overwrite). - -## 3. Implementation -* **Location:** `src-tauri/src/llm/prompts.rs` -* **Injection:** The system message is prepended to the `messages` vector in `chat::chat` before sending to the Provider. -* **Reinforcement System:** For stubborn models that ignore directives, we implement a triple-reinforcement approach: - 1. **Primary System Prompt** (index 0): Full instructions with examples - 2. 
**Aggressive Reminder** (index 1): A second system message with critical reminders about using tools - 3. **User Message Prefix**: Each user message is prefixed with `[AGENT DIRECTIVE: You must use write_file tool to implement changes. Never suggest code.]` -* **Deduplication:** Ensure we don't stack multiple system messages if the loop runs long (though currently we reconstruct history per turn). - -## 4. The Prompt Text Requirements -The system prompt must emphasize: -* **Identity:** "You are an AI Agent with direct filesystem access" -* **Prohibition:** "DO NOT suggest code to the user. DO NOT output code blocks for the user to copy." -* **Mandate:** "When asked to implement something, USE the tools to directly write files." -* **Process:** "Read first, then write. Verify your work." -* **Tool Reminder:** List available tools explicitly and remind the Agent to use them. - -## 5. Target Models -This prompt must work effectively with: -* **Local Models:** Qwen, DeepSeek Coder, CodeLlama, Mistral, Llama 3.x -* **Remote Models:** Claude, GPT-4, Gemini - -Some local models require more explicit instructions about tool usage. The prompt should be unambiguous. - -## 6. Handling Stubborn Models -Some models (particularly coding assistants trained to suggest rather than execute) may resist using write_file even with clear instructions. For these models: -* **Use the triple-reinforcement system** (primary prompt + reminder + message prefixes) -* **Consider alternative models** that are better trained for autonomous execution (e.g., DeepSeek-Coder-V2, Llama 3.1) -* **Known issues:** Qwen3-Coder models tend to suggest code rather than write it directly, despite tool calling support diff --git a/.story_kit/specs/functional/PROJECT_MANAGEMENT.md b/.story_kit/specs/functional/PROJECT_MANAGEMENT.md deleted file mode 100644 index 12e0368..0000000 --- a/.story_kit/specs/functional/PROJECT_MANAGEMENT.md +++ /dev/null @@ -1,38 +0,0 @@ -# Functional Spec: Project Management - -## 1. 
Project Lifecycle State Machine -The application operates in two primary states regarding project context: - -1. **Idle (No Project):** - * The user cannot chat about code. - * The only available primary action is "Open Project". -2. **Active (Project Loaded):** - * A valid local directory path is stored in the Session State. - * Tool execution (read/write/shell) is enabled, scoped to this path. - -## 2. Selection Logic -* **Trigger:** User initiates "Open Project". -* **Mechanism:** Path entry in the selection screen. -* **Validation:** - * The backend receives the selected path. - * The backend verifies: - 1. Path exists. - 2. Path is a directory. - 3. Path is readable. - * If valid -> State transitions to **Active**. - * If invalid because the path does not exist: - * The backend creates the directory. - * The backend scaffolds the Story Kit metadata under the new project root: - * `.story_kit/README.md` - * `.story_kit/specs/README.md` - * `.story_kit/specs/00_CONTEXT.md` - * `.story_kit/specs/tech/STACK.md` - * `.story_kit/specs/functional/` (directory) - * `.story_kit/stories/archive/` (directory) - * If scaffolding succeeds -> State transitions to **Active**. - * If scaffolding fails -> Error returned to UI, State remains **Idle**. - * If invalid for other reasons -> Error returned to UI, State remains **Idle**. - -## 3. Security Boundaries -* Once a project is selected, the `SessionState` struct in Rust locks onto this path. -* All subsequent file operations must validate that their target path is a descendant of this Root Path. diff --git a/.story_kit/specs/tech/MODEL_SELECTION.md b/.story_kit/specs/tech/MODEL_SELECTION.md deleted file mode 100644 index b7ec3cd..0000000 --- a/.story_kit/specs/tech/MODEL_SELECTION.md +++ /dev/null @@ -1,139 +0,0 @@ -# Model Selection Guide - -## Overview -This application requires LLM models that support **tool calling** (function calling) and are capable of **autonomous execution** rather than just code suggestion. 
Not all models are suitable for agentic workflows. - -## Recommended Models - -### Primary Recommendation: GPT-OSS - -**Model:** `gpt-oss:20b` -- **Size:** 13 GB -- **Context:** 128K tokens -- **Tool Support:** ✅ Excellent -- **Autonomous Behavior:** ✅ Excellent -- **Why:** OpenAI's open-weight model specifically designed for "agentic tasks". Reliably uses `write_file` to implement changes directly rather than suggesting code. - -```bash -ollama pull gpt-oss:20b -``` - -### Alternative Options - -#### Llama 3.1 (Best Balance) -**Model:** `llama3.1:8b` -- **Size:** 4.7 GB -- **Context:** 128K tokens -- **Tool Support:** ✅ Excellent -- **Autonomous Behavior:** ✅ Good -- **Why:** Industry standard for tool calling. Well-documented, reliable, and smaller than GPT-OSS. - -```bash -ollama pull llama3.1:8b -``` - -#### Qwen 2.5 Coder (Coding Focused) -**Model:** `qwen2.5-coder:7b` or `qwen2.5-coder:14b` -- **Size:** 4.5 GB / 9 GB -- **Context:** 32K tokens -- **Tool Support:** ✅ Good -- **Autonomous Behavior:** ✅ Good -- **Why:** Specifically trained for coding tasks. Note: Use Qwen **2.5**, NOT Qwen 3. - -```bash -ollama pull qwen2.5-coder:7b -# or for more capability: -ollama pull qwen2.5-coder:14b -``` - -#### Mistral (General Purpose) -**Model:** `mistral:7b` -- **Size:** 4 GB -- **Context:** 32K tokens -- **Tool Support:** ✅ Good -- **Autonomous Behavior:** ✅ Good -- **Why:** Fast, efficient, and good at following instructions. - -```bash -ollama pull mistral:7b -``` - -## Models to Avoid - -### ❌ Qwen3-Coder -**Problem:** Despite supporting tool calling, Qwen3-Coder is trained more as a "helpful assistant" and tends to suggest code in markdown blocks rather than using `write_file` to implement changes directly. - -**Status:** Works for reading files and analysis, but not recommended for autonomous coding. - -### ❌ DeepSeek-Coder-V2 -**Problem:** Does not support tool calling at all. 
- -**Error:** `"registry.ollama.ai/library/deepseek-coder-v2:latest does not support tools"` - -### ❌ StarCoder / CodeLlama (older versions) -**Problem:** Most older coding models don't support tool calling or do it poorly. - -## How to Verify Tool Support - -Check if a model supports tools on the Ollama library page: -``` -https://ollama.com/library/ -``` - -Look for the "Tools" tag in the model's capabilities. - -You can also check locally: -```bash -ollama show -``` - -## Model Selection Criteria - -When choosing a model for autonomous coding, prioritize: - -1. **Tool Calling Support** - Must support function calling natively -2. **Autonomous Behavior** - Trained to execute rather than suggest -3. **Context Window** - Larger is better for complex projects (32K minimum, 128K ideal) -4. **Size vs Performance** - Balance between model size and your hardware -5. **Prompt Adherence** - Follows system instructions reliably - -## Testing a New Model - -To test if a model works for autonomous coding: - -1. Select it in the UI dropdown -2. Ask it to create a simple file: "Create a new file called test.txt with 'Hello World' in it" -3. **Expected behavior:** Uses `write_file` tool and creates the file -4. **Bad behavior:** Suggests code in markdown blocks or asks what you want to do - -If it suggests code instead of writing it, the model is not suitable for this application. - -## Context Window Management - -Current context usage (approximate): -- System prompts: ~1,000 tokens -- Tool definitions: ~300 tokens -- Per message overhead: ~50-100 tokens -- Average conversation: 2-5K tokens - -Most models will handle 20-30 exchanges before context becomes an issue. The agent loop is limited to 30 turns to prevent context exhaustion. - -## Performance Notes - -**Speed:** Smaller models (3B-8B) are faster but less capable. Larger models (20B-70B) are more reliable but slower. 
- -**Hardware:** -- 8B models: ~8 GB RAM -- 20B models: ~16 GB RAM -- 70B models: ~48 GB RAM (quantized) - -**Recommendation:** Start with `llama3.1:8b` for speed, upgrade to `gpt-oss:20b` for reliability. - -## Summary - -**For this application:** -1. **Best overall:** `gpt-oss:20b` (proven autonomous behavior) -2. **Best balance:** `llama3.1:8b` (fast, reliable, well-supported) -3. **For coding:** `qwen2.5-coder:7b` (specialized, but smaller context) - -**Avoid:** Qwen3-Coder, DeepSeek-Coder-V2, any model without tool support. \ No newline at end of file diff --git a/.story_kit/specs/tech/STACK.md b/.story_kit/specs/tech/STACK.md index 455b4c9..a10ddc7 100644 --- a/.story_kit/specs/tech/STACK.md +++ b/.story_kit/specs/tech/STACK.md @@ -76,15 +76,18 @@ To support both Remote and Local models, the system implements a `ModelProvider` * **Quality Gates:** * `cargo clippy --all-targets --all-features` must show 0 errors, 0 warnings * `cargo check` must succeed - * `cargo test` must pass all tests + * `cargo nextest run` must pass all tests ### TypeScript / React * **Style:** Biome formatter (replaces Prettier/ESLint). * **Linter:** Biome - Must pass with 0 errors, 0 warnings before merging. * **Types:** Shared types with Rust (via `tauri-specta` or manual interface matching) are preferred to ensure type safety across the bridge. +* **Testing:** Vitest for unit/component tests; Playwright for end-to-end tests. * **Quality Gates:** * `npx @biomejs/biome check src/` must show 0 errors, 0 warnings * `npm run build` must succeed + * `npx vitest run` must pass + * `npx playwright test` must pass * No `any` types allowed (use proper types or `unknown`) * React keys must use stable IDs, not array indices * All buttons must have explicit `type` attribute @@ -103,6 +106,8 @@ To support both Remote and Local models, the system implements a `ModelProvider` * `poem-openapi`: OpenAPI (Swagger) for non-streaming HTTP APIs. 
* **JavaScript:** * `react-markdown`: For rendering chat responses. + * `vitest`: Unit/component testing. + * `playwright`: End-to-end testing. ## Safety & Sandbox 1. **Project Scope:** The application must strictly enforce that it does not read/write outside the `project_root` selected by the user. diff --git a/.story_kit/stories/archive/01_project_selection.md b/.story_kit/stories/archived/01_project_selection.md similarity index 100% rename from .story_kit/stories/archive/01_project_selection.md rename to .story_kit/stories/archived/01_project_selection.md diff --git a/.story_kit/stories/archive/02_core_agent_tools.md b/.story_kit/stories/archived/02_core_agent_tools.md similarity index 100% rename from .story_kit/stories/archive/02_core_agent_tools.md rename to .story_kit/stories/archived/02_core_agent_tools.md diff --git a/.story_kit/stories/archive/03_llm_ollama.md b/.story_kit/stories/archived/03_llm_ollama.md similarity index 100% rename from .story_kit/stories/archive/03_llm_ollama.md rename to .story_kit/stories/archived/03_llm_ollama.md diff --git a/.story_kit/stories/archive/04_ollama_model_detection.md b/.story_kit/stories/archived/04_ollama_model_detection.md similarity index 100% rename from .story_kit/stories/archive/04_ollama_model_detection.md rename to .story_kit/stories/archived/04_ollama_model_detection.md diff --git a/.story_kit/stories/archive/05_persist_project_selection.md b/.story_kit/stories/archived/05_persist_project_selection.md similarity index 100% rename from .story_kit/stories/archive/05_persist_project_selection.md rename to .story_kit/stories/archived/05_persist_project_selection.md diff --git a/.story_kit/stories/archive/06_fix_ui_responsiveness.md b/.story_kit/stories/archived/06_fix_ui_responsiveness.md similarity index 100% rename from .story_kit/stories/archive/06_fix_ui_responsiveness.md rename to .story_kit/stories/archived/06_fix_ui_responsiveness.md diff --git a/.story_kit/stories/archive/07_ui_polish_sticky_header.md 
b/.story_kit/stories/archived/07_ui_polish_sticky_header.md similarity index 100% rename from .story_kit/stories/archive/07_ui_polish_sticky_header.md rename to .story_kit/stories/archived/07_ui_polish_sticky_header.md diff --git a/.story_kit/stories/archive/08_collapsible_tool_outputs.md b/.story_kit/stories/archived/08_collapsible_tool_outputs.md similarity index 100% rename from .story_kit/stories/archive/08_collapsible_tool_outputs.md rename to .story_kit/stories/archived/08_collapsible_tool_outputs.md diff --git a/.story_kit/stories/archive/09_remove_scroll_bars.md b/.story_kit/stories/archived/09_remove_scroll_bars.md similarity index 100% rename from .story_kit/stories/archive/09_remove_scroll_bars.md rename to .story_kit/stories/archived/09_remove_scroll_bars.md diff --git a/.story_kit/stories/archive/09_system_prompt_persona.md b/.story_kit/stories/archived/09_system_prompt_persona.md similarity index 100% rename from .story_kit/stories/archive/09_system_prompt_persona.md rename to .story_kit/stories/archived/09_system_prompt_persona.md diff --git a/.story_kit/stories/archive/10_persist_model_selection.md b/.story_kit/stories/archived/10_persist_model_selection.md similarity index 100% rename from .story_kit/stories/archive/10_persist_model_selection.md rename to .story_kit/stories/archived/10_persist_model_selection.md diff --git a/.story_kit/stories/archive/11_make_text_not_centred.md b/.story_kit/stories/archived/11_make_text_not_centred.md similarity index 100% rename from .story_kit/stories/archive/11_make_text_not_centred.md rename to .story_kit/stories/archived/11_make_text_not_centred.md diff --git a/.story_kit/stories/archive/12_be_able_to_use_claude.md b/.story_kit/stories/archived/12_be_able_to_use_claude.md similarity index 100% rename from .story_kit/stories/archive/12_be_able_to_use_claude.md rename to .story_kit/stories/archived/12_be_able_to_use_claude.md diff --git a/.story_kit/stories/archive/13_stop_button.md 
b/.story_kit/stories/archived/13_stop_button.md similarity index 100% rename from .story_kit/stories/archive/13_stop_button.md rename to .story_kit/stories/archived/13_stop_button.md diff --git a/.story_kit/stories/archive/14_put_cursor_in_chat_box_on_startup.md b/.story_kit/stories/archived/14_put_cursor_in_chat_box_on_startup.md similarity index 100% rename from .story_kit/stories/archive/14_put_cursor_in_chat_box_on_startup.md rename to .story_kit/stories/archived/14_put_cursor_in_chat_box_on_startup.md diff --git a/.story_kit/stories/archive/15_new_session_cancellation.md b/.story_kit/stories/archived/15_new_session_cancellation.md similarity index 100% rename from .story_kit/stories/archive/15_new_session_cancellation.md rename to .story_kit/stories/archived/15_new_session_cancellation.md diff --git a/.story_kit/stories/archive/17_display_remaining_context.md b/.story_kit/stories/archived/17_display_remaining_context.md similarity index 100% rename from .story_kit/stories/archive/17_display_remaining_context.md rename to .story_kit/stories/archived/17_display_remaining_context.md diff --git a/.story_kit/stories/archive/18_streaming_responses.md b/.story_kit/stories/archived/18_streaming_responses.md similarity index 100% rename from .story_kit/stories/archive/18_streaming_responses.md rename to .story_kit/stories/archived/18_streaming_responses.md diff --git a/.story_kit/stories/archive/20_start_new_session.md b/.story_kit/stories/archived/20_start_new_session.md similarity index 100% rename from .story_kit/stories/archive/20_start_new_session.md rename to .story_kit/stories/archived/20_start_new_session.md diff --git a/.story_kit/stories/archive/22_smart_autoscroll.md b/.story_kit/stories/archived/22_smart_autoscroll.md similarity index 100% rename from .story_kit/stories/archive/22_smart_autoscroll.md rename to .story_kit/stories/archived/22_smart_autoscroll.md diff --git a/.story_kit/stories/archive/23_alphabetize_llm_dropdown.md 
b/.story_kit/stories/archived/23_alphabetize_llm_dropdown.md similarity index 100% rename from .story_kit/stories/archive/23_alphabetize_llm_dropdown.md rename to .story_kit/stories/archived/23_alphabetize_llm_dropdown.md diff --git a/.story_kit/stories/archive/24_tauri_to_browser_ui.md b/.story_kit/stories/archived/24_tauri_to_browser_ui.md similarity index 100% rename from .story_kit/stories/archive/24_tauri_to_browser_ui.md rename to .story_kit/stories/archived/24_tauri_to_browser_ui.md diff --git a/.story_kit/stories/archive/25_auto_scaffold_story_kit.md b/.story_kit/stories/archived/25_auto_scaffold_story_kit.md similarity index 100% rename from .story_kit/stories/archive/25_auto_scaffold_story_kit.md rename to .story_kit/stories/archived/25_auto_scaffold_story_kit.md diff --git a/.story_kit/stories/upcoming/26_define_tdd_workflow_rules.md b/.story_kit/stories/upcoming/26_define_tdd_workflow_rules.md new file mode 100644 index 0000000..db20965 --- /dev/null +++ b/.story_kit/stories/upcoming/26_define_tdd_workflow_rules.md @@ -0,0 +1,16 @@ +# Story 26: Define the TDD Workflow Rules + +## User Story +As a user, I want a clear TDD workflow (unit + integration), so the system enforces test-first development. + +## Acceptance Criteria +- The workflow explicitly requires tests before implementation. +- Both unit tests and integration tests are required for feature work. +- The concrete test frameworks to use are defined in `specs/tech/STACK.md`. +- Code changes are not accepted unless all required tests pass. +- Test removal requires explicit user approval. + +## Out of Scope +- Migrating existing tests to new frameworks. +- Backfilling missing tests for legacy code unless part of a new story. +- Defining project-specific test frameworks beyond what is documented in `specs/tech/STACK.md`. 
\ No newline at end of file diff --git a/.story_kit/stories/upcoming/27_enforce_test_first.md b/.story_kit/stories/upcoming/27_enforce_test_first.md new file mode 100644 index 0000000..77e8fd7 --- /dev/null +++ b/.story_kit/stories/upcoming/27_enforce_test_first.md @@ -0,0 +1,14 @@ +# Story 27: Enforce Test-First Before Implementation + +## User Story +As a user, I want the workflow to block implementation until tests are written, so test-first development is enforced. + +## Acceptance Criteria +- The system prevents implementation work until required tests are created. +- The system prompts for missing tests before allowing code changes. +- The enforcement applies to both unit and integration tests. +- The user can override the block by explicitly acknowledging that tests are intentionally deferred. + +## Out of Scope +- Automated test generation. +- Converting legacy code to test-first. \ No newline at end of file diff --git a/.story_kit/stories/upcoming/28_require_unit_and_integration_tests.md b/.story_kit/stories/upcoming/28_require_unit_and_integration_tests.md new file mode 100644 index 0000000..1576de1 --- /dev/null +++ b/.story_kit/stories/upcoming/28_require_unit_and_integration_tests.md @@ -0,0 +1,12 @@ +# Story 28: Require Unit and Integration Tests per Story + +## User Story +As a user, I want every story to include both unit and integration tests, so behavior is verified at multiple levels. + +## Acceptance Criteria +- Each story includes at least one unit test and one integration test, with integration tests using the standard Rust `tests/` layout. +- The required test types are defined by the frameworks listed in `specs/tech/STACK.md`. +- A story cannot be accepted if either test type is missing. + +## Out of Scope +- Retrofitting legacy features without an active story.
\ No newline at end of file diff --git a/.story_kit/stories/upcoming/29_block_acceptance_on_test_failures.md b/.story_kit/stories/upcoming/29_block_acceptance_on_test_failures.md new file mode 100644 index 0000000..5dbd2d1 --- /dev/null +++ b/.story_kit/stories/upcoming/29_block_acceptance_on_test_failures.md @@ -0,0 +1,14 @@ +# Story 29: Block Acceptance When Tests Fail + +## User Story +As a user, I want the workflow to block story acceptance when tests fail, so regressions cannot be accepted. + +## Acceptance Criteria +- The system runs all required tests before acceptance. +- If any test fails, the story cannot be accepted. +- The failure output is shown clearly to the user. +- A successful test run is required immediately before acceptance. + +## Out of Scope +- Adding new test frameworks beyond those in `specs/tech/STACK.md`. +- Retrying tests automatically without user confirmation. \ No newline at end of file diff --git a/.story_kit/stories/upcoming/30_protect_against_test_deletion.md b/.story_kit/stories/upcoming/30_protect_against_test_deletion.md new file mode 100644 index 0000000..2c9c62a --- /dev/null +++ b/.story_kit/stories/upcoming/30_protect_against_test_deletion.md @@ -0,0 +1,13 @@ +# Story 30: Protect Against Test Deletion + +## User Story +As a user, I want explicit approval required before tests are deleted or weakened, so guardrails cannot be removed silently. + +## Acceptance Criteria +- Any deletion of test files requires explicit user approval. +- Any change that disables or neuters a test (e.g., commenting out assertions) requires explicit user approval. +- The system reports all test deletions or weakenings clearly before acceptance. + +## Out of Scope +- Refactoring tests without behavioral changes. +- Removing obsolete tests as part of a broader system rewrite. 
\ No newline at end of file diff --git a/.story_kit/stories/upcoming/31_standardize_acceptance_test_layout.md b/.story_kit/stories/upcoming/31_standardize_acceptance_test_layout.md new file mode 100644 index 0000000..0f9d187 --- /dev/null +++ b/.story_kit/stories/upcoming/31_standardize_acceptance_test_layout.md @@ -0,0 +1,14 @@ +# Story 31: Standardize Acceptance Test Layout + +## User Story +As a user, I want a consistent acceptance test layout, so tests are easy to locate, review, and run across stories. + +## Acceptance Criteria +- A single, documented folder structure for acceptance tests is established. +- The naming convention for acceptance tests is consistent across stories. +- The chosen test frameworks are defined in `specs/tech/STACK.md`. +- The layout is used for all new acceptance tests. + +## Out of Scope +- Migrating existing tests to the new layout. +- Introducing new test frameworks beyond those listed in `specs/tech/STACK.md`. \ No newline at end of file diff --git a/.story_kit/stories/upcoming/32_add_coverage_regression_guardrail.md b/.story_kit/stories/upcoming/32_add_coverage_regression_guardrail.md new file mode 100644 index 0000000..a9029fa --- /dev/null +++ b/.story_kit/stories/upcoming/32_add_coverage_regression_guardrail.md @@ -0,0 +1,13 @@ +# Story 32: Add Coverage Regression Guardrail + +## User Story +As a user, I want coverage regression to block acceptance, so test quality cannot quietly degrade. + +## Acceptance Criteria +- The workflow fails if coverage drops below the defined threshold. +- Coverage regression is reported clearly before acceptance. +- The coverage threshold is defined in `specs/tech/STACK.md`. + +## Out of Scope +- Selecting or adding new coverage tools beyond those listed in `specs/tech/STACK.md`. +- Retrofitting coverage reports for legacy test suites. 
\ No newline at end of file diff --git a/.story_kit/stories/upcoming/33_test_result_summary_ui.md b/.story_kit/stories/upcoming/33_test_result_summary_ui.md new file mode 100644 index 0000000..4a3dda3 --- /dev/null +++ b/.story_kit/stories/upcoming/33_test_result_summary_ui.md @@ -0,0 +1,14 @@ +# Story 33: Add Test Result Summaries in the UI + +## User Story +As a user, I want test results summarized in the UI, so I can quickly review pass/fail status before acceptance. + +## Acceptance Criteria +- The UI displays a summary of unit test results. +- The UI displays a summary of integration test results. +- Failed tests show clear error output in the UI. +- A passing test summary is required immediately before acceptance. + +## Out of Scope +- Adding new test frameworks beyond those listed in `specs/tech/STACK.md`. +- Persisting historical test results across sessions. \ No newline at end of file diff --git a/.story_kit/stories/upcoming/34_backfill_tests_high_coverage.md b/.story_kit/stories/upcoming/34_backfill_tests_high_coverage.md new file mode 100644 index 0000000..66eadfd --- /dev/null +++ b/.story_kit/stories/upcoming/34_backfill_tests_high_coverage.md @@ -0,0 +1,14 @@ +# Story 34: Backfill Tests for Maximum Coverage + +## User Story +As a user, I want us to backfill tests across existing code, so overall coverage is as high as we can reasonably achieve. + +## Acceptance Criteria +- We add unit and integration tests to existing code paths with a goal of maximizing coverage. +- New tests are prioritized for critical workflows (filesystem access, project open/close, tool execution, chat flow). +- Coverage improvements are measured and reported for each test backfill batch. +- The work continues until coverage is as high as practically achievable with the current architecture. + +## Out of Scope +- Major refactors solely to increase coverage (unless required to make code testable). +- Introducing new testing frameworks beyond those listed in `specs/tech/STACK.md`.
\ No newline at end of file