Compare commits
140 Commits
v0.4.0
...
ca949aec46
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ca949aec46 | ||
|
|
9ceda6694d | ||
|
|
b0a1eafd8d | ||
|
|
b2c9ece9e4 | ||
|
|
24ca585fd0 | ||
|
|
c4e45b2841 | ||
|
|
51d878e117 | ||
|
|
cb991da84c | ||
|
|
28b78c4a32 | ||
|
|
6f47943369 | ||
|
|
8885543c25 | ||
|
|
2208aba3fb | ||
|
|
d3f462e518 | ||
|
|
9581e5d51a | ||
|
|
375277f86e | ||
|
|
c096488ba1 | ||
|
|
ab5fb734e5 | ||
|
|
d7e814c02c | ||
|
|
4268db8641 | ||
|
|
943bd38d19 | ||
|
|
16df7b783d | ||
|
|
24eb20f985 | ||
|
|
9f18cacbed | ||
|
|
737ddca884 | ||
|
|
0e8963591e | ||
|
|
492a0fc749 | ||
|
|
287c64faf1 | ||
|
|
31085e8c9f | ||
|
|
1a22e0cb41 | ||
|
|
f653b51dd7 | ||
|
|
2f66c7d30e | ||
|
|
3031c158e7 | ||
|
|
6aa932b349 | ||
|
|
fb23e2218b | ||
|
|
01d1c15d91 | ||
|
|
9622a1a572 | ||
|
|
e99a3da336 | ||
|
|
60e1d7bf64 | ||
|
|
3cfe25f97a | ||
|
|
ebb9df9780 | ||
|
|
d0ec1eebd7 | ||
|
|
19bb3a6b52 | ||
|
|
594114d671 | ||
|
|
0897b36cc1 | ||
|
|
81e822642e | ||
|
|
134cae216a | ||
|
|
2b5c7578d3 | ||
|
|
3778162920 | ||
|
|
8c3e92f936 | ||
|
|
25711fc16b | ||
|
|
e33979aacb | ||
|
|
31e2f823f7 | ||
|
|
ec5f4afcfb | ||
|
|
75640c6ecf | ||
|
|
69030599d3 | ||
|
|
adab08f804 | ||
|
|
3d59077a3c | ||
|
|
84acc82f8c | ||
|
|
a2ea1d65aa | ||
|
|
086eb908ee | ||
|
|
c138246db3 | ||
|
|
c7846c041c | ||
|
|
9b6bde95bc | ||
|
|
f625534ff0 | ||
|
|
3f59420b2b | ||
|
|
9730a923dd | ||
|
|
c7b2b5820b | ||
|
|
303fdbad6f | ||
|
|
c0a7f5fbfb | ||
|
|
5215956314 | ||
|
|
27b86da0aa | ||
|
|
2ce0166ea8 | ||
|
|
2d377532df | ||
|
|
3885802d79 | ||
|
|
f650fef1e5 | ||
|
|
dbc8849681 | ||
|
|
b17ba0c8dd | ||
|
|
eea797975b | ||
|
|
67e6a4afe6 | ||
|
|
9c01bfebc8 | ||
|
|
8ea69fc70f | ||
|
|
1bd816f5a6 | ||
|
|
5f3dcebfc3 | ||
|
|
ef9ec8bbbe | ||
|
|
14a97ed4ed | ||
|
|
3932aa65c2 | ||
|
|
9a77ffaa83 | ||
|
|
a9d5e9f6f8 | ||
|
|
f5b1103bf6 | ||
|
|
39707ce026 | ||
|
|
ff1705f26c | ||
|
|
14cab448cb | ||
|
|
973af81fa5 | ||
|
|
09890b5ea4 | ||
|
|
4fe61c643b | ||
|
|
665ffa9521 | ||
|
|
e558d716d8 | ||
|
|
cc403e7736 | ||
|
|
48edf6a94b | ||
|
|
27c406330a | ||
|
|
48aad2323d | ||
|
|
6c00f66894 | ||
|
|
90c98f5b47 | ||
|
|
a8cb851ba7 | ||
|
|
02947700ba | ||
|
|
e3ade4cca5 | ||
|
|
e9a0858d53 | ||
|
|
c84c33a1a7 | ||
|
|
09a47ac224 | ||
|
|
a791b9f49c | ||
|
|
351f770516 | ||
|
|
cc0110e577 | ||
|
|
8eff849e83 | ||
|
|
7439c227a9 | ||
|
|
887ab81898 | ||
|
|
ef6201b273 | ||
|
|
5af3c17522 | ||
|
|
7cb95260aa | ||
|
|
cf4a40a431 | ||
|
|
6c03c4f9dc | ||
|
|
4acf38f035 | ||
|
|
d6d7fb3c59 | ||
|
|
9e4587ceb4 | ||
|
|
72216fe7ce | ||
|
|
9807eb6bb0 | ||
|
|
7eb9686bfb | ||
|
|
3b9cea22bb | ||
|
|
e2a8b978f7 | ||
|
|
748d86cf10 | ||
|
|
060d5a40a4 | ||
|
|
3cb4f32634 | ||
|
|
e00d780940 | ||
|
|
df7556c0f3 | ||
|
|
07f8defad0 | ||
|
|
1739c2ff58 | ||
|
|
e6eaa10c16 | ||
|
|
110db662a9 | ||
|
|
9998b3ed25 | ||
|
|
d27a389a21 | ||
|
|
11a6be4b45 |
@@ -1,12 +1,10 @@
|
||||
{
|
||||
"enabledMcpjsonServers": [
|
||||
"story-kit"
|
||||
],
|
||||
"enabledMcpjsonServers": ["storkit"],
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(./server/target/debug/story-kit:*)",
|
||||
"Bash(./target/debug/story-kit:*)",
|
||||
"Bash(STORYKIT_PORT=*)",
|
||||
"Bash(./server/target/debug/storkit:*)",
|
||||
"Bash(./target/debug/storkit:*)",
|
||||
"Bash(STORKIT_PORT=*)",
|
||||
"Bash(cargo build:*)",
|
||||
"Bash(cargo check:*)",
|
||||
"Bash(cargo clippy:*)",
|
||||
@@ -56,7 +54,7 @@
|
||||
"WebFetch(domain:portkey.ai)",
|
||||
"WebFetch(domain:www.shuttle.dev)",
|
||||
"WebSearch",
|
||||
"mcp__story-kit__*",
|
||||
"mcp__storkit__*",
|
||||
"Edit",
|
||||
"Write",
|
||||
"Bash(find *)",
|
||||
@@ -72,4 +70,4 @@
|
||||
"Bash(npm run dev:*)"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -5,9 +5,9 @@
|
||||
# Local environment (secrets)
|
||||
.env
|
||||
|
||||
# App specific (root-level; story-kit subdirectory patterns live in .story_kit/.gitignore)
|
||||
# App specific (root-level; storkit subdirectory patterns live in .storkit/.gitignore)
|
||||
store.json
|
||||
.story_kit_port
|
||||
.storkit_port
|
||||
|
||||
# Rust stuff
|
||||
target
|
||||
|
||||
@@ -11,7 +11,7 @@ When you start a new session with this project:
|
||||
|
||||
1. **Check for MCP Tools:** Read `.mcp.json` to discover the MCP server endpoint. Then list available tools by calling:
|
||||
```bash
|
||||
curl -s "$(jq -r '.mcpServers["story-kit"].url' .mcp.json)" \
|
||||
curl -s "$(jq -r '.mcpServers["storkit"].url' .mcp.json)" \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{"jsonrpc":"2.0","id":1,"method":"tools/list","params":{}}'
|
||||
```
|
||||
61
.storkit/bot.toml.example
Normal file
61
.storkit/bot.toml.example
Normal file
@@ -0,0 +1,61 @@
|
||||
homeserver = "https://matrix.example.com"
|
||||
username = "@botname:example.com"
|
||||
password = "your-bot-password"
|
||||
|
||||
# List one or more rooms to listen in. Use a single-element list for one room.
|
||||
room_ids = ["!roomid:example.com"]
|
||||
|
||||
# Optional: the deprecated single-room key is still accepted for backwards compat.
|
||||
# room_id = "!roomid:example.com"
|
||||
|
||||
allowed_users = ["@youruser:example.com"]
|
||||
enabled = false
|
||||
|
||||
# Maximum conversation turns to remember per room (default: 20).
|
||||
# history_size = 20
|
||||
|
||||
# Rooms where the bot responds to all messages (not just addressed ones).
|
||||
# This list is updated automatically when users toggle ambient mode at runtime.
|
||||
# ambient_rooms = ["!roomid:example.com"]
|
||||
|
||||
# ── WhatsApp Business API ──────────────────────────────────────────────
|
||||
# Set transport = "whatsapp" to use WhatsApp instead of Matrix.
|
||||
# The webhook endpoint will be available at /webhook/whatsapp.
|
||||
# You must configure this URL in the Meta Developer Dashboard.
|
||||
#
|
||||
# transport = "whatsapp"
|
||||
# whatsapp_phone_number_id = "123456789012345"
|
||||
# whatsapp_access_token = "EAAx..."
|
||||
# whatsapp_verify_token = "my-secret-verify-token"
|
||||
#
|
||||
# ── 24-hour messaging window & notification templates ─────────────────
|
||||
# WhatsApp only allows free-form text messages within 24 hours of the last
|
||||
# inbound message from a user. For proactive pipeline notifications sent
|
||||
# after the window expires, an approved Meta message template is used.
|
||||
#
|
||||
# Register the template in the Meta Business Manager:
|
||||
# 1. Go to Business Settings → WhatsApp → Message Templates → Create.
|
||||
# 2. Category: UTILITY
|
||||
# 3. Template name: pipeline_notification (or your chosen name below)
|
||||
# 4. Language: English (en_US)
|
||||
# 5. Body text (example):
|
||||
# Story *{{1}}* has moved to *{{2}}*.
|
||||
# Where {{1}} = story name, {{2}} = pipeline stage.
|
||||
# 6. Submit for review. Meta typically approves utility templates within
|
||||
# minutes; transactional categories may take longer.
|
||||
#
|
||||
# Once approved, set the name below (default: "pipeline_notification"):
|
||||
# whatsapp_notification_template = "pipeline_notification"
|
||||
|
||||
# ── Slack Bot API ─────────────────────────────────────────────────────
|
||||
# Set transport = "slack" to use Slack instead of Matrix.
|
||||
# The webhook endpoint will be available at /webhook/slack.
|
||||
# Configure this URL in the Slack App → Event Subscriptions → Request URL.
|
||||
#
|
||||
# Required Slack App scopes: chat:write (authorizes both chat.postMessage and chat.update); add commands if you use slash commands
|
||||
# Subscribe to bot events: message.channels, message.groups, message.im
|
||||
#
|
||||
# transport = "slack"
|
||||
# slack_bot_token = "xoxb-..."
|
||||
# slack_signing_secret = "your-signing-secret"
|
||||
# slack_channel_ids = ["C01ABCDEF"]
|
||||
28
.storkit/problems.md
Normal file
28
.storkit/problems.md
Normal file
@@ -0,0 +1,28 @@
|
||||
# Problems
|
||||
|
||||
Recurring issues observed during pipeline operation. Review periodically and create stories for systemic problems.
|
||||
|
||||
## 2026-03-18: Stories graduating to "done" with empty merges (7 of 10)
|
||||
|
||||
Pipeline allows stories to move through coding → QA → merge → done without any actual code changes landing on master. The squash-merge produces an empty diff but the pipeline still marks the story as done. Affected stories: 247, 273, 274, 278, 279, 280, 92. Only 266, 271, 277, and 281 actually shipped code. Root cause: no check that the merge commit contains a non-empty diff. Filed bug 283 for the manual_qa gate issue specifically, but the empty-merge-to-done problem is broader and needs its own fix.
|
||||
|
||||
## 2026-03-18: Agent committed directly to master instead of worktree
|
||||
|
||||
Multiple agents have committed directly to master instead of their worktree/feature branch:
|
||||
|
||||
- Commit `5f4591f` ("fix: update should_commit_stage test to match 5_done") — likely mergemaster
|
||||
- Commit `a32cfbd` ("Add bot-level command registry with help command") — story 285 coder committed code + Cargo.lock directly to master
|
||||
|
||||
Agents should only commit to their feature branch or merge-queue branch, never to master directly. Suspect agents are running `git commit` in the project root instead of the worktree directory. This can also revert uncommitted fixes on master (e.g. project.toml pkill fix was overwritten). Frequency: at least 2 confirmed cases. This is a recurring and serious problem — needs a guard in the server or agent prompts.
|
||||
|
||||
## 2026-03-19: Auto-assign re-assigns mergemaster to failed merge stories in a loop
|
||||
|
||||
After bug 295 fix (`auto_assign_available_work` after every pipeline advance), mergemaster gets re-assigned to stories that already have a merge failure flag. Story 310 had an empty diff merge failure — mergemaster correctly reported the failure, but auto-assign immediately re-assigned mergemaster to the same story, creating an infinite retry loop. The auto-assign logic needs to check for the `merge_failure` front matter flag before re-assigning agents to stories in `4_merge/`.
|
||||
|
||||
## 2026-03-19: Coder produces no code (complete ghost — story 310)
|
||||
|
||||
Story 310 (Bot delete command) went through the full pipeline — coder session ran, passed QA/gates, moved to merge — but the coder produced zero code. No commits on the feature branch, no commits on master. The entire agent session was a no-op. This is different from the "committed to master instead of worktree" problem — in this case, the coder simply did nothing. Need to investigate the coder logs to understand what happened. The empty-diff merge check would catch this at merge time, but ideally the server should detect "coder finished with no commits on feature branch" at the gate-check stage and fail early.
|
||||
|
||||
## 2026-03-19: Auto-assign assigns mergemaster to coding-stage stories
|
||||
|
||||
Auto-assign picked mergemaster for story 310 which was in `2_current/`. Mergemaster should only work on stories in `4_merge/`. The `auto_assign_available_work` function doesn't enforce that the agent's configured stage matches the pipeline stage of the story it's being assigned to. Story 279 (auto-assign respects agent stage from front matter) was supposed to fix this, but the check may only apply to front-matter preferences, not the fallback assignment path.
|
||||
@@ -25,45 +25,6 @@ path = "."
|
||||
setup = ["mkdir -p frontend/dist", "cargo check"]
|
||||
teardown = []
|
||||
|
||||
[[agent]]
|
||||
name = "supervisor"
|
||||
stage = "other"
|
||||
role = "Coordinates work, reviews PRs, decomposes stories."
|
||||
model = "opus"
|
||||
max_turns = 200
|
||||
max_budget_usd = 15.00
|
||||
prompt = """You are the supervisor for story {{story_id}}. Your job is to coordinate coder agents to implement this story.
|
||||
|
||||
Read CLAUDE.md first, then .story_kit/README.md to understand the dev process (SDTW). You are responsible for ensuring coders follow this process.
|
||||
|
||||
## Your MCP Tools
|
||||
You have these tools via the story-kit MCP server:
|
||||
- start_agent(story_id, agent_name) - Start a coder agent on a story
|
||||
- wait_for_agent(story_id, agent_name, timeout_ms) - Block until the agent reaches a terminal state (completed/failed). Returns final status including completion report with gates_passed.
|
||||
- get_agent_output(story_id, agent_name, timeout_ms) - Poll agent output (returns recent events, call repeatedly)
|
||||
- list_agents() - See all running agents and their status
|
||||
- stop_agent(story_id, agent_name) - Stop a running agent
|
||||
- get_story_todos(story_id) - Get unchecked acceptance criteria for a story in work/2_current/
|
||||
- ensure_acceptance(story_id) - Check if a story passes acceptance gates
|
||||
|
||||
## Your Workflow
|
||||
1. Read CLAUDE.md and .story_kit/README.md to understand the project and dev process
|
||||
2. Read the story file from .story_kit/work/ to understand requirements
|
||||
3. Move it to work/2_current/ if it is in work/1_backlog/
|
||||
4. Start coder-1 on the story: call start_agent with story_id="{{story_id}}" and agent_name="coder-1"
|
||||
5. Wait for completion: call wait_for_agent with story_id="{{story_id}}" and agent_name="coder-1". The server automatically runs acceptance gates (cargo clippy + tests) when the coder process exits. wait_for_agent returns when the coder reaches a terminal state.
|
||||
6. Check the result: inspect the "completion" field in the wait_for_agent response — if gates_passed is true, the work is done; if false, review the gate_output and decide whether to start a fresh coder.
|
||||
7. If the agent gets stuck, stop it and start a fresh agent.
|
||||
8. STOP here. Do NOT accept the story or merge to master. Report the status to the human for final review and acceptance.
|
||||
|
||||
## Rules
|
||||
- Do NOT implement code yourself - delegate to coder agents
|
||||
- Only run one coder at a time per story
|
||||
- Focus on coordination, monitoring, and quality review
|
||||
- Never accept stories or merge to master - that is the human's job
|
||||
- Your job ends when the coder's completion report shows gates_passed=true and you have reported the result"""
|
||||
system_prompt = "You are a supervisor agent. Read CLAUDE.md and .story_kit/README.md first to understand the project dev process. Use MCP tools to coordinate sub-agents. Never implement code directly - always delegate to coder agents and monitor their progress. Use wait_for_agent to block until the coder finishes — the server automatically runs acceptance gates when the agent process exits. Never accept stories or merge to master - get all gates green and report to the human."
|
||||
|
||||
[[agent]]
|
||||
name = "coder-1"
|
||||
stage = "coder"
|
||||
@@ -127,7 +88,7 @@ Read CLAUDE.md first, then .story_kit/README.md to understand the dev process.
|
||||
- URL to visit in the browser
|
||||
- Things to check in the UI
|
||||
- curl commands to exercise relevant API endpoints
|
||||
- Kill the test server when done: `pkill -f 'target.*story-kit' || true` (NEVER use `pkill -f story-kit` — it kills the vite dev server)
|
||||
- Kill the test server when done: `pkill -f 'target.*storkit' || true` (NEVER use `pkill -f storkit` — it kills the vite dev server)
|
||||
|
||||
### 4. Produce Structured Report
|
||||
Print your QA report to stdout before your process exits. The server will automatically run acceptance gates. Use this format:
|
||||
@@ -204,7 +165,7 @@ Read CLAUDE.md first, then .story_kit/README.md to understand the dev process.
|
||||
- URL to visit in the browser
|
||||
- Things to check in the UI
|
||||
- curl commands to exercise relevant API endpoints
|
||||
- Kill the test server when done: `pkill -f 'target.*story-kit' || true` (NEVER use `pkill -f story-kit` — it kills the vite dev server)
|
||||
- Kill the test server when done: `pkill -f 'target.*storkit' || true` (NEVER use `pkill -f storkit` — it kills the vite dev server)
|
||||
|
||||
### 4. Produce Structured Report
|
||||
Print your QA report to stdout before your process exits. The server will automatically run acceptance gates. Use this format:
|
||||
44
.storkit/specs/functional/SLACK_SETUP.md
Normal file
44
.storkit/specs/functional/SLACK_SETUP.md
Normal file
@@ -0,0 +1,44 @@
|
||||
# Slack Integration Setup
|
||||
|
||||
## Bot Configuration
|
||||
|
||||
Slack integration is configured via `bot.toml` in the project's `.storkit/` directory:
|
||||
|
||||
```toml
|
||||
transport = "slack"
|
||||
display_name = "Storkit"
|
||||
slack_bot_token = "xoxb-..."
|
||||
slack_signing_secret = "..."
|
||||
slack_channel_ids = ["C01ABCDEF"]
|
||||
```
|
||||
|
||||
## Slack App Configuration
|
||||
|
||||
### Event Subscriptions
|
||||
|
||||
1. In your Slack app settings, enable **Event Subscriptions**.
|
||||
2. Set the **Request URL** to: `https://<your-host>/webhook/slack`
|
||||
3. Subscribe to the `message.channels`, `message.groups` (private channels), and `message.im` bot events.
|
||||
|
||||
### Slash Commands
|
||||
|
||||
Slash commands provide quick access to pipeline commands without mentioning the bot.
|
||||
|
||||
1. In your Slack app settings, go to **Slash Commands**.
|
||||
2. Create the following commands, all pointing to the same **Request URL**: `https://<your-host>/webhook/slack/command`
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/storkit-status` | Show pipeline status and agent availability |
|
||||
| `/storkit-cost` | Show token spend: 24h total, top stories, and breakdown |
|
||||
| `/storkit-show` | Display the full text of a work item (e.g. `/storkit-show 42`) |
|
||||
| `/storkit-git` | Show git status: branch, changes, ahead/behind |
|
||||
| `/storkit-htop` | Show system and agent process dashboard |
|
||||
|
||||
All slash command responses are **ephemeral** — only the user who invoked the command sees the response.
|
||||
|
||||
### OAuth & Permissions
|
||||
|
||||
Required bot token scopes:
|
||||
- `chat:write` — send messages
|
||||
- `commands` — handle slash commands
|
||||
@@ -118,8 +118,8 @@ To support both Remote and Local models, the system implements a `ModelProvider`
|
||||
|
||||
Multiple instances can run simultaneously in different worktrees. To avoid port conflicts:
|
||||
|
||||
- **Backend:** Set `STORYKIT_PORT` to a unique port (default is 3001). Example: `STORYKIT_PORT=3002 cargo run`
|
||||
- **Frontend:** Run `npm run dev` from `frontend/`. It auto-selects the next unused port. It reads `STORYKIT_PORT` to know which backend to talk to, so export it before running: `export STORYKIT_PORT=3002 && cd frontend && npm run dev`
|
||||
- **Backend:** Set `STORKIT_PORT` to a unique port (default is 3001). Example: `STORKIT_PORT=3002 cargo run`
|
||||
- **Frontend:** Run `npm run dev` from `frontend/`. It auto-selects the next unused port. It reads `STORKIT_PORT` to know which backend to talk to, so export it before running: `export STORKIT_PORT=3002 && cd frontend && npm run dev`
|
||||
|
||||
When running in a worktree, use a port that won't conflict with the main instance (3001). Ports 3002+ are good choices.
|
||||
|
||||
@@ -127,4 +127,4 @@ When running in a worktree, use a port that won't conflict with the main instanc
|
||||
1. **Project Scope:** The application must strictly enforce that it does not read/write outside the `project_root` selected by the user.
|
||||
2. **Human in the Loop:**
|
||||
* Shell commands that modify state (non-readonly) should ideally require a UI confirmation (configurable).
|
||||
* File writes must be confirmed or revertible.
|
||||
* File writes must be confirmed or revertible.
|
||||
@@ -0,0 +1,69 @@
|
||||
---
|
||||
name: "Evaluate Docker/OrbStack for agent isolation and resource limiting"
|
||||
agent: coder-opus
|
||||
---
|
||||
|
||||
# Spike 329: Evaluate Docker/OrbStack for agent isolation and resource limiting
|
||||
|
||||
## Question
|
||||
|
||||
Investigate running the entire storkit system (server, Matrix bot, agents, web UI) inside a single Docker container, using OrbStack as the macOS runtime for better performance. The goal is to isolate storkit from the host machine — not to isolate agents from each other.
|
||||
|
||||
Currently storkit runs as bare processes on the host with full filesystem and network access. A single container would provide:
|
||||
|
||||
1. **Host isolation** — storkit can't touch anything outside the container
|
||||
2. **Clean install/uninstall** — `docker run` to start, `docker rm` to remove
|
||||
3. **Reproducible environment** — same container works on any machine
|
||||
4. **Distributable product** — `docker pull storkit` for new users
|
||||
5. **Resource limits** — cap total CPU/memory for the whole system
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
Docker Container (single)
|
||||
├── storkit server
|
||||
│ ├── Matrix bot
|
||||
│ ├── WhatsApp webhook
|
||||
│ ├── Slack webhook
|
||||
│ ├── Web UI
|
||||
│ └── MCP server
|
||||
├── Agent processes (coder-1, coder-2, coder-opus, qa, mergemaster)
|
||||
├── Rust toolchain + Node.js + Claude Code CLI
|
||||
└── /workspace (bind-mounted project repo from host)
|
||||
```
|
||||
|
||||
## Key questions to answer:
|
||||
|
||||
- **Performance**: How much slower are cargo builds inside the container on macOS? Compare Docker Desktop vs OrbStack for bind-mounted volumes.
|
||||
- **Dockerfile**: What's the minimal image for the full stack? Rust toolchain + Node.js + Claude Code CLI + cargo-nextest + git.
|
||||
- **Bind mounts**: The project repo is bind-mounted from the host. Any filesystem performance concerns with OrbStack?
|
||||
- **Networking**: Container exposes web UI port (3000). Matrix/WhatsApp/Slack connect outbound. Any issues?
|
||||
- **API key**: Pass ANTHROPIC_API_KEY as env var to the container.
|
||||
- **Git**: Git operations happen inside the container on the bind-mounted repo. Commits are visible on the host immediately.
|
||||
- **Cargo cache**: Use a named Docker volume for ~/.cargo/registry so dependencies persist across container restarts.
|
||||
- **Claude Code state**: Where does Claude Code store its session data? Needs to persist or be in a volume.
|
||||
- **OrbStack vs Docker Desktop**: Is OrbStack required for acceptable performance, or does Docker Desktop work too?
|
||||
- **Server restart**: Does `rebuild_and_restart` work inside a container (re-exec with new binary)?
|
||||
|
||||
## Deliverable:
|
||||
A proof-of-concept Dockerfile, docker-compose.yml, and a short write-up with findings and performance benchmarks.
|
||||
|
||||
## Hypothesis
|
||||
|
||||
- TBD
|
||||
|
||||
## Timebox
|
||||
|
||||
- TBD
|
||||
|
||||
## Investigation Plan
|
||||
|
||||
- TBD
|
||||
|
||||
## Findings
|
||||
|
||||
- TBD
|
||||
|
||||
## Recommendation
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,40 @@
|
||||
---
|
||||
name: "Abstract agent runtime to support non-Claude-Code backends"
|
||||
---
|
||||
|
||||
# Refactor 343: Abstract agent runtime to support non-Claude-Code backends
|
||||
|
||||
## Current State
|
||||
|
||||
- TBD
|
||||
|
||||
## Desired State
|
||||
|
||||
Currently agent spawning is tightly coupled to Claude Code CLI — agents are spawned as PTY processes running the `claude` binary. To support ChatGPT and Gemini as agent backends, we need to abstract the agent runtime.
|
||||
|
||||
The agent pool currently does:
|
||||
1. Spawn `claude` CLI process via portable-pty
|
||||
2. Stream JSON events from stdout
|
||||
3. Parse tool calls, text output, thinking traces
|
||||
4. Wait for process exit, run gates
|
||||
|
||||
This needs to become a trait so different backends can be plugged in:
|
||||
- Claude Code (existing) — spawns `claude` CLI, parses JSON stream
|
||||
- OpenAI API — calls ChatGPT via API with tool definitions, manages conversation loop
|
||||
- Gemini API — calls Gemini via API with tool definitions, manages conversation loop
|
||||
|
||||
The key abstraction is: an agent runtime takes a prompt + tools and produces a stream of events (text output, tool calls, completion). The existing PTY/Claude Code logic becomes one implementation of this trait.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Define an AgentRuntime trait with methods for: start, stream_events, stop, get_status
|
||||
- [ ] ClaudeCodeRuntime implements the trait using existing PTY spawning logic
|
||||
- [ ] Agent pool uses the trait instead of directly spawning Claude Code
|
||||
- [ ] Runtime selection is configurable per agent in project.toml (e.g. runtime = 'claude-code')
|
||||
- [ ] All existing Claude Code agent functionality preserved
|
||||
- [ ] Event stream format is runtime-agnostic (text, tool_call, thinking, done)
|
||||
- [ ] Token usage tracking works across runtimes
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,25 @@
|
||||
---
|
||||
name: "ChatGPT agent backend via OpenAI API"
|
||||
---
|
||||
|
||||
# Story 344: ChatGPT agent backend via OpenAI API
|
||||
|
||||
## User Story
|
||||
|
||||
As a project owner, I want to run agents using ChatGPT (GPT-4o, o3, etc.) via the OpenAI API, so that I can use OpenAI models for coding tasks alongside Claude.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Implement OpenAiRuntime using the AgentRuntime trait from refactor 343
|
||||
- [ ] Supports GPT-4o and o3 models via the OpenAI chat completions API
|
||||
- [ ] Manages a conversation loop: send prompt + tool definitions, execute tool calls, continue until done
|
||||
- [ ] Agents connect to storkit's MCP server for all tool operations — no custom file/bash tools needed
|
||||
- [ ] MCP tool definitions are converted to OpenAI function calling format
|
||||
- [ ] Configurable in project.toml: runtime = 'openai', model = 'gpt-4o'
|
||||
- [ ] OPENAI_API_KEY passed via environment variable
|
||||
- [ ] Token usage tracked and logged to token_usage.jsonl
|
||||
- [ ] Agent output streams to the same event system (web UI, bot notifications)
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,25 @@
|
||||
---
|
||||
name: "Gemini agent backend via Google AI API"
|
||||
---
|
||||
|
||||
# Story 345: Gemini agent backend via Google AI API
|
||||
|
||||
## User Story
|
||||
|
||||
As a project owner, I want to run agents using Gemini (2.5 Pro, etc.) via the Google AI API, so that I can use Google models for coding tasks alongside Claude and ChatGPT.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Implement GeminiRuntime using the AgentRuntime trait from refactor 343
|
||||
- [ ] Supports Gemini 2.5 Pro and other Gemini models via the Google AI generativeai API
|
||||
- [ ] Manages a conversation loop: send prompt + tool definitions, execute tool calls, continue until done
|
||||
- [ ] Agents connect to storkit's MCP server for all tool operations — no custom file/bash tools needed
|
||||
- [ ] MCP tool definitions are converted to Gemini function calling format
|
||||
- [ ] Configurable in project.toml: runtime = 'gemini', model = 'gemini-2.5-pro'
|
||||
- [ ] GOOGLE_AI_API_KEY passed via environment variable
|
||||
- [ ] Token usage tracked and logged to token_usage.jsonl
|
||||
- [ ] Agent output streams to the same event system (web UI, bot notifications)
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,22 @@
|
||||
---
|
||||
name: "MCP tools for code search (grep and glob)"
|
||||
---
|
||||
|
||||
# Story 348: MCP tools for code search (grep and glob)
|
||||
|
||||
## User Story
|
||||
|
||||
As a non-Claude agent connected via MCP, I want search tools so that I can find files and search code contents in my worktree.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] grep tool — searches file contents with regex support, returns matching lines with context
|
||||
- [ ] glob tool — finds files by pattern (e.g. '**/*.rs')
|
||||
- [ ] Both scoped to the agent's worktree
|
||||
- [ ] grep supports output modes: content (matching lines), files_with_matches (just paths), count
|
||||
- [ ] grep supports context lines (-A, -B, -C)
|
||||
- [ ] Results limited to prevent overwhelming the LLM context
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,23 @@
|
||||
---
|
||||
name: "MCP tools for git operations"
|
||||
---
|
||||
|
||||
# Story 349: MCP tools for git operations
|
||||
|
||||
## User Story
|
||||
|
||||
As a non-Claude agent connected via MCP, I want git tools so that I can check status, stage files, commit changes, and view history in my worktree.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] git_status tool — returns working tree status (staged, unstaged, untracked files)
|
||||
- [ ] git_diff tool — returns diff output, supports staged/unstaged/commit range
|
||||
- [ ] git_add tool — stages files by path
|
||||
- [ ] git_commit tool — commits staged changes with a message
|
||||
- [ ] git_log tool — returns commit history with configurable count and format
|
||||
- [ ] All operations run in the agent's worktree
|
||||
- [ ] Cannot push, force-push, or modify remote — server handles that
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,21 @@
|
||||
---
|
||||
name: "MCP tool for code definitions lookup"
|
||||
---
|
||||
|
||||
# Story 350: MCP tool for code definitions lookup
|
||||
|
||||
## User Story
|
||||
|
||||
As a non-Claude agent connected via MCP, I want a code intelligence tool so that I can find function, struct, and type definitions without grepping through all files.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] get_definitions tool — finds function/struct/enum/type/class definitions by name or pattern
|
||||
- [ ] Supports Rust (fn, struct, enum, impl, trait) and TypeScript (function, class, interface, type) at minimum
|
||||
- [ ] Returns file path, line number, and the definition signature
|
||||
- [ ] Scoped to the agent's worktree
|
||||
- [ ] Faster than grepping — uses tree-sitter or regex-based parsing
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,18 @@
|
||||
---
|
||||
name: "Make help command output alphabetical"
|
||||
---
|
||||
|
||||
# Story 354: Make help command output alphabetical
|
||||
|
||||
## User Story
|
||||
|
||||
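As a bot user, I want the help command to list the available commands in alphabetical order, so that I can find the command I need quickly.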
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Help command lists bot commands in alphabetical order
|
||||
- [ ] Existing help tests still pass
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,20 @@
|
||||
---
|
||||
name: "Bot rebuild command to trigger server rebuild and restart"
|
||||
---
|
||||
|
||||
# Story 355: Bot rebuild command to trigger server rebuild and restart
|
||||
|
||||
## User Story
|
||||
|
||||
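As a project owner, I want to trigger a server rebuild and restart from the chat room with a `rebuild` command, so that I can deploy changes without terminal access.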
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Matrix bot recognizes `rebuild` as a command
|
||||
- [ ] Command triggers rebuild_and_restart and reports result back to the room
|
||||
- [ ] Command appears in help output
|
||||
- [ ] Build failures are reported to the user without crashing the server
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,21 @@
|
||||
---
|
||||
name: "Web UI button to start a coder on a story"
|
||||
---
|
||||
|
||||
# Story 336: Web UI button to start a coder on a story
|
||||
|
||||
## User Story
|
||||
|
||||
As a project owner using the web UI, I want to click a button on a work item to start a coder on it, so that I can kick off work without using the terminal or chat bot.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Start button visible on work items in backlog and current stages
|
||||
- [ ] Clicking start assigns the default coder and moves the story to current if needed
|
||||
- [ ] Option to select a specific agent (dropdown: coder-1, coder-2, coder-opus)
|
||||
- [ ] Button disabled when all coders are busy (shows tooltip explaining why)
|
||||
- [ ] UI updates immediately to show the assigned agent
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,20 @@
|
||||
---
|
||||
name: "Web UI button to stop an agent on a story"
|
||||
---
|
||||
|
||||
# Story 337: Web UI button to stop an agent on a story
|
||||
|
||||
## User Story
|
||||
|
||||
As a project owner using the web UI, I want to click a button on a work item to stop its running agent, so that I can halt work without using the terminal or chat bot.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Stop button visible on work items that have a running agent
|
||||
- [ ] Clicking stop kills the agent and shows confirmation
|
||||
- [ ] Stop button is hidden on work items that have no actively running agent
|
||||
- [ ] UI updates immediately to reflect the agent is stopped
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,21 @@
|
||||
---
|
||||
name: "Web UI button to move stories between pipeline stages"
|
||||
---
|
||||
|
||||
# Story 338: Web UI button to move stories between pipeline stages
|
||||
|
||||
## User Story
|
||||
|
||||
As a project owner using the web UI, I want to drag or click to move stories between pipeline stages, so that I can manage the pipeline visually.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Move buttons or dropdown on each work item to change stage (backlog, current, done)
|
||||
- [ ] Uses the existing move_story MCP tool under the hood
|
||||
- [ ] Shows confirmation with old and new stage
|
||||
- [ ] UI updates immediately to reflect the move
|
||||
- [ ] Prevents invalid moves (e.g. moving to QA or merge without an agent)
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,21 @@
|
||||
---
|
||||
name: "Web UI agent assignment dropdown on work items"
|
||||
---
|
||||
|
||||
# Story 339: Web UI agent assignment dropdown on work items
|
||||
|
||||
## User Story
|
||||
|
||||
As a project owner using the web UI, I want to select which agent to assign to a work item from a dropdown, so that I can control agent assignments visually.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Agent dropdown visible in expanded work item detail panel
|
||||
- [ ] Shows available agents filtered by appropriate stage (coders for current, QA for qa, mergemaster for merge)
|
||||
- [ ] Selecting an agent stops any current agent and starts the new one
|
||||
- [ ] Updates the story front matter with the agent assignment
|
||||
- [ ] Shows agent status (running, idle) in the dropdown
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,21 @@
|
||||
---
|
||||
name: "Web UI rebuild and restart button"
|
||||
---
|
||||
|
||||
# Story 340: Web UI rebuild and restart button
|
||||
|
||||
## User Story
|
||||
|
||||
As a project owner using the web UI, I want a rebuild and restart button, so that I can deploy changes without terminal access.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Rebuild button in the web UI header or settings area
|
||||
- [ ] Shows confirmation dialog before triggering rebuild
|
||||
- [ ] Triggers the rebuild_and_restart MCP tool
|
||||
- [ ] Shows build progress or status indicator
|
||||
- [ ] Handles reconnection after server restarts
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,22 @@
|
||||
---
|
||||
name: "MCP tools for file operations (read, write, edit, list)"
|
||||
---
|
||||
|
||||
# Story 346: MCP tools for file operations (read, write, edit, list)
|
||||
|
||||
## User Story
|
||||
|
||||
As a non-Claude agent connected via MCP, I want file operation tools so that I can read, write, and edit code in my worktree.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] read_file tool — reads file contents, supports offset/limit for large files
|
||||
- [ ] write_file tool — writes/creates a file at a given path
|
||||
- [ ] edit_file tool — replaces a string in a file (old_string/new_string like Claude Code's Edit)
|
||||
- [ ] list_files tool — glob pattern matching to find files in the worktree
|
||||
- [ ] All operations scoped to the agent's worktree path for safety
|
||||
- [ ] Returns clear errors for missing files, permission issues, etc.
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,22 @@
|
||||
---
|
||||
name: "MCP tool for shell command execution"
|
||||
---
|
||||
|
||||
# Story 347: MCP tool for shell command execution
|
||||
|
||||
## User Story
|
||||
|
||||
As a non-Claude agent connected via MCP, I want a shell command tool so that I can run cargo build, npm test, and other commands in my worktree.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] run_command tool — executes a bash command and returns stdout/stderr/exit_code
|
||||
- [ ] Command runs in the agent's worktree directory
|
||||
- [ ] Supports timeout parameter (default 120s, max 600s)
|
||||
- [ ] Sandboxed to worktree — cannot cd outside or access host paths
|
||||
- [ ] Returns streaming output for long-running commands
|
||||
- [ ] Dangerous commands blocked (rm -rf /, etc.)
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,22 @@
|
||||
---
|
||||
name: "Bot reset command to clear conversation context"
|
||||
---
|
||||
|
||||
# Story 351: Bot reset command to clear conversation context
|
||||
|
||||
## User Story
|
||||
|
||||
As a project owner in a chat room, I want to type "{bot_name} reset" to drop the current Claude Code session and start fresh, so that I can reduce token usage when context gets bloated without restarting the server.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] '{bot_name} reset' kills the current Claude Code session
|
||||
- [ ] A new session starts immediately with clean context
|
||||
- [ ] Memories persist via the file system (auto-memory directory is unchanged)
|
||||
- [ ] Bot confirms the reset with a short message
|
||||
- [ ] Registered in the command registry so it appears in help output
|
||||
- [ ] Handled at bot level without LLM invocation
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -0,0 +1,30 @@
|
||||
---
|
||||
name: "Ambient on/off command not intercepted by bot after refactors"
|
||||
---
|
||||
|
||||
# Bug 352: Ambient on/off command not intercepted by bot after refactors
|
||||
|
||||
## Description
|
||||
|
||||
The ambient on/off bot command stopped being intercepted by the bot after the recent refactors (refactor 328 split commands.rs into modules; refactor 330 consolidated the chat transports into the chat/ module). Messages like "timmy ambient off", "ambient off", and "ambient on" are now forwarded to the LLM instead of being handled at the bot level. The ambient toggle was previously handled in bot.rs before the command registry dispatch — it may not have been properly wired up again after the code was moved into the chat/ module structure.
|
||||
|
||||
## How to Reproduce
|
||||
|
||||
1. Type "timmy ambient off" in a Matrix room where ambient mode is on
|
||||
2. Observe that the message is forwarded to Claude instead of being intercepted
|
||||
3. Repeat with "timmy ambient on", "ambient off", and "ambient on" — each is likewise forwarded to Claude instead of being intercepted
|
||||
|
||||
## Actual Result
|
||||
|
||||
Ambient toggle commands are forwarded to the LLM as regular messages.
|
||||
|
||||
## Expected Result
|
||||
|
||||
Ambient toggle commands should be intercepted at the bot level and toggle ambient mode without invoking the LLM, with a confirmation message sent directly.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] 'timmy ambient on' toggles ambient mode on and sends confirmation without LLM invocation
|
||||
- [ ] 'timmy ambient off' toggles ambient mode off and sends confirmation without LLM invocation
|
||||
- [ ] Ambient toggle works after refactors 328 and 330
|
||||
- [ ] Ambient state persists in bot.toml as before
|
||||
@@ -0,0 +1,19 @@
|
||||
---
|
||||
name: "Add party emoji to done stage notification messages"
|
||||
---
|
||||
|
||||
# Story 353: Add party emoji to done stage notification messages
|
||||
|
||||
## User Story
|
||||
|
||||
As a project owner, I want to see a party emoji in the Matrix/chat notification when a story moves to done, so that completions feel celebratory.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Stage notification for done includes a party emoji (e.g. 🎉)
|
||||
- [ ] Only the done stage gets the emoji — other stage transitions stay as they are
|
||||
- [ ] Works across all chat transports (Matrix, WhatsApp, Slack)
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- TBD
|
||||
@@ -10,7 +10,7 @@ The `prompt_permission` MCP tool returns plain text ("Permission granted for '..
|
||||
|
||||
## How to Reproduce
|
||||
|
||||
1. Start the story-kit server and open the web UI
|
||||
1. Start the storkit server and open the web UI
|
||||
2. Chat with the claude-code-pty model
|
||||
3. Ask it to do something that requires a tool NOT in `.claude/settings.json` allow list (e.g. `wc -l /etc/hosts`, or WebFetch to a non-allowed domain)
|
||||
4. The permission dialog appears — click Approve
|
||||
@@ -6,7 +6,7 @@ name: "Retry limit for mergemaster and pipeline restarts"
|
||||
|
||||
## User Story
|
||||
|
||||
As a developer using story-kit, I want pipeline auto-restarts to have a configurable retry limit so that failing agents don't loop infinitely consuming CPU and API credits.
|
||||
As a developer using storkit, I want pipeline auto-restarts to have a configurable retry limit so that failing agents don't loop infinitely consuming CPU and API credits.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user