From 13635b01bcc2502e14bdf7a751162c35e30c24b0 Mon Sep 17 00:00:00 2001 From: Timmy Date: Thu, 9 Apr 2026 21:28:48 +0100 Subject: [PATCH] wip(501): timer cancellation infrastructure (parallel session WIP + main.rs wiring) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bundles in-progress work from a parallel Claude session toward fixing bug 501 (rate-limit retry timer doesn't cancel on stop_agent / move_story / successful completion). This commit lands the foundation but the MCP tool wiring is still TODO. - server/src/chat/timer.rs: defense-in-depth check in tick_once that skips firing a timer for stories already at or past 3_qa (3_qa, 4_merge, 5_done, 6_archived). The primary cancellation path will be in the MCP tools; this guards against races where a timer was scheduled before the story was advanced and the tool didn't get a chance to cancel it. - server/src/http/context.rs: adds `timer_store: Arc<TimerStore>` field on AppContext so MCP tools (move_story, stop_agent, ...) can reach the shared timer store and cancel pending entries when the user intervenes manually. The test helper is updated to construct one. - server/src/main.rs: wires up a TimerStore instance in the AppContext initialiser so the binary actually compiles after the context.rs field addition. TODO: the matrix bot's spawn_bot still creates its own TimerStore instance (in chat/transport/matrix/bot/run.rs:220-227) rather than consuming the shared one — that refactor is the next step in the bug 501 fix. 
What is NOT in this commit and is needed to actually fix bug 501: - The MCP tool side (move_story, stop_agent, delete_story) does not yet call timer_store.cancel(story_id) when invoked - The matrix bot's spawn_bot does not yet consume the shared timer_store from AppContext — it still creates its own Co-Authored-By: Claude Opus 4.6 (1M context) --- server/src/chat/timer.rs | 22 ++++++++++++++++++++++ server/src/http/context.rs | 12 ++++++++++++ server/src/main.rs | 16 ++++++++++++++++ 3 files changed, 50 insertions(+) diff --git a/server/src/chat/timer.rs b/server/src/chat/timer.rs index 36883a01..eb4c8afa 100644 --- a/server/src/chat/timer.rs +++ b/server/src/chat/timer.rs @@ -200,6 +200,28 @@ async fn tick_once( for entry in due { crate::slog!("[timer] Timer fired for story {}", entry.story_id); + // Bug 501: Defense-in-depth check. If the story has already advanced + // past the active-work stages (3_qa, 4_merge, 5_done, 6_archived), + // there is nothing to resume — the timer is stale and should no-op. + // The primary cancellation paths (move_story MCP → backlog, stop_agent) + // remove the timer before it fires; this guard covers the case where + // cancellation was not yet called or the story raced forward through + // the pipeline while the timer was pending. + if let Some(item) = crate::crdt_state::read_item(&entry.story_id) { + match item.stage.as_str() { + "3_qa" | "4_merge" | "5_done" | "6_archived" => { + crate::slog!( + "[timer] Skipping timer for story {} — currently in '{}', \ + not in backlog/current; timer is stale", + entry.story_id, + item.stage + ); + continue; + } + _ => {} + } + } + // Move from backlog to current if needed — the auto-assign // watcher will then start an agent automatically. 
if let Err(e) = diff --git a/server/src/http/context.rs b/server/src/http/context.rs index d9a41e49..56f506a9 100644 --- a/server/src/http/context.rs +++ b/server/src/http/context.rs @@ -1,4 +1,5 @@ use crate::agents::{AgentPool, ReconciliationEvent}; +use crate::chat::timer::TimerStore; use crate::io::watcher::WatcherEvent; use crate::rebuild::{BotShutdownNotifier, ShutdownReason}; use crate::state::SessionState; @@ -67,6 +68,13 @@ pub struct AppContext { /// `None` when no Matrix bot is configured. pub matrix_shutdown_tx: Option>>>, + /// Shared rate-limit retry timer store. + /// + /// Used by MCP tools (`move_story`, `stop_agent`) to cancel pending timers + /// when the user manually intervenes (bug 501). Shared with the tick loop + /// spawned by the bot so that cancellations take effect in-memory rather + /// than only on disk. + pub timer_store: Arc, } #[cfg(test)] @@ -78,6 +86,9 @@ impl AppContext { let (watcher_tx, _) = broadcast::channel(64); let (reconciliation_tx, _) = broadcast::channel(64); let (perm_tx, perm_rx) = mpsc::unbounded_channel(); + let timer_store = Arc::new(TimerStore::load( + project_root.join(".huskies").join("timers.json"), + )); Self { state: Arc::new(state), store: Arc::new(JsonFileStore::new(store_path).unwrap()), @@ -90,6 +101,7 @@ impl AppContext { qa_app_process: Arc::new(std::sync::Mutex::new(None)), bot_shutdown: None, matrix_shutdown_tx: None, + timer_store, } } } diff --git a/server/src/main.rs b/server/src/main.rs index 3d299c5f..7cc98ad4 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -631,6 +631,21 @@ async fn main() -> Result<(), std::io::Error> { let matrix_shutdown_tx = Arc::new(matrix_shutdown_tx); let matrix_shutdown_tx_for_rebuild = Arc::clone(&matrix_shutdown_tx); + // Bug 501: shared rate-limit retry timer store, accessible from MCP tools + // via AppContext so manual interventions (move_story → backlog, stop_agent) + // can cancel pending timers in-memory rather than only on disk. 
+ // + // TODO(bug 501): the matrix bot currently spawns its own TimerStore instance + // in `chat::transport::matrix::bot::run::spawn_bot`. Refactor to consume this + // shared instance via `AppContext.timer_store` so cancellations from MCP + // tools and the bot's tick loop see the same in-memory state. + let timer_store = std::sync::Arc::new(crate::chat::timer::TimerStore::load( + startup_root + .as_ref() + .map(|r| r.join(".huskies").join("timers.json")) + .unwrap_or_else(|| std::path::PathBuf::from("/tmp/huskies-timers.json")), + )); + let ctx = AppContext { state: app_state, store, @@ -643,6 +658,7 @@ async fn main() -> Result<(), std::io::Error> { qa_app_process: Arc::new(std::sync::Mutex::new(None)), bot_shutdown: bot_shutdown_notifier.clone(), matrix_shutdown_tx: Some(Arc::clone(&matrix_shutdown_tx)), + timer_store, }; let app = build_routes(ctx, whatsapp_ctx.clone(), slack_ctx.clone(), port);