wip(501): timer cancellation infrastructure (parallel session WIP + main.rs wiring)
Bundles in-progress work from a parallel Claude session toward fixing
bug 501 (rate-limit retry timer doesn't cancel on stop_agent / move_story
/ successful completion). This commit lands the foundation but the MCP
tool wiring is still TODO.
- server/src/chat/timer.rs: defense-in-depth check in tick_once that
skips firing a timer for stories already in or beyond 3_qa (3_qa, 4_merge,
5_done, 6_archived). The primary cancellation path will be in the
MCP tools; this guards races where a timer was scheduled before the
story was advanced and the tool didn't get a chance to cancel it.
- server/src/http/context.rs: adds `timer_store: Arc<TimerStore>` field
on AppContext so MCP tools (move_story, stop_agent, ...) can reach
the shared timer store and cancel pending entries when the user
intervenes manually. The test helper is updated to construct one.
- server/src/main.rs: wires up a TimerStore instance in the AppContext
initializer so the binary actually compiles after the context.rs
field addition. TODO: the matrix bot's spawn_bot still creates its
own TimerStore instance (in chat/transport/matrix/bot/run.rs:220-227)
rather than consuming the shared one — that refactor is the next
step in the bug 501 fix.
What is NOT in this commit and is needed to actually fix bug 501:
- The MCP tool side (move_story, stop_agent, delete_story) does not
yet call timer_store.cancel(story_id) when invoked
- The matrix bot's spawn_bot does not yet consume the shared
timer_store from AppContext — it still creates its own
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -200,6 +200,28 @@ async fn tick_once(
|
|||||||
for entry in due {
|
for entry in due {
|
||||||
crate::slog!("[timer] Timer fired for story {}", entry.story_id);
|
crate::slog!("[timer] Timer fired for story {}", entry.story_id);
|
||||||
|
|
||||||
|
// Bug 501: Defense-in-depth check. If the story has already advanced
|
||||||
|
// past the active-work stages (3_qa, 4_merge, 5_done, 6_archived),
|
||||||
|
// there is nothing to resume — the timer is stale and should no-op.
|
||||||
|
// The primary cancellation paths (move_story MCP → backlog, stop_agent)
|
||||||
|
// remove the timer before it fires; this guard covers the case where
|
||||||
|
// cancellation was not yet called or the story raced forward through
|
||||||
|
// the pipeline while the timer was pending.
|
||||||
|
if let Some(item) = crate::crdt_state::read_item(&entry.story_id) {
|
||||||
|
match item.stage.as_str() {
|
||||||
|
"3_qa" | "4_merge" | "5_done" | "6_archived" => {
|
||||||
|
crate::slog!(
|
||||||
|
"[timer] Skipping timer for story {} — currently in '{}', \
|
||||||
|
not in backlog/current; timer is stale",
|
||||||
|
entry.story_id,
|
||||||
|
item.stage
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Move from backlog to current if needed — the auto-assign
|
// Move from backlog to current if needed — the auto-assign
|
||||||
// watcher will then start an agent automatically.
|
// watcher will then start an agent automatically.
|
||||||
if let Err(e) =
|
if let Err(e) =
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
use crate::agents::{AgentPool, ReconciliationEvent};
|
use crate::agents::{AgentPool, ReconciliationEvent};
|
||||||
|
use crate::chat::timer::TimerStore;
|
||||||
use crate::io::watcher::WatcherEvent;
|
use crate::io::watcher::WatcherEvent;
|
||||||
use crate::rebuild::{BotShutdownNotifier, ShutdownReason};
|
use crate::rebuild::{BotShutdownNotifier, ShutdownReason};
|
||||||
use crate::state::SessionState;
|
use crate::state::SessionState;
|
||||||
@@ -67,6 +68,13 @@ pub struct AppContext {
|
|||||||
/// `None` when no Matrix bot is configured.
|
/// `None` when no Matrix bot is configured.
|
||||||
pub matrix_shutdown_tx:
|
pub matrix_shutdown_tx:
|
||||||
Option<Arc<tokio::sync::watch::Sender<Option<ShutdownReason>>>>,
|
Option<Arc<tokio::sync::watch::Sender<Option<ShutdownReason>>>>,
|
||||||
|
/// Shared rate-limit retry timer store.
|
||||||
|
///
|
||||||
|
/// Used by MCP tools (`move_story`, `stop_agent`) to cancel pending timers
|
||||||
|
/// when the user manually intervenes (bug 501). Shared with the tick loop
|
||||||
|
/// spawned by the bot so that cancellations take effect in-memory rather
|
||||||
|
/// than only on disk.
|
||||||
|
pub timer_store: Arc<TimerStore>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -78,6 +86,9 @@ impl AppContext {
|
|||||||
let (watcher_tx, _) = broadcast::channel(64);
|
let (watcher_tx, _) = broadcast::channel(64);
|
||||||
let (reconciliation_tx, _) = broadcast::channel(64);
|
let (reconciliation_tx, _) = broadcast::channel(64);
|
||||||
let (perm_tx, perm_rx) = mpsc::unbounded_channel();
|
let (perm_tx, perm_rx) = mpsc::unbounded_channel();
|
||||||
|
let timer_store = Arc::new(TimerStore::load(
|
||||||
|
project_root.join(".huskies").join("timers.json"),
|
||||||
|
));
|
||||||
Self {
|
Self {
|
||||||
state: Arc::new(state),
|
state: Arc::new(state),
|
||||||
store: Arc::new(JsonFileStore::new(store_path).unwrap()),
|
store: Arc::new(JsonFileStore::new(store_path).unwrap()),
|
||||||
@@ -90,6 +101,7 @@ impl AppContext {
|
|||||||
qa_app_process: Arc::new(std::sync::Mutex::new(None)),
|
qa_app_process: Arc::new(std::sync::Mutex::new(None)),
|
||||||
bot_shutdown: None,
|
bot_shutdown: None,
|
||||||
matrix_shutdown_tx: None,
|
matrix_shutdown_tx: None,
|
||||||
|
timer_store,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -631,6 +631,21 @@ async fn main() -> Result<(), std::io::Error> {
|
|||||||
let matrix_shutdown_tx = Arc::new(matrix_shutdown_tx);
|
let matrix_shutdown_tx = Arc::new(matrix_shutdown_tx);
|
||||||
let matrix_shutdown_tx_for_rebuild = Arc::clone(&matrix_shutdown_tx);
|
let matrix_shutdown_tx_for_rebuild = Arc::clone(&matrix_shutdown_tx);
|
||||||
|
|
||||||
|
// Bug 501: shared rate-limit retry timer store, accessible from MCP tools
|
||||||
|
// via AppContext so manual interventions (move_story → backlog, stop_agent)
|
||||||
|
// can cancel pending timers in-memory rather than only on disk.
|
||||||
|
//
|
||||||
|
// TODO(bug 501): the matrix bot currently spawns its own TimerStore instance
|
||||||
|
// in `chat::transport::matrix::bot::run::spawn_bot`. Refactor to consume this
|
||||||
|
// shared instance via `AppContext.timer_store` so cancellations from MCP
|
||||||
|
// tools and the bot's tick loop see the same in-memory state.
|
||||||
|
let timer_store = std::sync::Arc::new(crate::chat::timer::TimerStore::load(
|
||||||
|
startup_root
|
||||||
|
.as_ref()
|
||||||
|
.map(|r| r.join(".huskies").join("timers.json"))
|
||||||
|
.unwrap_or_else(|| std::path::PathBuf::from("/tmp/huskies-timers.json")),
|
||||||
|
));
|
||||||
|
|
||||||
let ctx = AppContext {
|
let ctx = AppContext {
|
||||||
state: app_state,
|
state: app_state,
|
||||||
store,
|
store,
|
||||||
@@ -643,6 +658,7 @@ async fn main() -> Result<(), std::io::Error> {
|
|||||||
qa_app_process: Arc::new(std::sync::Mutex::new(None)),
|
qa_app_process: Arc::new(std::sync::Mutex::new(None)),
|
||||||
bot_shutdown: bot_shutdown_notifier.clone(),
|
bot_shutdown: bot_shutdown_notifier.clone(),
|
||||||
matrix_shutdown_tx: Some(Arc::clone(&matrix_shutdown_tx)),
|
matrix_shutdown_tx: Some(Arc::clone(&matrix_shutdown_tx)),
|
||||||
|
timer_store,
|
||||||
};
|
};
|
||||||
|
|
||||||
let app = build_routes(ctx, whatsapp_ctx.clone(), slack_ctx.clone(), port);
|
let app = build_routes(ctx, whatsapp_ctx.clone(), slack_ctx.clone(), port);
|
||||||
|
|||||||
Reference in New Issue
Block a user