From 6f6d37e955ebde547213bcd502101a6c272a72f0 Mon Sep 17 00:00:00 2001
From: dave
Date: Thu, 9 Apr 2026 22:00:24 +0000
Subject: [PATCH] huskies: merge
 514_story_delete_story_should_do_a_full_cleanup_crdt_op_db_row_filesystem_shadow_worktree_pending_timers

---
 server/src/http/mcp/story_tools.rs | 156 ++++++++++++++++++++++++++---
 1 file changed, 141 insertions(+), 15 deletions(-)

diff --git a/server/src/http/mcp/story_tools.rs b/server/src/http/mcp/story_tools.rs
index 18c5e9d7..3fc6c284 100644
--- a/server/src/http/mcp/story_tools.rs
+++ b/server/src/http/mcp/story_tools.rs
@@ -505,33 +505,78 @@ pub(super) async fn tool_delete_story(args: &Value, ctx: &AppContext) -> Result<
         .ok_or("Missing required argument: story_id")?;
 
     let project_root = ctx.agents.get_project_root(&ctx.state)?;
+    let mut failed_steps: Vec<String> = Vec::new();
 
-    // 1. Stop any running agents for this story (best-effort)
+    // 0. Cancel any pending rate-limit retry timers for this story (bug 514).
+    // Must happen before stopping agents so the tick loop cannot re-spawn
+    // an agent after we tear everything else down.
+    let timer_removed = ctx.timer_store.remove(story_id);
+    if timer_removed {
+        slog_warn!("[delete_story] Cancelled pending timer for '{story_id}'");
+    } else {
+        slog_warn!("[delete_story] No pending timer found for '{story_id}'");
+    }
+
+    // 1. Stop any running agents for this story (best-effort).
     if let Ok(agents) = ctx.agents.list_agents() {
         for agent in agents.iter().filter(|a| a.story_id == story_id) {
-            let _ = ctx
+            match ctx
                 .agents
                 .stop_agent(&project_root, story_id, &agent.agent_name)
-                .await;
+                .await
+            {
+                Ok(()) => {
+                    slog_warn!(
+                        "[delete_story] Stopped agent '{}' for '{story_id}'",
+                        agent.agent_name
+                    );
+                }
+                Err(e) => {
+                    slog_warn!(
+                        "[delete_story] Failed to stop agent '{}' for '{story_id}': {e}",
+                        agent.agent_name
+                    );
+                    failed_steps.push(format!("stop_agent({}): {e}", agent.agent_name));
+                }
+            }
         }
     }
 
-    // 2. Remove agent pool entries
-    ctx.agents.remove_agents_for_story(story_id);
+    // 2. Remove agent pool entries.
+    let removed_count = ctx.agents.remove_agents_for_story(story_id);
+    slog_warn!("[delete_story] Removed {removed_count} agent pool entries for '{story_id}'");
 
-    // 3. Remove worktree (best-effort)
+    // 3. Remove worktree (best-effort).
     if let Ok(config) = crate::config::ProjectConfig::load(&project_root) {
-        let _ =
-            crate::worktree::remove_worktree_by_story_id(&project_root, story_id, &config).await;
+        match crate::worktree::remove_worktree_by_story_id(&project_root, story_id, &config).await
+        {
+            Ok(()) => slog_warn!("[delete_story] Removed worktree for '{story_id}'"),
+            Err(e) => slog_warn!("[delete_story] Worktree removal for '{story_id}': {e}"),
+        }
     }
 
-    // 4. Delete from database content store and CRDT.
+    // 4. Write a CRDT tombstone op so the story is evicted from the in-memory
+    // state machine and the deletion is persisted to crdt_ops (survives
+    // restart). Best-effort: legacy filesystem-only stories may not have a
+    // CRDT entry, so a "not found" error is expected and non-fatal.
+    match crate::crdt_state::evict_item(story_id) {
+        Ok(()) => {
+            slog_warn!(
+                "[delete_story] Evicted '{story_id}' from CRDT (tombstone persisted to crdt_ops)"
+            );
+        }
+        Err(e) => {
+            slog_warn!("[delete_story] CRDT eviction for '{story_id}': {e}");
+        }
+    }
+
+    // 5. Delete from database content store and shadow table.
     let found_in_db = crate::db::read_content(story_id).is_some()
         || crate::pipeline_state::read_typed(story_id).ok().flatten().is_some();
     crate::db::delete_item(story_id);
+    slog_warn!("[delete_story] Deleted '{story_id}' from content store / shadow table");
 
-    // Also delete filesystem file if it exists (backwards compat).
+    // 6. Remove the filesystem shadow file from work/N_stage/.
let sk = project_root.join(".huskies").join("work"); let stage_dirs = [ "1_backlog", @@ -545,19 +590,35 @@ pub(super) async fn tool_delete_story(args: &Value, ctx: &AppContext) -> Result< for stage in &stage_dirs { let path = sk.join(stage).join(format!("{story_id}.md")); if path.exists() { - let _ = fs::remove_file(&path); - slog_warn!("[delete_story] Deleted '{story_id}' from work/{stage}/"); - deleted_from_fs = true; + match fs::remove_file(&path) { + Ok(()) => { + slog_warn!( + "[delete_story] Deleted filesystem shadow '{story_id}' from work/{stage}/" + ); + deleted_from_fs = true; + } + Err(e) => { + slog_warn!("[delete_story] Failed to delete filesystem shadow '{story_id}' from work/{stage}/: {e}"); + failed_steps.push(format!("delete_filesystem({stage}): {e}")); + } + } break; } } - if !found_in_db && !deleted_from_fs { + if !found_in_db && !deleted_from_fs && !timer_removed { return Err(format!( "Story '{story_id}' not found in any pipeline stage." )); } + if !failed_steps.is_empty() { + return Err(format!( + "Story '{story_id}' partially deleted. Failed steps: {}.", + failed_steps.join("; ") + )); + } + Ok(format!("Story '{story_id}' deleted from pipeline.")) } @@ -1508,4 +1569,69 @@ mod tests { assert!(result.is_ok(), "Expected ok: {result:?}"); assert!(result.unwrap().contains("Criterion 0 checked")); } + + /// Regression test for bug 514: deleting a story must cancel its pending + /// rate-limit retry timer so the tick loop cannot re-spawn an agent. + /// + /// Repro (2026-04-09): `delete_story 478_…` returned success and removed + /// the filesystem shadow, but the timer entry in `.huskies/timers.json` + /// survived. Five minutes later the tick loop fired and re-spawned + /// `coder-1` on the deleted story. + #[tokio::test] + async fn delete_story_cancels_pending_timer() { + use chrono::Utc; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + // Create a story file in the backlog. 
+ let backlog = root.join(".huskies/work/1_backlog"); + fs::create_dir_all(&backlog).unwrap(); + fs::write( + backlog.join("478_story_rate_limit_repro.md"), + "---\nname: \"Rate Limit Repro\"\n---\n", + ) + .unwrap(); + + let ctx = test_ctx(root); + + // Schedule a rate-limit retry timer for the story (simulates the + // auto-scheduler that fires after a rate-limit event). + let future_time = Utc::now() + chrono::Duration::minutes(5); + ctx.timer_store + .add("478_story_rate_limit_repro".to_string(), future_time) + .unwrap(); + + // Sanity: timer is present before deletion. + assert_eq!(ctx.timer_store.list().len(), 1); + + // Delete the story. + let result = tool_delete_story( + &json!({"story_id": "478_story_rate_limit_repro"}), + &ctx, + ) + .await; + assert!(result.is_ok(), "delete_story failed: {result:?}"); + + // Timer must be gone — fast-forwarding past the scheduled time should + // return no entries. + assert!( + ctx.timer_store.list().is_empty(), + "timer was not cancelled by delete_story" + ); + let far_future = Utc::now() + chrono::Duration::hours(1); + let due = ctx.timer_store.take_due(far_future); + assert!( + due.is_empty(), + "take_due returned a timer for the deleted story: {due:?}" + ); + + // Filesystem shadow must also be gone. + assert!( + !backlog + .join("478_story_rate_limit_repro.md") + .exists(), + "filesystem shadow was not removed" + ); + } }