huskies: merge 514_story_delete_story_should_do_a_full_cleanup_crdt_op_db_row_filesystem_shadow_worktree_pending_timers

This commit is contained in:
dave
2026-04-09 22:00:24 +00:00
parent 84717b04bd
commit 6f6d37e955
+140 -14
View File
@@ -505,33 +505,78 @@ pub(super) async fn tool_delete_story(args: &Value, ctx: &AppContext) -> Result<
.ok_or("Missing required argument: story_id")?;
let project_root = ctx.agents.get_project_root(&ctx.state)?;
let mut failed_steps: Vec<String> = Vec::new();
// 1. Stop any running agents for this story (best-effort)
// 0. Cancel any pending rate-limit retry timers for this story (bug 514).
// Must happen before stopping agents so the tick loop cannot re-spawn
// an agent after we tear everything else down.
let timer_removed = ctx.timer_store.remove(story_id);
if timer_removed {
slog_warn!("[delete_story] Cancelled pending timer for '{story_id}'");
} else {
slog_warn!("[delete_story] No pending timer found for '{story_id}'");
}
// 1. Stop any running agents for this story (best-effort).
if let Ok(agents) = ctx.agents.list_agents() {
for agent in agents.iter().filter(|a| a.story_id == story_id) {
let _ = ctx
match ctx
.agents
.stop_agent(&project_root, story_id, &agent.agent_name)
.await;
.await
{
Ok(()) => {
slog_warn!(
"[delete_story] Stopped agent '{}' for '{story_id}'",
agent.agent_name
);
}
Err(e) => {
slog_warn!(
"[delete_story] Failed to stop agent '{}' for '{story_id}': {e}",
agent.agent_name
);
failed_steps.push(format!("stop_agent({}): {e}", agent.agent_name));
}
}
}
}
// 2. Remove agent pool entries
ctx.agents.remove_agents_for_story(story_id);
// 2. Remove agent pool entries.
let removed_count = ctx.agents.remove_agents_for_story(story_id);
slog_warn!("[delete_story] Removed {removed_count} agent pool entries for '{story_id}'");
// 3. Remove worktree (best-effort)
// 3. Remove worktree (best-effort).
if let Ok(config) = crate::config::ProjectConfig::load(&project_root) {
let _ =
crate::worktree::remove_worktree_by_story_id(&project_root, story_id, &config).await;
match crate::worktree::remove_worktree_by_story_id(&project_root, story_id, &config).await
{
Ok(()) => slog_warn!("[delete_story] Removed worktree for '{story_id}'"),
Err(e) => slog_warn!("[delete_story] Worktree removal for '{story_id}': {e}"),
}
}
// 4. Delete from database content store and CRDT.
// 4. Write a CRDT tombstone op so the story is evicted from the in-memory
// state machine and the deletion is persisted to crdt_ops (survives
// restart). Best-effort: legacy filesystem-only stories may not have a
// CRDT entry, so a "not found" error is expected and non-fatal.
match crate::crdt_state::evict_item(story_id) {
Ok(()) => {
slog_warn!(
"[delete_story] Evicted '{story_id}' from CRDT (tombstone persisted to crdt_ops)"
);
}
Err(e) => {
slog_warn!("[delete_story] CRDT eviction for '{story_id}': {e}");
}
}
// 5. Delete from database content store and shadow table.
let found_in_db = crate::db::read_content(story_id).is_some()
|| crate::pipeline_state::read_typed(story_id).ok().flatten().is_some();
crate::db::delete_item(story_id);
slog_warn!("[delete_story] Deleted '{story_id}' from content store / shadow table");
// Also delete filesystem file if it exists (backwards compat).
// 6. Remove the filesystem shadow file from work/N_stage/.
let sk = project_root.join(".huskies").join("work");
let stage_dirs = [
"1_backlog",
@@ -545,19 +590,35 @@ pub(super) async fn tool_delete_story(args: &Value, ctx: &AppContext) -> Result<
for stage in &stage_dirs {
let path = sk.join(stage).join(format!("{story_id}.md"));
if path.exists() {
let _ = fs::remove_file(&path);
slog_warn!("[delete_story] Deleted '{story_id}' from work/{stage}/");
match fs::remove_file(&path) {
Ok(()) => {
slog_warn!(
"[delete_story] Deleted filesystem shadow '{story_id}' from work/{stage}/"
);
deleted_from_fs = true;
}
Err(e) => {
slog_warn!("[delete_story] Failed to delete filesystem shadow '{story_id}' from work/{stage}/: {e}");
failed_steps.push(format!("delete_filesystem({stage}): {e}"));
}
}
break;
}
}
if !found_in_db && !deleted_from_fs {
if !found_in_db && !deleted_from_fs && !timer_removed {
return Err(format!(
"Story '{story_id}' not found in any pipeline stage."
));
}
if !failed_steps.is_empty() {
return Err(format!(
"Story '{story_id}' partially deleted. Failed steps: {}.",
failed_steps.join("; ")
));
}
Ok(format!("Story '{story_id}' deleted from pipeline."))
}
@@ -1508,4 +1569,69 @@ mod tests {
assert!(result.is_ok(), "Expected ok: {result:?}");
assert!(result.unwrap().contains("Criterion 0 checked"));
}
/// Regression test for bug 514: deleting a story must cancel its pending
/// rate-limit retry timer so the tick loop cannot re-spawn an agent.
///
/// Repro (2026-04-09): `delete_story 478_story_rate_limit_repro` returned
/// success and removed the filesystem shadow, but the timer entry in
/// `.huskies/timers.json` survived. Five minutes later the tick loop fired
/// and re-spawned `coder-1` on the deleted story.
#[tokio::test]
async fn delete_story_cancels_pending_timer() {
use chrono::Utc;
let tmp = tempfile::tempdir().unwrap();
let root = tmp.path();
// Create a story file in the backlog.
let backlog = root.join(".huskies/work/1_backlog");
fs::create_dir_all(&backlog).unwrap();
fs::write(
backlog.join("478_story_rate_limit_repro.md"),
"---\nname: \"Rate Limit Repro\"\n---\n",
)
.unwrap();
let ctx = test_ctx(root);
// Schedule a rate-limit retry timer for the story (simulates the
// auto-scheduler that fires after a rate-limit event).
let future_time = Utc::now() + chrono::Duration::minutes(5);
ctx.timer_store
.add("478_story_rate_limit_repro".to_string(), future_time)
.unwrap();
// Sanity: timer is present before deletion.
assert_eq!(ctx.timer_store.list().len(), 1);
// Delete the story.
let result = tool_delete_story(
&json!({"story_id": "478_story_rate_limit_repro"}),
&ctx,
)
.await;
assert!(result.is_ok(), "delete_story failed: {result:?}");
// Timer must be gone — fast-forwarding past the scheduled time should
// return no entries.
assert!(
ctx.timer_store.list().is_empty(),
"timer was not cancelled by delete_story"
);
let far_future = Utc::now() + chrono::Duration::hours(1);
let due = ctx.timer_store.take_due(far_future);
assert!(
due.is_empty(),
"take_due returned a timer for the deleted story: {due:?}"
);
// Filesystem shadow must also be gone.
assert!(
!backlog
.join("478_story_rate_limit_repro.md")
.exists(),
"filesystem shadow was not removed"
);
}
}