huskies: merge 514_story_delete_story_should_do_a_full_cleanup_crdt_op_db_row_filesystem_shadow_worktree_pending_timers
This commit is contained in:
@@ -505,33 +505,78 @@ pub(super) async fn tool_delete_story(args: &Value, ctx: &AppContext) -> Result<
|
||||
.ok_or("Missing required argument: story_id")?;
|
||||
|
||||
let project_root = ctx.agents.get_project_root(&ctx.state)?;
|
||||
let mut failed_steps: Vec<String> = Vec::new();
|
||||
|
||||
// 1. Stop any running agents for this story (best-effort)
|
||||
// 0. Cancel any pending rate-limit retry timers for this story (bug 514).
|
||||
// Must happen before stopping agents so the tick loop cannot re-spawn
|
||||
// an agent after we tear everything else down.
|
||||
let timer_removed = ctx.timer_store.remove(story_id);
|
||||
if timer_removed {
|
||||
slog_warn!("[delete_story] Cancelled pending timer for '{story_id}'");
|
||||
} else {
|
||||
slog_warn!("[delete_story] No pending timer found for '{story_id}'");
|
||||
}
|
||||
|
||||
// 1. Stop any running agents for this story (best-effort).
|
||||
if let Ok(agents) = ctx.agents.list_agents() {
|
||||
for agent in agents.iter().filter(|a| a.story_id == story_id) {
|
||||
let _ = ctx
|
||||
match ctx
|
||||
.agents
|
||||
.stop_agent(&project_root, story_id, &agent.agent_name)
|
||||
.await;
|
||||
.await
|
||||
{
|
||||
Ok(()) => {
|
||||
slog_warn!(
|
||||
"[delete_story] Stopped agent '{}' for '{story_id}'",
|
||||
agent.agent_name
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
slog_warn!(
|
||||
"[delete_story] Failed to stop agent '{}' for '{story_id}': {e}",
|
||||
agent.agent_name
|
||||
);
|
||||
failed_steps.push(format!("stop_agent({}): {e}", agent.agent_name));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Remove agent pool entries
|
||||
ctx.agents.remove_agents_for_story(story_id);
|
||||
// 2. Remove agent pool entries.
|
||||
let removed_count = ctx.agents.remove_agents_for_story(story_id);
|
||||
slog_warn!("[delete_story] Removed {removed_count} agent pool entries for '{story_id}'");
|
||||
|
||||
// 3. Remove worktree (best-effort)
|
||||
// 3. Remove worktree (best-effort).
|
||||
if let Ok(config) = crate::config::ProjectConfig::load(&project_root) {
|
||||
let _ =
|
||||
crate::worktree::remove_worktree_by_story_id(&project_root, story_id, &config).await;
|
||||
match crate::worktree::remove_worktree_by_story_id(&project_root, story_id, &config).await
|
||||
{
|
||||
Ok(()) => slog_warn!("[delete_story] Removed worktree for '{story_id}'"),
|
||||
Err(e) => slog_warn!("[delete_story] Worktree removal for '{story_id}': {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Delete from database content store and CRDT.
|
||||
// 4. Write a CRDT tombstone op so the story is evicted from the in-memory
|
||||
// state machine and the deletion is persisted to crdt_ops (survives
|
||||
// restart). Best-effort: legacy filesystem-only stories may not have a
|
||||
// CRDT entry, so a "not found" error is expected and non-fatal.
|
||||
match crate::crdt_state::evict_item(story_id) {
|
||||
Ok(()) => {
|
||||
slog_warn!(
|
||||
"[delete_story] Evicted '{story_id}' from CRDT (tombstone persisted to crdt_ops)"
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
slog_warn!("[delete_story] CRDT eviction for '{story_id}': {e}");
|
||||
}
|
||||
}
|
||||
|
||||
// 5. Delete from database content store and shadow table.
|
||||
let found_in_db = crate::db::read_content(story_id).is_some()
|
||||
|| crate::pipeline_state::read_typed(story_id).ok().flatten().is_some();
|
||||
|
||||
crate::db::delete_item(story_id);
|
||||
slog_warn!("[delete_story] Deleted '{story_id}' from content store / shadow table");
|
||||
|
||||
// Also delete filesystem file if it exists (backwards compat).
|
||||
// 6. Remove the filesystem shadow file from work/N_stage/.
|
||||
let sk = project_root.join(".huskies").join("work");
|
||||
let stage_dirs = [
|
||||
"1_backlog",
|
||||
@@ -545,19 +590,35 @@ pub(super) async fn tool_delete_story(args: &Value, ctx: &AppContext) -> Result<
|
||||
for stage in &stage_dirs {
|
||||
let path = sk.join(stage).join(format!("{story_id}.md"));
|
||||
if path.exists() {
|
||||
let _ = fs::remove_file(&path);
|
||||
slog_warn!("[delete_story] Deleted '{story_id}' from work/{stage}/");
|
||||
match fs::remove_file(&path) {
|
||||
Ok(()) => {
|
||||
slog_warn!(
|
||||
"[delete_story] Deleted filesystem shadow '{story_id}' from work/{stage}/"
|
||||
);
|
||||
deleted_from_fs = true;
|
||||
}
|
||||
Err(e) => {
|
||||
slog_warn!("[delete_story] Failed to delete filesystem shadow '{story_id}' from work/{stage}/: {e}");
|
||||
failed_steps.push(format!("delete_filesystem({stage}): {e}"));
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if !found_in_db && !deleted_from_fs {
|
||||
if !found_in_db && !deleted_from_fs && !timer_removed {
|
||||
return Err(format!(
|
||||
"Story '{story_id}' not found in any pipeline stage."
|
||||
));
|
||||
}
|
||||
|
||||
if !failed_steps.is_empty() {
|
||||
return Err(format!(
|
||||
"Story '{story_id}' partially deleted. Failed steps: {}.",
|
||||
failed_steps.join("; ")
|
||||
));
|
||||
}
|
||||
|
||||
Ok(format!("Story '{story_id}' deleted from pipeline."))
|
||||
}
|
||||
|
||||
@@ -1508,4 +1569,69 @@ mod tests {
|
||||
assert!(result.is_ok(), "Expected ok: {result:?}");
|
||||
assert!(result.unwrap().contains("Criterion 0 checked"));
|
||||
}
|
||||
|
||||
/// Regression test for bug 514: deleting a story must cancel its pending
|
||||
/// rate-limit retry timer so the tick loop cannot re-spawn an agent.
|
||||
///
|
||||
/// Repro (2026-04-09): `delete_story 478_…` returned success and removed
|
||||
/// the filesystem shadow, but the timer entry in `.huskies/timers.json`
|
||||
/// survived. Five minutes later the tick loop fired and re-spawned
|
||||
/// `coder-1` on the deleted story.
|
||||
#[tokio::test]
|
||||
async fn delete_story_cancels_pending_timer() {
|
||||
use chrono::Utc;
|
||||
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let root = tmp.path();
|
||||
|
||||
// Create a story file in the backlog.
|
||||
let backlog = root.join(".huskies/work/1_backlog");
|
||||
fs::create_dir_all(&backlog).unwrap();
|
||||
fs::write(
|
||||
backlog.join("478_story_rate_limit_repro.md"),
|
||||
"---\nname: \"Rate Limit Repro\"\n---\n",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let ctx = test_ctx(root);
|
||||
|
||||
// Schedule a rate-limit retry timer for the story (simulates the
|
||||
// auto-scheduler that fires after a rate-limit event).
|
||||
let future_time = Utc::now() + chrono::Duration::minutes(5);
|
||||
ctx.timer_store
|
||||
.add("478_story_rate_limit_repro".to_string(), future_time)
|
||||
.unwrap();
|
||||
|
||||
// Sanity: timer is present before deletion.
|
||||
assert_eq!(ctx.timer_store.list().len(), 1);
|
||||
|
||||
// Delete the story.
|
||||
let result = tool_delete_story(
|
||||
&json!({"story_id": "478_story_rate_limit_repro"}),
|
||||
&ctx,
|
||||
)
|
||||
.await;
|
||||
assert!(result.is_ok(), "delete_story failed: {result:?}");
|
||||
|
||||
// Timer must be gone — fast-forwarding past the scheduled time should
|
||||
// return no entries.
|
||||
assert!(
|
||||
ctx.timer_store.list().is_empty(),
|
||||
"timer was not cancelled by delete_story"
|
||||
);
|
||||
let far_future = Utc::now() + chrono::Duration::hours(1);
|
||||
let due = ctx.timer_store.take_due(far_future);
|
||||
assert!(
|
||||
due.is_empty(),
|
||||
"take_due returned a timer for the deleted story: {due:?}"
|
||||
);
|
||||
|
||||
// Filesystem shadow must also be gone.
|
||||
assert!(
|
||||
!backlog
|
||||
.join("478_story_rate_limit_repro.md")
|
||||
.exists(),
|
||||
"filesystem shadow was not removed"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user