huskies: merge 875

This commit is contained in:
dave
2026-04-29 18:40:13 +00:00
parent a956a98197
commit c0801c3894
5 changed files with 339 additions and 144 deletions
+14 -30
View File
@@ -4,7 +4,7 @@
//! stages, stops any running agent, removes the worktree, deletes the file, and
//! commits the change to git.
use crate::agents::{AgentPool, AgentStatus};
use crate::agents::AgentPool;
use crate::chat::util::strip_bot_mention;
use std::path::Path;
@@ -77,39 +77,23 @@ pub async fn handle_delete(
})
.unwrap_or_else(|| story_id.clone());
// Stop any running or pending agents for this story.
let running_agents: Vec<(String, String)> = agents
.list_agents()
.unwrap_or_default()
.into_iter()
.filter(|a| {
a.story_id == story_id
&& matches!(a.status, AgentStatus::Running | AgentStatus::Pending)
})
.map(|a| (a.story_id.clone(), a.agent_name.clone()))
.collect();
let mut stopped_agents: Vec<String> = Vec::new();
for (sid, agent_name) in &running_agents {
if let Err(e) = agents.stop_agent(project_root, sid, agent_name).await {
return format!("Failed to stop agent '{agent_name}' for story {story_number}: {e}");
}
stopped_agents.push(agent_name.clone());
}
// Remove the worktree if one exists (best-effort; ignore errors).
let _ = crate::worktree::prune_worktree_sync(project_root, &story_id);
// Delete from the content store and CRDT.
crate::db::delete_content(&story_id);
crate::db::delete_item(&story_id);
let _ = crate::crdt_state::evict_item(&story_id);
let outcome = match crate::service::work_item::delete::delete_work_item(
&story_id,
project_root,
agents,
None,
)
.await
{
Ok(o) => o,
Err(e) => return e,
};
// Build the response.
let stage_label = stage_display_name(&stage);
let mut response = format!("Deleted **{story_name}** from **{stage_label}**.");
if !stopped_agents.is_empty() {
let agent_list = stopped_agents.join(", ");
if !outcome.agents_stopped.is_empty() {
let agent_list = outcome.agents_stopped.join(", ");
response.push_str(&format!(" Stopped agent(s): {agent_list}."));
}
+10 -114
View File
@@ -2,9 +2,7 @@
use crate::agents::{feature_branch_has_unmerged_changes, move_story_to_done};
use crate::http::context::AppContext;
use crate::slog_warn;
use serde_json::Value;
use std::fs;
pub(crate) fn tool_accept_story(args: &Value, ctx: &AppContext) -> Result<String, String> {
let story_id = args
@@ -39,122 +37,19 @@ pub(crate) async fn tool_delete_story(args: &Value, ctx: &AppContext) -> Result<
.ok_or("Missing required argument: story_id")?;
let project_root = ctx.services.agents.get_project_root(&ctx.state)?;
let mut failed_steps: Vec<String> = Vec::new();
// 0. Cancel any pending rate-limit retry timers for this story (bug 514).
// Must happen before stopping agents so the tick loop cannot re-spawn
// an agent after we tear everything else down.
let timer_removed = ctx.timer_store.remove(story_id);
if timer_removed {
slog_warn!("[delete_story] Cancelled pending timer for '{story_id}'");
} else {
slog_warn!("[delete_story] No pending timer found for '{story_id}'");
}
let outcome = crate::service::work_item::delete::delete_work_item(
story_id,
&project_root,
&ctx.services.agents,
Some(&ctx.timer_store),
)
.await?;
// 1. Stop any running agents for this story (best-effort).
if let Ok(agents) = ctx.services.agents.list_agents() {
for agent in agents.iter().filter(|a| a.story_id == story_id) {
match ctx
.services
.agents
.stop_agent(&project_root, story_id, &agent.agent_name)
.await
{
Ok(()) => {
slog_warn!(
"[delete_story] Stopped agent '{}' for '{story_id}'",
agent.agent_name
);
}
Err(e) => {
slog_warn!(
"[delete_story] Failed to stop agent '{}' for '{story_id}': {e}",
agent.agent_name
);
failed_steps.push(format!("stop_agent({}): {e}", agent.agent_name));
}
}
}
}
// 2. Remove agent pool entries.
let removed_count = ctx.services.agents.remove_agents_for_story(story_id);
slog_warn!("[delete_story] Removed {removed_count} agent pool entries for '{story_id}'");
// 3. Remove worktree (best-effort).
if let Ok(config) = crate::config::ProjectConfig::load(&project_root) {
match crate::worktree::remove_worktree_by_story_id(&project_root, story_id, &config).await {
Ok(()) => slog_warn!("[delete_story] Removed worktree for '{story_id}'"),
Err(e) => slog_warn!("[delete_story] Worktree removal for '{story_id}': {e}"),
}
}
// 4. Write a CRDT tombstone op so the story is evicted from the in-memory
// state machine and the deletion is persisted to crdt_ops (survives
// restart). Best-effort: legacy filesystem-only stories may not have a
// CRDT entry, so a "not found" error is expected and non-fatal.
match crate::crdt_state::evict_item(story_id) {
Ok(()) => {
slog_warn!(
"[delete_story] Evicted '{story_id}' from CRDT (tombstone persisted to crdt_ops)"
);
}
Err(e) => {
slog_warn!("[delete_story] CRDT eviction for '{story_id}': {e}");
}
}
// 5. Delete from database content store and shadow table.
let found_in_db = crate::db::read_content(story_id).is_some()
|| crate::pipeline_state::read_typed(story_id)
.ok()
.flatten()
.is_some();
crate::db::delete_item(story_id);
slog_warn!("[delete_story] Deleted '{story_id}' from content store / shadow table");
// 6. Remove the filesystem shadow file from work/N_stage/.
let sk = project_root.join(".huskies").join("work");
let stage_dirs = [
"1_backlog",
"2_current",
"3_qa",
"4_merge",
"5_done",
"6_archived",
];
let mut deleted_from_fs = false;
for stage in &stage_dirs {
let path = sk.join(stage).join(format!("{story_id}.md"));
if path.exists() {
match fs::remove_file(&path) {
Ok(()) => {
slog_warn!(
"[delete_story] Deleted filesystem shadow '{story_id}' from work/{stage}/"
);
deleted_from_fs = true;
}
Err(e) => {
slog_warn!(
"[delete_story] Failed to delete filesystem shadow '{story_id}' from work/{stage}/: {e}"
);
failed_steps.push(format!("delete_filesystem({stage}): {e}"));
}
}
break;
}
}
if !found_in_db && !deleted_from_fs && !timer_removed {
return Err(format!(
"Story '{story_id}' not found in any pipeline stage."
));
}
if !failed_steps.is_empty() {
if !outcome.failed_steps.is_empty() {
return Err(format!(
"Story '{story_id}' partially deleted. Failed steps: {}.",
failed_steps.join("; ")
outcome.failed_steps.join("; ")
));
}
@@ -166,6 +61,7 @@ mod tests {
use super::*;
use crate::http::test_helpers::test_ctx;
use serde_json::json;
use std::fs;
fn setup_git_repo_in(dir: &std::path::Path) {
std::process::Command::new("git")
+2
View File
@@ -47,5 +47,7 @@ pub mod story;
pub mod timer;
/// Wizard — multi-step project setup domain logic.
pub mod wizard;
/// Work-item — cross-cutting domain logic for all pipeline work-item types.
pub mod work_item;
/// WebSocket — real-time pipeline updates and permission prompts.
pub mod ws;
+308
View File
@@ -0,0 +1,308 @@
//! Canonical work-item deletion — stops agents, removes the worktree, evicts
//! from the CRDT, and cleans up all database and filesystem traces.
//!
//! Both `chat::transport::matrix::delete` and
//! `http::mcp::story_tools::story::delete` delegate here so the full deletion
//! sequence is defined in exactly one place.
use crate::agents::AgentPool;
use crate::service::timer::TimerStore;
use crate::slog_warn;
use std::path::Path;
/// Outcome of a successful [`delete_work_item`] call.
///
/// `failed_steps` is non-empty when the item was found but some cleanup steps
/// could not be completed (e.g. worktree removal failed). Callers decide
/// whether partial failures should be surfaced as errors.
#[derive(Debug)]
// Not every caller inspects every field (the chat delete path only reads
// `agents_stopped`, the MCP path only reads `failed_steps`), so the struct is
// allowed to carry fields a given call site leaves unused.
#[allow(dead_code)]
pub struct DeleteOutcome {
    /// `true` when a pending rate-limit retry timer was cancelled.
    pub timer_cancelled: bool,
    /// Display names of agents that were stopped during the delete.
    pub agents_stopped: Vec<String>,
    /// `true` when the item was found and removed from the database.
    /// Covers both the content store and the CRDT-backed pipeline state.
    pub found_in_db: bool,
    /// `true` when the filesystem shadow file was found and deleted.
    pub deleted_from_fs: bool,
    /// Descriptions of cleanup steps that failed (non-fatal individually).
    pub failed_steps: Vec<String>,
}
/// Delete a work item completely from the pipeline.
///
/// Performs the canonical deletion sequence:
/// 1. Cancel any pending rate-limit retry timer (when `timer_store` is `Some`).
/// 2. Stop all running or pending agents for the story.
/// 3. Remove the agent pool entries.
/// 4. Remove the git worktree (best-effort).
/// 5. Write a CRDT tombstone op so the deletion survives a restart.
/// 6. Delete from the database content store and shadow table.
/// 7. Remove the filesystem shadow file from `.huskies/work/N_stage/`.
///
/// Returns [`Err`] when the item was not found in any location (no DB entry,
/// no filesystem shadow, and no timer was cancelled). Returns [`Ok`]
/// otherwise; inspect [`DeleteOutcome::failed_steps`] for partial failures.
pub async fn delete_work_item(
    story_id: &str,
    project_root: &Path,
    agents: &AgentPool,
    timer_store: Option<&TimerStore>,
) -> Result<DeleteOutcome, String> {
    let mut failed_steps: Vec<String> = Vec::new();
    // Pre-flight: check whether the item exists in any store before doing
    // destructive work. We probe the content store, CRDT, and filesystem
    // shadow dirs so we can return a clear "not found" when nothing matches.
    let found_in_content = crate::db::read_content(story_id).is_some();
    let found_in_crdt = crate::pipeline_state::read_typed(story_id)
        .ok()
        .flatten()
        .is_some();
    let shadow_root = project_root.join(".huskies").join("work");
    let stage_dirs = [
        "1_backlog",
        "2_current",
        "3_qa",
        "4_merge",
        "5_done",
        "6_archived",
    ];
    let found_on_fs = stage_dirs
        .iter()
        .any(|s| shadow_root.join(s).join(format!("{story_id}.md")).exists());
    // 0. Cancel any pending rate-limit retry timer (bug 514).
    //    Must happen before stopping agents so the tick loop cannot re-spawn
    //    an agent after we tear everything else down.
    let timer_cancelled = if let Some(ts) = timer_store {
        let removed = ts.remove(story_id);
        if removed {
            slog_warn!("[delete_work_item] Cancelled pending timer for '{story_id}'");
        }
        removed
    } else {
        false
    };
    // Nothing matched anywhere (and no timer was live): report not-found
    // before any destructive step has run.
    if !found_in_content && !found_in_crdt && !found_on_fs && !timer_cancelled {
        return Err(format!(
            "Story '{story_id}' not found in any pipeline stage."
        ));
    }
    // 1. Stop any running/pending agents (best-effort).
    // NOTE(review): the filter below matches on story_id only — there is no
    // status filter, so agents in any state (not just Running/Pending, as the
    // doc comment says) receive a stop_agent call. Confirm stop_agent is a
    // no-op for already-finished agents; otherwise completed agents could
    // push into failed_steps and turn a clean delete into a partial failure.
    let mut agents_stopped: Vec<String> = Vec::new();
    if let Ok(agent_list) = agents.list_agents() {
        for agent in agent_list.iter().filter(|a| a.story_id == story_id) {
            match agents
                .stop_agent(project_root, story_id, &agent.agent_name)
                .await
            {
                Ok(()) => {
                    slog_warn!(
                        "[delete_work_item] Stopped agent '{}' for '{story_id}'",
                        agent.agent_name
                    );
                    agents_stopped.push(agent.agent_name.clone());
                }
                Err(e) => {
                    slog_warn!(
                        "[delete_work_item] Failed to stop agent '{}' for '{story_id}': {e}",
                        agent.agent_name
                    );
                    // Recorded, not fatal: the caller decides whether partial
                    // failures abort the operation.
                    failed_steps.push(format!("stop_agent({}): {e}", agent.agent_name));
                }
            }
        }
    }
    // 2. Remove agent pool entries.
    let removed_count = agents.remove_agents_for_story(story_id);
    slog_warn!("[delete_work_item] Removed {removed_count} agent pool entries for '{story_id}'");
    // 3. Remove worktree (best-effort).
    // NOTE(review): if ProjectConfig::load fails, worktree removal is skipped
    // silently and nothing is logged or pushed to failed_steps — presumably
    // acceptable for best-effort cleanup, but worth confirming.
    if let Ok(config) = crate::config::ProjectConfig::load(project_root) {
        match crate::worktree::remove_worktree_by_story_id(project_root, story_id, &config).await {
            Ok(()) => slog_warn!("[delete_work_item] Removed worktree for '{story_id}'"),
            Err(e) => slog_warn!("[delete_work_item] Worktree removal for '{story_id}': {e}"),
        }
    }
    // 4. Write a CRDT tombstone op so the deletion is persisted to crdt_ops
    //    and survives a restart. Legacy filesystem-only stories may not have
    //    a CRDT entry, so a "not found" error is expected and non-fatal.
    match crate::crdt_state::evict_item(story_id) {
        Ok(()) => {
            slog_warn!("[delete_work_item] Evicted '{story_id}' from CRDT (tombstone persisted)")
        }
        Err(e) => slog_warn!("[delete_work_item] CRDT eviction for '{story_id}': {e}"),
    }
    // 5. Delete from database content store and shadow table.
    // NOTE(review): only delete_item is called here; the pre-merge chat delete
    // path also called crate::db::delete_content explicitly. Confirm that
    // delete_item subsumes the content-row deletion, else content rows leak.
    crate::db::delete_item(story_id);
    slog_warn!("[delete_work_item] Deleted '{story_id}' from content store / shadow table");
    // 6. Remove the filesystem shadow file from work/N_stage/.
    //    A story lives in at most one stage dir, hence the break on first hit.
    let mut deleted_from_fs = false;
    for stage in &stage_dirs {
        let path = shadow_root.join(stage).join(format!("{story_id}.md"));
        if path.exists() {
            match std::fs::remove_file(&path) {
                Ok(()) => {
                    slog_warn!(
                        "[delete_work_item] Deleted filesystem shadow '{story_id}' from work/{stage}/"
                    );
                    deleted_from_fs = true;
                }
                Err(e) => {
                    slog_warn!(
                        "[delete_work_item] Failed to delete shadow '{story_id}' from work/{stage}/: {e}"
                    );
                    failed_steps.push(format!("delete_filesystem({stage}): {e}"));
                }
            }
            break;
        }
    }
    Ok(DeleteOutcome {
        timer_cancelled,
        agents_stopped,
        found_in_db: found_in_content || found_in_crdt,
        deleted_from_fs,
        failed_steps,
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::sync::Arc;

    /// Build a throwaway agent pool via the test-only constructor.
    fn test_pool() -> Arc<AgentPool> {
        Arc::new(AgentPool::new_test(3000))
    }

    /// Structural regression test for bug 874: after [`delete_work_item`]
    /// the CRDT carries an `is_deleted` tombstone, the content store has no
    /// row, and the agent pool holds no entries for the story.
    #[tokio::test]
    async fn delete_work_item_clears_crdt_db_and_agents() {
        crate::crdt_state::init_for_test();
        let sid = "8750_story_service_delete_regression";

        // Seed the CRDT with a backlog item.
        crate::crdt_state::write_item(
            sid,
            "1_backlog",
            Some("Service Delete Regression"),
            None,
            None,
            None,
            None,
            None,
            None,
            None,
        );
        // Seed the content store with matching front matter.
        crate::db::ensure_content_store();
        crate::db::write_item_with_content(
            sid,
            "1_backlog",
            "---\nname: Service Delete Regression\n---\n",
        );

        let dir = tempfile::tempdir().unwrap();
        let pool = test_pool();
        let result = delete_work_item(sid, dir.path(), &pool, None).await;
        assert!(result.is_ok(), "expected Ok: {result:?}");

        // Tombstone must be visible in the CRDT dump.
        let state = crate::crdt_state::dump_crdt_state(Some(sid));
        let tombstoned = state
            .items
            .iter()
            .any(|i| i.story_id.as_deref() == Some(sid) && i.is_deleted);
        assert!(tombstoned, "CRDT must show is_deleted=true after delete");

        // No content row may survive.
        assert!(
            crate::db::read_content(sid).is_none(),
            "content store must not contain the story after delete"
        );

        // No agent pool entries may survive.
        let remaining = pool
            .list_agents()
            .unwrap_or_default()
            .iter()
            .filter(|a| a.story_id == sid)
            .count();
        assert_eq!(
            remaining, 0,
            "agent pool must have no entries for the deleted story"
        );
    }

    /// Deleting an id that exists in no store surfaces a "not found" error.
    #[tokio::test]
    async fn delete_work_item_not_found_returns_err() {
        let dir = tempfile::tempdir().unwrap();
        let pool = test_pool();
        let err = delete_work_item("99_nonexistent", dir.path(), &pool, None)
            .await
            .unwrap_err();
        assert!(err.contains("not found"));
    }

    /// The filesystem shadow under `.huskies/work/N_stage/` is removed.
    #[tokio::test]
    async fn delete_work_item_removes_filesystem_shadow() {
        let dir = tempfile::tempdir().unwrap();
        let stage_dir = dir.path().join(".huskies/work/1_backlog");
        fs::create_dir_all(&stage_dir).unwrap();
        let shadow = stage_dir.join("8751_story_fs_delete.md");
        fs::write(&shadow, "---\nname: FS Delete\n---\n").unwrap();

        let pool = test_pool();
        let result = delete_work_item("8751_story_fs_delete", dir.path(), &pool, None).await;
        assert!(result.is_ok(), "expected Ok: {result:?}");
        assert!(!shadow.exists(), "shadow file must be deleted");
        assert!(
            result.unwrap().deleted_from_fs,
            "deleted_from_fs must be true"
        );
    }

    /// A pending retry timer for the story is cancelled during the delete.
    #[tokio::test]
    async fn delete_work_item_cancels_timer() {
        use crate::service::timer::TimerStore;
        use chrono::Utc;

        let dir = tempfile::tempdir().unwrap();
        let stage_dir = dir.path().join(".huskies/work/1_backlog");
        fs::create_dir_all(&stage_dir).unwrap();
        fs::write(
            stage_dir.join("8752_story_timer.md"),
            "---\nname: Timer\n---\n",
        )
        .unwrap();

        let timers = TimerStore::load(dir.path().join(".huskies/timers.json"));
        timers
            .add(
                "8752_story_timer".to_string(),
                Utc::now() + chrono::Duration::minutes(5),
            )
            .unwrap();

        let pool = test_pool();
        let result =
            delete_work_item("8752_story_timer", dir.path(), &pool, Some(&timers)).await;
        assert!(result.is_ok(), "expected Ok: {result:?}");
        let outcome = result.unwrap();
        assert!(outcome.timer_cancelled, "timer_cancelled must be true");
        assert!(
            timers.list().is_empty(),
            "timer store must be empty after delete"
        );
    }
}
+5
View File
@@ -0,0 +1,5 @@
//! Work-item service — cross-cutting domain logic that applies to all pipeline
//! work-item types (stories, bugs, spikes, refactors).

/// Canonical delete sequence for any work item type.
///
/// See [`delete::delete_work_item`] for the full step-by-step deletion
/// sequence shared by the chat and MCP delete paths.
pub mod delete;