huskies: merge 645_bug_agent_runtime_panics_with_output_write_bytes_is_ok_assertion_marking_stories_falsely_blocked
This commit is contained in:
@@ -115,8 +115,78 @@ impl AgentPool {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Bug 645: Before retry/block, check if the agent left committed
|
||||
// work that compiles. An agent may crash mid-output (e.g. Claude
|
||||
// Code CLI PTY write assertion) after having already committed valid
|
||||
// code. When committed work survives and `cargo check` passes,
|
||||
// advance to QA instead of wasting retries.
|
||||
let work_survived = worktree_path.as_ref().is_some_and(|wt_path| {
|
||||
crate::agents::gates::worktree_has_committed_work(wt_path)
|
||||
&& crate::agents::gates::cargo_check_in_worktree(wt_path)
|
||||
});
|
||||
if work_survived {
|
||||
slog!(
|
||||
"[pipeline] Coder '{agent_name}' failed gates for '{story_id}' but \
|
||||
committed work survives and compiles. Advancing to QA instead of \
|
||||
retrying (bug 645)."
|
||||
);
|
||||
let qa_mode = {
|
||||
let item_type = crate::agents::lifecycle::item_type_from_id(story_id);
|
||||
if item_type == "spike" {
|
||||
crate::io::story_metadata::QaMode::Human
|
||||
} else {
|
||||
let default_qa = config.default_qa_mode();
|
||||
resolve_qa_mode_from_store(&project_root, story_id, default_qa)
|
||||
}
|
||||
};
|
||||
match qa_mode {
|
||||
crate::io::story_metadata::QaMode::Server => {
|
||||
if let Err(e) = crate::agents::lifecycle::move_story_to_merge(
|
||||
&project_root,
|
||||
story_id,
|
||||
) {
|
||||
slog_error!(
|
||||
"[pipeline] Failed to move '{story_id}' to 4_merge/: {e}"
|
||||
);
|
||||
} else {
|
||||
self.start_mergemaster_or_block(&project_root, story_id)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
crate::io::story_metadata::QaMode::Agent => {
|
||||
if let Err(e) = crate::agents::lifecycle::move_story_to_qa(
|
||||
&project_root,
|
||||
story_id,
|
||||
) {
|
||||
slog_error!(
|
||||
"[pipeline] Failed to move '{story_id}' to 3_qa/: {e}"
|
||||
);
|
||||
} else if let Err(e) = self
|
||||
.start_agent(&project_root, story_id, Some("qa"), None, None)
|
||||
.await
|
||||
{
|
||||
slog_error!(
|
||||
"[pipeline] Failed to start qa for '{story_id}': {e}"
|
||||
);
|
||||
}
|
||||
}
|
||||
crate::io::story_metadata::QaMode::Human => {
|
||||
if let Err(e) = crate::agents::lifecycle::move_story_to_qa(
|
||||
&project_root,
|
||||
story_id,
|
||||
) {
|
||||
slog_error!(
|
||||
"[pipeline] Failed to move '{story_id}' to 3_qa/: {e}"
|
||||
);
|
||||
} else {
|
||||
write_review_hold_to_store(story_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else
|
||||
// Increment retry count and check if blocked.
|
||||
if let Some(reason) = should_block_story(story_id, config.max_retries, "coder")
|
||||
if let Some(reason) =
|
||||
should_block_story(story_id, config.max_retries, "coder")
|
||||
{
|
||||
// Story has exceeded retry limit — do not restart.
|
||||
let _ = self.watcher_tx.send(WatcherEvent::StoryBlocked {
|
||||
@@ -1062,4 +1132,218 @@ stage = "qa"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ── bug 645: work-survived check advances to QA instead of blocking ──
|
||||
|
||||
/// Integration test: when a coder agent fails gates but committed work
|
||||
/// survives and compiles, the story advances to QA (not retry/block).
|
||||
/// Simulates an agent that commits work and then dies mid-output.
|
||||
#[tokio::test]
|
||||
async fn work_survived_advances_to_qa_instead_of_blocking() {
|
||||
use std::fs;
|
||||
use std::process::Command;
|
||||
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let root = tmp.path();
|
||||
|
||||
// Init a git repo with a minimal Cargo project.
|
||||
Command::new("git")
|
||||
.args(["init"])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
Command::new("git")
|
||||
.args(["config", "user.email", "test@test.com"])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
Command::new("git")
|
||||
.args(["config", "user.name", "Test"])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
fs::write(
|
||||
root.join("Cargo.toml"),
|
||||
"[package]\nname = \"test_proj\"\nversion = \"0.1.0\"\nedition = \"2021\"\n",
|
||||
)
|
||||
.unwrap();
|
||||
fs::create_dir_all(root.join("src")).unwrap();
|
||||
fs::write(root.join("src/lib.rs"), "// empty\n").unwrap();
|
||||
Command::new("git")
|
||||
.args(["add", "."])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
Command::new("git")
|
||||
.args(["commit", "-m", "init"])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// Create a worktree on a feature branch.
|
||||
let wt_path = tmp.path().join("wt");
|
||||
Command::new("git")
|
||||
.args([
|
||||
"worktree",
|
||||
"add",
|
||||
&wt_path.to_string_lossy(),
|
||||
"-b",
|
||||
"feature/story-9945_story_survived",
|
||||
])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// Commit valid code on the feature branch.
|
||||
fs::write(wt_path.join("src/lib.rs"), "pub fn survived() {}\n").unwrap();
|
||||
Command::new("git")
|
||||
.args(["add", "."])
|
||||
.current_dir(&wt_path)
|
||||
.output()
|
||||
.unwrap();
|
||||
Command::new("git")
|
||||
.args(["commit", "-m", "add survived fn"])
|
||||
.current_dir(&wt_path)
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// Set up the story in the content store.
|
||||
crate::db::ensure_content_store();
|
||||
crate::db::write_content("9945_story_survived", "---\nname: Survived Test\n---\n");
|
||||
crate::db::write_item_with_content(
|
||||
"9945_story_survived",
|
||||
"2_current",
|
||||
"---\nname: Survived Test\n---\n",
|
||||
);
|
||||
|
||||
let pool = AgentPool::new_test(3001);
|
||||
|
||||
// Simulate coder failing gates (e.g. agent crashed, dirty worktree).
|
||||
pool.run_pipeline_advance(
|
||||
"9945_story_survived",
|
||||
"coder-1",
|
||||
CompletionReport {
|
||||
summary: "Agent crashed".to_string(),
|
||||
gates_passed: false,
|
||||
gate_output: "Worktree has uncommitted changes".to_string(),
|
||||
},
|
||||
Some(root.to_path_buf()),
|
||||
Some(wt_path),
|
||||
false,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
// Story should have advanced — content store should reflect the move.
|
||||
// The work-survived check should have moved it to QA (or merge for
|
||||
// server qa mode), NOT incremented retry_count.
|
||||
let content = crate::db::read_content("9945_story_survived")
|
||||
.expect("story should exist in content store");
|
||||
assert!(
|
||||
!content.contains("blocked"),
|
||||
"story should NOT be blocked when committed work survives: {content}"
|
||||
);
|
||||
assert!(
|
||||
!content.contains("retry_count"),
|
||||
"story should NOT have retry_count when work survived: {content}"
|
||||
);
|
||||
}
|
||||
|
||||
/// Backwards-compat: agents that die WITHOUT committed work still get
|
||||
/// the existing retry/block treatment.
|
||||
#[tokio::test]
|
||||
async fn no_committed_work_still_retries_and_blocks() {
|
||||
use std::fs;
|
||||
use std::process::Command;
|
||||
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let root = tmp.path();
|
||||
|
||||
// Init a git repo (no Cargo project needed — cargo check will fail).
|
||||
Command::new("git")
|
||||
.args(["init"])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
Command::new("git")
|
||||
.args(["config", "user.email", "test@test.com"])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
Command::new("git")
|
||||
.args(["config", "user.name", "Test"])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
Command::new("git")
|
||||
.args(["commit", "--allow-empty", "-m", "init"])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// Create a worktree with NO commits on the feature branch.
|
||||
let wt_path = tmp.path().join("wt");
|
||||
Command::new("git")
|
||||
.args([
|
||||
"worktree",
|
||||
"add",
|
||||
&wt_path.to_string_lossy(),
|
||||
"-b",
|
||||
"feature/story-9946_story_nowork",
|
||||
])
|
||||
.current_dir(root)
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// Set up the story with max_retries=1 so it blocks immediately.
|
||||
crate::db::ensure_content_store();
|
||||
crate::db::write_content("9946_story_nowork", "---\nname: No Work Test\n---\n");
|
||||
crate::db::write_item_with_content(
|
||||
"9946_story_nowork",
|
||||
"2_current",
|
||||
"---\nname: No Work Test\n---\n",
|
||||
);
|
||||
|
||||
// Write a project.toml with max_retries = 1.
|
||||
fs::create_dir_all(root.join(".huskies")).unwrap();
|
||||
fs::write(
|
||||
root.join(".huskies/project.toml"),
|
||||
"max_retries = 1\n\n[[agent]]\nname = \"coder-1\"\nstage = \"coder\"\n",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let pool = AgentPool::new_test(3001);
|
||||
let mut rx = pool.watcher_tx.subscribe();
|
||||
|
||||
// Simulate coder failing gates with NO committed work on the worktree.
|
||||
pool.run_pipeline_advance(
|
||||
"9946_story_nowork",
|
||||
"coder-1",
|
||||
CompletionReport {
|
||||
summary: "Agent crashed".to_string(),
|
||||
gates_passed: false,
|
||||
gate_output: "Tests failed".to_string(),
|
||||
},
|
||||
Some(root.to_path_buf()),
|
||||
Some(wt_path),
|
||||
false,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
// With no committed work and max_retries=1, the story should be blocked.
|
||||
let mut got_blocked = false;
|
||||
while let Ok(evt) = rx.try_recv() {
|
||||
if let WatcherEvent::StoryBlocked { story_id, .. } = &evt
|
||||
&& story_id == "9946_story_nowork"
|
||||
{
|
||||
got_blocked = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert!(
|
||||
got_blocked,
|
||||
"Story with no committed work should be blocked after exceeding retry limit"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user