story-kit: merge 311_story_server_enforced_retry_limits_for_failed_merge_and_empty_diff_stories
This commit is contained in:
@@ -925,22 +925,30 @@ impl AgentPool {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
slog!(
|
||||
"[pipeline] Coder '{agent_name}' failed gates for '{story_id}'. Restarting."
|
||||
);
|
||||
let context = format!(
|
||||
"\n\n---\n## Previous Attempt Failed\n\
|
||||
The acceptance gates failed with the following output:\n{}\n\n\
|
||||
Please review the failures above, fix the issues, and try again.",
|
||||
completion.gate_output
|
||||
);
|
||||
if let Err(e) = self
|
||||
.start_agent(&project_root, story_id, Some(agent_name), Some(&context))
|
||||
.await
|
||||
{
|
||||
slog_error!(
|
||||
"[pipeline] Failed to restart coder '{agent_name}' for '{story_id}': {e}"
|
||||
// Increment retry count and check if blocked.
|
||||
let story_path = project_root
|
||||
.join(".story_kit/work/2_current")
|
||||
.join(format!("{story_id}.md"));
|
||||
if should_block_story(&story_path, config.max_retries, story_id, "coder") {
|
||||
// Story has exceeded retry limit — do not restart.
|
||||
} else {
|
||||
slog!(
|
||||
"[pipeline] Coder '{agent_name}' failed gates for '{story_id}'. Restarting."
|
||||
);
|
||||
let context = format!(
|
||||
"\n\n---\n## Previous Attempt Failed\n\
|
||||
The acceptance gates failed with the following output:\n{}\n\n\
|
||||
Please review the failures above, fix the issues, and try again.",
|
||||
completion.gate_output
|
||||
);
|
||||
if let Err(e) = self
|
||||
.start_agent(&project_root, story_id, Some(agent_name), Some(&context))
|
||||
.await
|
||||
{
|
||||
slog_error!(
|
||||
"[pipeline] Failed to restart coder '{agent_name}' for '{story_id}': {e}"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1017,14 +1025,42 @@ impl AgentPool {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
slog!(
|
||||
"[pipeline] QA coverage gate failed for '{story_id}'. Restarting QA."
|
||||
);
|
||||
let story_path = project_root
|
||||
.join(".story_kit/work/3_qa")
|
||||
.join(format!("{story_id}.md"));
|
||||
if should_block_story(&story_path, config.max_retries, story_id, "qa-coverage") {
|
||||
// Story has exceeded retry limit — do not restart.
|
||||
} else {
|
||||
slog!(
|
||||
"[pipeline] QA coverage gate failed for '{story_id}'. Restarting QA."
|
||||
);
|
||||
let context = format!(
|
||||
"\n\n---\n## Coverage Gate Failed\n\
|
||||
The coverage gate (script/test_coverage) failed with the following output:\n{}\n\n\
|
||||
Please improve test coverage until the coverage gate passes.",
|
||||
coverage_output
|
||||
);
|
||||
if let Err(e) = self
|
||||
.start_agent(&project_root, story_id, Some("qa"), Some(&context))
|
||||
.await
|
||||
{
|
||||
slog_error!("[pipeline] Failed to restart qa for '{story_id}': {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let story_path = project_root
|
||||
.join(".story_kit/work/3_qa")
|
||||
.join(format!("{story_id}.md"));
|
||||
if should_block_story(&story_path, config.max_retries, story_id, "qa") {
|
||||
// Story has exceeded retry limit — do not restart.
|
||||
} else {
|
||||
slog!("[pipeline] QA failed gates for '{story_id}'. Restarting.");
|
||||
let context = format!(
|
||||
"\n\n---\n## Coverage Gate Failed\n\
|
||||
The coverage gate (script/test_coverage) failed with the following output:\n{}\n\n\
|
||||
Please improve test coverage until the coverage gate passes.",
|
||||
coverage_output
|
||||
"\n\n---\n## Previous QA Attempt Failed\n\
|
||||
The acceptance gates failed with the following output:\n{}\n\n\
|
||||
Please re-run and fix the issues.",
|
||||
completion.gate_output
|
||||
);
|
||||
if let Err(e) = self
|
||||
.start_agent(&project_root, story_id, Some("qa"), Some(&context))
|
||||
@@ -1033,20 +1069,6 @@ impl AgentPool {
|
||||
slog_error!("[pipeline] Failed to restart qa for '{story_id}': {e}");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
slog!("[pipeline] QA failed gates for '{story_id}'. Restarting.");
|
||||
let context = format!(
|
||||
"\n\n---\n## Previous QA Attempt Failed\n\
|
||||
The acceptance gates failed with the following output:\n{}\n\n\
|
||||
Please re-run and fix the issues.",
|
||||
completion.gate_output
|
||||
);
|
||||
if let Err(e) = self
|
||||
.start_agent(&project_root, story_id, Some("qa"), Some(&context))
|
||||
.await
|
||||
{
|
||||
slog_error!("[pipeline] Failed to restart qa for '{story_id}': {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
PipelineStage::Mergemaster => {
|
||||
@@ -1103,27 +1125,34 @@ impl AgentPool {
|
||||
"[pipeline] Story '{story_id}' done. Worktree preserved for inspection."
|
||||
);
|
||||
} else {
|
||||
slog!(
|
||||
"[pipeline] Post-merge tests failed for '{story_id}'. Restarting mergemaster."
|
||||
);
|
||||
let context = format!(
|
||||
"\n\n---\n## Post-Merge Test Failed\n\
|
||||
The tests on master failed with the following output:\n{}\n\n\
|
||||
Please investigate and resolve the failures, then call merge_agent_work again.",
|
||||
output
|
||||
);
|
||||
if let Err(e) = self
|
||||
.start_agent(
|
||||
&project_root,
|
||||
story_id,
|
||||
Some("mergemaster"),
|
||||
Some(&context),
|
||||
)
|
||||
.await
|
||||
{
|
||||
slog_error!(
|
||||
"[pipeline] Failed to restart mergemaster for '{story_id}': {e}"
|
||||
let story_path = project_root
|
||||
.join(".story_kit/work/4_merge")
|
||||
.join(format!("{story_id}.md"));
|
||||
if should_block_story(&story_path, config.max_retries, story_id, "mergemaster") {
|
||||
// Story has exceeded retry limit — do not restart.
|
||||
} else {
|
||||
slog!(
|
||||
"[pipeline] Post-merge tests failed for '{story_id}'. Restarting mergemaster."
|
||||
);
|
||||
let context = format!(
|
||||
"\n\n---\n## Post-Merge Test Failed\n\
|
||||
The tests on master failed with the following output:\n{}\n\n\
|
||||
Please investigate and resolve the failures, then call merge_agent_work again.",
|
||||
output
|
||||
);
|
||||
if let Err(e) = self
|
||||
.start_agent(
|
||||
&project_root,
|
||||
story_id,
|
||||
Some("mergemaster"),
|
||||
Some(&context),
|
||||
)
|
||||
.await
|
||||
{
|
||||
slog_error!(
|
||||
"[pipeline] Failed to restart mergemaster for '{story_id}': {e}"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1563,6 +1592,44 @@ impl AgentPool {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip blocked stories (retry limit exceeded).
|
||||
if is_story_blocked(project_root, stage_dir, story_id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip stories in 4_merge/ that already have a reported merge failure.
|
||||
// These need human intervention — auto-assigning a new mergemaster
|
||||
// would just waste tokens on the same broken merge.
|
||||
if *stage == PipelineStage::Mergemaster
|
||||
&& has_merge_failure(project_root, stage_dir, story_id)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// AC6: Detect empty-diff stories in 4_merge/ before starting a
|
||||
// mergemaster. If the worktree has no commits on the feature branch,
|
||||
// write a merge_failure and block the story immediately.
|
||||
if *stage == PipelineStage::Mergemaster
|
||||
&& let Some(wt_path) = worktree::find_worktree_path(project_root, story_id)
|
||||
&& !super::gates::worktree_has_committed_work(&wt_path)
|
||||
{
|
||||
slog_warn!(
|
||||
"[auto-assign] Story '{story_id}' in 4_merge/ has no commits \
|
||||
on feature branch. Writing merge_failure and blocking."
|
||||
);
|
||||
let story_path = project_root
|
||||
.join(".story_kit/work")
|
||||
.join(stage_dir)
|
||||
.join(format!("{story_id}.md"));
|
||||
let _ = crate::io::story_metadata::write_merge_failure(
|
||||
&story_path,
|
||||
"Feature branch has no code changes — the coder agent \
|
||||
did not produce any commits.",
|
||||
);
|
||||
let _ = crate::io::story_metadata::write_blocked(&story_path);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Re-acquire the lock on each iteration to see state changes
|
||||
// from previous start_agent calls in the same pass.
|
||||
let preferred_agent =
|
||||
@@ -2195,6 +2262,80 @@ fn has_review_hold(project_root: &Path, stage_dir: &str, story_id: &str) -> bool
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Increment retry_count and block the story if it exceeds `max_retries`.
|
||||
///
|
||||
/// Returns `true` if the story is now blocked (caller should NOT restart the agent).
|
||||
/// Returns `false` if the story may be retried.
|
||||
/// When `max_retries` is 0, retry limits are disabled.
|
||||
fn should_block_story(story_path: &Path, max_retries: u32, story_id: &str, stage_label: &str) -> bool {
|
||||
use crate::io::story_metadata::{increment_retry_count, write_blocked};
|
||||
|
||||
if max_retries == 0 {
|
||||
// Retry limits disabled.
|
||||
return false;
|
||||
}
|
||||
|
||||
match increment_retry_count(story_path) {
|
||||
Ok(new_count) => {
|
||||
if new_count >= max_retries {
|
||||
slog_warn!(
|
||||
"[pipeline] Story '{story_id}' reached retry limit ({new_count}/{max_retries}) \
|
||||
at {stage_label} stage. Marking as blocked."
|
||||
);
|
||||
if let Err(e) = write_blocked(story_path) {
|
||||
slog_error!("[pipeline] Failed to write blocked flag for '{story_id}': {e}");
|
||||
}
|
||||
true
|
||||
} else {
|
||||
slog!(
|
||||
"[pipeline] Story '{story_id}' retry {new_count}/{max_retries} at {stage_label} stage."
|
||||
);
|
||||
false
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
slog_error!("[pipeline] Failed to increment retry_count for '{story_id}': {e}");
|
||||
false // Don't block on error — allow retry.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return `true` if the story file has `blocked: true` in its front matter.
|
||||
fn is_story_blocked(project_root: &Path, stage_dir: &str, story_id: &str) -> bool {
|
||||
use crate::io::story_metadata::parse_front_matter;
|
||||
let path = project_root
|
||||
.join(".story_kit")
|
||||
.join("work")
|
||||
.join(stage_dir)
|
||||
.join(format!("{story_id}.md"));
|
||||
let contents = match std::fs::read_to_string(path) {
|
||||
Ok(c) => c,
|
||||
Err(_) => return false,
|
||||
};
|
||||
parse_front_matter(&contents)
|
||||
.ok()
|
||||
.and_then(|m| m.blocked)
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Return `true` if the story file has a `merge_failure` field in its front matter.
|
||||
fn has_merge_failure(project_root: &Path, stage_dir: &str, story_id: &str) -> bool {
|
||||
use crate::io::story_metadata::parse_front_matter;
|
||||
let path = project_root
|
||||
.join(".story_kit")
|
||||
.join("work")
|
||||
.join(stage_dir)
|
||||
.join(format!("{story_id}.md"));
|
||||
let contents = match std::fs::read_to_string(path) {
|
||||
Ok(c) => c,
|
||||
Err(_) => return false,
|
||||
};
|
||||
parse_front_matter(&contents)
|
||||
.ok()
|
||||
.and_then(|m| m.merge_failure)
|
||||
.is_some()
|
||||
}
|
||||
|
||||
/// Return `true` if `agent_name` has no active (pending/running) entry in the pool.
|
||||
fn is_agent_free(agents: &HashMap<String, StoryAgent>, agent_name: &str) -> bool {
|
||||
!agents.values().any(|a| {
|
||||
@@ -2420,6 +2561,16 @@ async fn run_server_owned_completion(
|
||||
let path = wt_path;
|
||||
match tokio::task::spawn_blocking(move || {
|
||||
super::gates::check_uncommitted_changes(&path)?;
|
||||
// AC5: Fail early if the coder finished with no commits on the feature branch.
|
||||
// This prevents empty-diff stories from advancing through QA to merge.
|
||||
if !super::gates::worktree_has_committed_work(&path) {
|
||||
return Ok((
|
||||
false,
|
||||
"Agent exited with no commits on the feature branch. \
|
||||
The agent did not produce any code changes."
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
super::gates::run_acceptance_gates(&path)
|
||||
})
|
||||
.await
|
||||
|
||||
Reference in New Issue
Block a user