Compare commits

...

4 Commits

Author SHA1 Message Date
Timmy e65f6ace84 fix: get_agent_output no longer panics on tool_result content with multi-byte UTF-8 at byte 500
agent_log::format::format_log_entry_as_text was truncating long tool_result
strings via the naive byte slice `&content_str[..500]`. When byte 500 fell
inside a multi-byte UTF-8 codepoint (box-drawing chars like '─', smart
quotes, emoji), the slice panicked, propagating up through the MCP
get_agent_output dispatcher and surfacing as an internal-error response.
This blocked any diagnostic readout of a coder's session that had emitted
tool output containing those chars.

Walk back to the nearest char boundary with `is_char_boundary` before
slicing. Regression test asserts the formatter doesn't panic on a 599-byte
string with a 3-byte '─' straddling byte 500.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 17:01:24 +01:00
dave 3891de685c huskies: merge 888 2026-05-12 15:48:38 +00:00
Timmy d04facd24f style: cargo fmt on pty/mod.rs (916 landed with a manually line-broken string literal)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 16:41:58 +01:00
dave 734597902f huskies: merge 915 2026-05-12 15:38:25 +00:00
6 changed files with 249 additions and 41 deletions
+50 -1
View File
@@ -104,9 +104,19 @@ pub fn format_log_entry_as_text(timestamp: &str, event: &serde_json::Value) -> O
None => String::new(),
};
let display = if content_str.len() > 500 {
// Walk back to the nearest char boundary so we
// don't panic when the 500-byte mark lands
// inside a multi-byte UTF-8 codepoint (e.g.
// box-drawing chars like '─', smart quotes,
// emoji). `is_char_boundary(len)` is always
// true so the loop terminates.
let mut end = 500;
while !content_str.is_char_boundary(end) {
end -= 1;
}
format!(
"{}... [{} chars total]",
&content_str[..500],
&content_str[..end],
content_str.len()
)
} else {
@@ -129,3 +139,42 @@ pub fn format_log_entry_as_text(timestamp: &str, event: &serde_json::Value) -> O
_ => None,
}
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
/// Regression: a tool_result whose content is >500 bytes AND has a
/// multi-byte UTF-8 codepoint straddling byte 500 must not panic.
/// Previously `&content_str[..500]` would slice mid-codepoint and crash
/// the get_agent_output MCP tool.
#[test]
fn tool_result_truncation_handles_multibyte_at_boundary() {
// 498 ASCII filler + a 3-byte '─' (U+2500) starting at byte 499 +
// 100 more ASCII chars. The naive `..500` slice would land inside
// the box-drawing char and panic.
let mut content = "a".repeat(499);
content.push('─');
content.push_str(&"b".repeat(100));
assert!(content.len() > 500);
assert!(!content.is_char_boundary(500));
let event = json!({
"type": "agent_json",
"agent_name": "coder-1",
"data": {
"type": "user",
"message": {
"content": [{ "type": "tool_result", "content": content }]
}
}
});
let out = format_log_entry_as_text("2026-05-12T15:30:00.000000Z", &event);
assert!(out.is_some(), "tool_result must format without panicking");
let s = out.unwrap();
assert!(s.contains("RESULT:"));
assert!(s.contains("chars total"));
}
}
+1 -2
View File
@@ -219,8 +219,7 @@ mod tests {
let tmp = tempfile::tempdir().unwrap();
let script = tmp.path().join("emit_then_wait.sh");
let body =
"#!/bin/sh\nprintf '%s\\n' '{\"type\":\"rate_limit_event\",\"rate_limit_info\":{\"status\":\"hard_block\",\"reset_at\":\"2099-01-01T12:00:00Z\"}}'\nsleep 3\n";
let body = "#!/bin/sh\nprintf '%s\\n' '{\"type\":\"rate_limit_event\",\"rate_limit_info\":{\"status\":\"hard_block\",\"reset_at\":\"2099-01-01T12:00:00Z\"}}'\nsleep 3\n";
std::fs::write(&script, body).unwrap();
std::fs::set_permissions(&script, std::fs::Permissions::from_mode(0o755)).unwrap();
+43
View File
@@ -272,6 +272,49 @@ mod tests {
);
}
/// AC4 regression (chat path): set [1,2,3] → clear → replace [4,5].
/// Verifies each write is reflected in CRDT and that replace does not append.
#[test]
fn depends_set_clear_replace_regression() {
let tmp = tempfile::TempDir::new().unwrap();
crate::crdt_state::init_for_test();
write_story_file(
tmp.path(),
"1_backlog",
"9920_story_scr.md",
"---\nname: SCR\n---\n",
);
// Set to [1, 2, 3].
let out = depends_cmd_with_root(tmp.path(), "9920 1 2 3").unwrap();
assert!(out.contains("1"), "response should mention dep 1: {out}");
let view = crate::crdt_state::read_item("9920_story_scr").expect("CRDT must have story");
assert_eq!(
view.depends_on,
Some(vec![1, 2, 3]),
"CRDT should hold [1,2,3]: {view:?}"
);
// Clear.
let out = depends_cmd_with_root(tmp.path(), "9920").unwrap();
assert!(out.contains("Cleared"), "clear should confirm: {out}");
let view = crate::crdt_state::read_item("9920_story_scr").expect("CRDT must have story");
assert_eq!(
view.depends_on, None,
"CRDT should be None after clear: {view:?}"
);
// Replace with [4, 5] — must not append to old list.
let out = depends_cmd_with_root(tmp.path(), "9920 4 5").unwrap();
assert!(out.contains("4"), "response should mention dep 4: {out}");
let view = crate::crdt_state::read_item("9920_story_scr").expect("CRDT must have story");
assert_eq!(
view.depends_on,
Some(vec![4, 5]),
"CRDT should hold exactly [4,5] after replace: {view:?}"
);
}
#[test]
fn depends_finds_story_in_any_stage() {
let tmp = tempfile::TempDir::new().unwrap();
+51 -32
View File
@@ -140,13 +140,14 @@ pub(crate) fn tool_check_criterion(args: &Value, ctx: &AppContext) -> Result<Str
let root = ctx.state.get_project_root()?;
// Best-effort validation: log a warning if no corroborating evidence is found.
// Always proceeds regardless of validation outcome (operator override).
// Hard gate: reject if no corroborating evidence exists for this criterion.
if let Ok(contents) = crate::http::workflow::read_story_content(&root, story_id) {
let ac_text = find_unchecked_criterion_text(&contents, criterion_index).unwrap_or_default();
if let Ok(workflow) = ctx.workflow.lock() {
validate_criterion_check(&root, story_id, &ac_text, &workflow);
}
let workflow = ctx
.workflow
.lock()
.map_err(|e| format!("Lock error: {e}"))?;
validate_criterion_check(&root, story_id, &ac_text, &workflow)?;
}
check_criterion_in_file(&root, story_id, criterion_index)?;
@@ -171,21 +172,21 @@ fn find_unchecked_criterion_text(contents: &str, criterion_index: usize) -> Opti
None
}
/// Run best-effort validation before marking a criterion as checked.
/// Validate that there is corroborating evidence before marking a criterion done.
///
/// Checks three signals (OR-joined):
/// - A: feature branch has at least one commit since master
/// - B: AC text mentions a file path that the branch's commits touched
/// - C: a recorded test name fuzzy-matches the AC text
///
/// Logs a WARN if none of the signals are satisfied. Always returns `()` so
/// the caller can proceed regardless (operator override).
/// Returns `Ok(())` when at least one signal passes, or `Err(message)` describing
/// what evidence is missing so the agent knows what to do next.
fn validate_criterion_check(
project_root: &Path,
story_id: &str,
ac_text: &str,
workflow: &WorkflowState,
) {
) -> Result<(), String> {
let branch = format!("feature/story-{story_id}");
// ── A: branch has commits vs master ──────────────────────────────────────
@@ -199,8 +200,7 @@ fn validate_criterion_check(
.unwrap_or_default();
if !commits.is_empty() {
// A passes — branch has real work. Validation satisfied.
return;
return Ok(());
}
// A fails. Check B and C as fallback evidence.
@@ -230,7 +230,7 @@ fn validate_criterion_check(
});
if file_mentioned {
return;
return Ok(());
}
// ── C: a recorded test name fuzzy-matches the AC text ────────────────────
@@ -251,15 +251,16 @@ fn validate_criterion_check(
.any(|word| names.iter().any(|n| n.contains(word)))
});
if !test_match {
slog_warn!(
"[check_criterion] story '{}': no corroborating evidence for criterion '{}' \
— branch '{}' has no commits vs master and no matching files or tests",
story_id,
ac_text,
branch
);
if test_match {
return Ok(());
}
Err(format!(
"No corroborating evidence for criterion '{ac_text}'. \
To proceed: commit your work to '{branch}' (currently has no commits vs master), \
add a passing test whose name matches the criterion, \
or change a file mentioned in the criterion text."
))
}
pub(crate) fn tool_edit_criterion(args: &Value, ctx: &AppContext) -> Result<String, String> {
@@ -587,7 +588,7 @@ mod tests {
}
#[test]
fn tool_check_criterion_empty_branch_logs_warning() {
fn tool_check_criterion_empty_branch_returns_error() {
let tmp = tempfile::tempdir().unwrap();
// Init a git repo with an initial commit on master.
@@ -639,21 +640,27 @@ mod tests {
&ctx,
);
// Validation is best-effort — check still proceeds.
// No evidence — must error and NOT mark the criterion.
assert!(
result.is_ok(),
"Expected ok despite empty branch: {result:?}"
result.is_err(),
"Expected error when branch has no commits: {result:?}"
);
let err = result.unwrap_err();
assert!(
err.contains("No corroborating evidence"),
"Error should describe missing evidence, got: {err}"
);
assert!(
err.contains("feature/story-9997_empty_branch"),
"Error should name the branch, got: {err}"
);
// A warning should be in the global log buffer.
let warnings = crate::log_buffer::global().get_recent(
200,
Some("9997_empty_branch"),
Some(&crate::log_buffer::LogLevel::Warn),
);
// Criterion must still be unchecked in the CRDT.
let contents = crate::db::read_content("9997_empty_branch")
.expect("story content should still be in CRDT");
assert!(
!warnings.is_empty(),
"Expected a validation warning for empty branch check_criterion"
contents.contains("- [ ] Implement the feature"),
"Criterion should remain unchecked after rejected check_criterion"
);
}
@@ -691,6 +698,18 @@ mod tests {
);
let ctx = test_ctx(tmp.path());
// Provide signal-C evidence: a test whose name matches "first" from the criterion text.
tool_record_tests(
&json!({
"story_id": "9904_test",
"unit": [{"name": "first_criterion_check", "status": "pass"}],
"integration": []
}),
&ctx,
)
.unwrap();
let result = tool_check_criterion(
&json!({"story_id": "9904_test", "criterion_index": 0}),
&ctx,
@@ -212,6 +212,102 @@ mod tests {
);
}
/// AC4 regression: set [1,2,3] → clear [] → replace [4,5] — CRDT reflects
/// each write, and replace never appends.
#[test]
fn tool_update_story_depends_on_set_clear_replace() {
let tmp = tempfile::tempdir().unwrap();
crate::crdt_state::init_for_test();
setup_story_for_update(
tmp.path(),
"888_deps_scr",
"---\nname: Deps SCR\n---\n\nNo sections.\n",
);
let ctx = test_ctx(tmp.path());
// Set to [1, 2, 3].
let r = tool_update_story(
&json!({"story_id": "888_deps_scr", "front_matter": {"depends_on": [1, 2, 3]}}),
&ctx,
);
assert!(r.is_ok(), "set [1,2,3] should succeed: {r:?}");
let view = crate::crdt_state::read_item("888_deps_scr").expect("CRDT must have story");
assert_eq!(
view.depends_on,
Some(vec![1, 2, 3]),
"CRDT should hold [1,2,3] after set"
);
// Clear to [].
let r = tool_update_story(
&json!({"story_id": "888_deps_scr", "front_matter": {"depends_on": []}}),
&ctx,
);
assert!(r.is_ok(), "clear [] should succeed: {r:?}");
let view = crate::crdt_state::read_item("888_deps_scr").expect("CRDT must have story");
assert_eq!(
view.depends_on, None,
"CRDT should be None after clearing to []"
);
// Replace with [4, 5] — must not append to previous [1,2,3].
let r = tool_update_story(
&json!({"story_id": "888_deps_scr", "front_matter": {"depends_on": [4, 5]}}),
&ctx,
);
assert!(r.is_ok(), "replace [4,5] should succeed: {r:?}");
let view = crate::crdt_state::read_item("888_deps_scr").expect("CRDT must have story");
assert_eq!(
view.depends_on,
Some(vec![4, 5]),
"CRDT should hold exactly [4,5] after replace (not [1,2,3,4,5])"
);
}
/// Regression: clearing depends_on must survive a subsequent update to another
/// field. Before the fix, write_story_content would restore the old YAML
/// depends_on value into the CRDT register, overwriting the clear.
#[test]
fn tool_update_story_clear_depends_on_survives_subsequent_update() {
let tmp = tempfile::tempdir().unwrap();
crate::crdt_state::init_for_test();
// Story created WITH depends_on in YAML so write_story_content would
// previously restore it.
setup_story_for_update(
tmp.path(),
"888_deps_persist",
"---\nname: Deps Persist\ndepends_on: [100, 200]\n---\n\nNo sections.\n",
);
let ctx = test_ctx(tmp.path());
// Seed CRDT with the YAML deps (simulates the initial write path).
crate::crdt_state::set_depends_on("888_deps_persist", &[100, 200]);
// Clear deps via update_story.
let r = tool_update_story(
&json!({"story_id": "888_deps_persist", "front_matter": {"depends_on": []}}),
&ctx,
);
assert!(r.is_ok(), "clear should succeed: {r:?}");
let view = crate::crdt_state::read_item("888_deps_persist").expect("CRDT must have story");
assert_eq!(view.depends_on, None, "CRDT should be None after clear");
// Now update a different field — this triggers write_story_content with
// the stale YAML (which still has depends_on: [100, 200]).
let r = tool_update_story(
&json!({"story_id": "888_deps_persist", "name": "Deps Persist Updated"}),
&ctx,
);
assert!(r.is_ok(), "subsequent name update should succeed: {r:?}");
// The CRDT must still be None — the YAML value must not have been restored.
let view = crate::crdt_state::read_item("888_deps_persist").expect("CRDT must have story");
assert_eq!(
view.depends_on, None,
"CRDT depends_on must remain None after unrelated update (write_story_content must not restore YAML value)"
);
}
#[test]
fn tool_update_story_depends_on_routes_to_crdt_not_yaml() {
let tmp = tempfile::tempdir().unwrap();
+8 -6
View File
@@ -17,12 +17,14 @@ pub(crate) fn write_story_content(
stage: &str,
content: &str,
) {
crate::db::write_item_with_content(
story_id,
stage,
content,
crate::db::ItemMeta::from_yaml(content),
);
let mut meta = crate::db::ItemMeta::from_yaml(content);
// CRDT is the single source of truth for depends_on. Never overwrite the
// register from YAML here — the typed setter (crdt_state::set_depends_on)
// is the only authorised write path. Passing None leaves the existing
// register untouched on update and initialises new items to "" so the
// explicit set_depends_on call in each create function takes effect.
meta.depends_on = None;
crate::db::write_item_with_content(story_id, stage, content, meta);
}
/// Determine what stage a story is in (from CRDT).