fix: get_agent_output no longer panics on tool_result content with multi-byte UTF-8 at byte 500

agent_log::format::format_log_entry_as_text was truncating long tool_result strings via the naive byte slice `&content_str[..500]`. When byte 500 fell inside a multi-byte UTF-8 codepoint (box-drawing chars like '─', smart quotes, emoji), the slice panicked, propagating up through the MCP get_agent_output dispatcher and surfacing as an internal-error response. This blocked any diagnostic readout of a coder's session that had emitted tool output containing those chars. Walk back to the nearest char boundary with `is_char_boundary` before slicing. Regression test asserts the formatter doesn't panic on a 599-byte string with a 3-byte '─' straddling byte 500. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
huskies: merge 888
2026-05-12 17:01:24 +01:00 · 2026-05-12 15:48:38 +00:00 · 2026-05-12 16:41:58 +01:00 · 2026-05-12 15:38:25 +00:00
6 changed files with 249 additions and 41 deletions
@@ -104,9 +104,19 @@ pub fn format_log_entry_as_text(timestamp: &str, event: &serde_json::Value) -> O
                                None => String::new(),
                            };
                            let display = if content_str.len() > 500 {
+                                // Walk back to the nearest char boundary so we
+                                // don't panic when the 500-byte mark lands
+                                // inside a multi-byte UTF-8 codepoint (e.g.
+                                // box-drawing chars like '─', smart quotes,
+                                // emoji). `is_char_boundary(len)` is always
+                                // true so the loop terminates.
+                                let mut end = 500;
+                                while !content_str.is_char_boundary(end) {
+                                    end -= 1;
+                                }
                                format!(
                                    "{}... [{} chars total]",
-                                    &content_str[..500],
+                                    &content_str[..end],
                                    content_str.len()
                                )
                            } else {
@@ -129,3 +139,42 @@ pub fn format_log_entry_as_text(timestamp: &str, event: &serde_json::Value) -> O
        _ => None,
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    /// Regression: a tool_result whose content is >500 bytes AND has a
+    /// multi-byte UTF-8 codepoint straddling byte 500 must not panic.
+    /// Previously `&content_str[..500]` would slice mid-codepoint and crash
+    /// the get_agent_output MCP tool.
+    #[test]
+    fn tool_result_truncation_handles_multibyte_at_boundary() {
+        // 498 ASCII filler + a 3-byte '─' (U+2500) starting at byte 499 +
+        // 100 more ASCII chars.  The naive `..500` slice would land inside
+        // the box-drawing char and panic.
+        let mut content = "a".repeat(499);
+        content.push('─');
+        content.push_str(&"b".repeat(100));
+        assert!(content.len() > 500);
+        assert!(!content.is_char_boundary(500));
+
+        let event = json!({
+            "type": "agent_json",
+            "agent_name": "coder-1",
+            "data": {
+                "type": "user",
+                "message": {
+                    "content": [{ "type": "tool_result", "content": content }]
+                }
+            }
+        });
+
+        let out = format_log_entry_as_text("2026-05-12T15:30:00.000000Z", &event);
+        assert!(out.is_some(), "tool_result must format without panicking");
+        let s = out.unwrap();
+        assert!(s.contains("RESULT:"));
+        assert!(s.contains("chars total"));
+    }
+}
@@ -219,8 +219,7 @@ mod tests {

        let tmp = tempfile::tempdir().unwrap();
        let script = tmp.path().join("emit_then_wait.sh");
-        let body =
-            "#!/bin/sh\nprintf '%s\\n' '{\"type\":\"rate_limit_event\",\"rate_limit_info\":{\"status\":\"hard_block\",\"reset_at\":\"2099-01-01T12:00:00Z\"}}'\nsleep 3\n";
+        let body = "#!/bin/sh\nprintf '%s\\n' '{\"type\":\"rate_limit_event\",\"rate_limit_info\":{\"status\":\"hard_block\",\"reset_at\":\"2099-01-01T12:00:00Z\"}}'\nsleep 3\n";
        std::fs::write(&script, body).unwrap();
        std::fs::set_permissions(&script, std::fs::Permissions::from_mode(0o755)).unwrap();

@@ -272,6 +272,49 @@ mod tests {
        );
    }

+    /// AC4 regression (chat path): set [1,2,3] → clear → replace [4,5].
+    /// Verifies each write is reflected in CRDT and that replace does not append.
+    #[test]
+    fn depends_set_clear_replace_regression() {
+        let tmp = tempfile::TempDir::new().unwrap();
+        crate::crdt_state::init_for_test();
+        write_story_file(
+            tmp.path(),
+            "1_backlog",
+            "9920_story_scr.md",
+            "---\nname: SCR\n---\n",
+        );
+
+        // Set to [1, 2, 3].
+        let out = depends_cmd_with_root(tmp.path(), "9920 1 2 3").unwrap();
+        assert!(out.contains("1"), "response should mention dep 1: {out}");
+        let view = crate::crdt_state::read_item("9920_story_scr").expect("CRDT must have story");
+        assert_eq!(
+            view.depends_on,
+            Some(vec![1, 2, 3]),
+            "CRDT should hold [1,2,3]: {view:?}"
+        );
+
+        // Clear.
+        let out = depends_cmd_with_root(tmp.path(), "9920").unwrap();
+        assert!(out.contains("Cleared"), "clear should confirm: {out}");
+        let view = crate::crdt_state::read_item("9920_story_scr").expect("CRDT must have story");
+        assert_eq!(
+            view.depends_on, None,
+            "CRDT should be None after clear: {view:?}"
+        );
+
+        // Replace with [4, 5] — must not append to old list.
+        let out = depends_cmd_with_root(tmp.path(), "9920 4 5").unwrap();
+        assert!(out.contains("4"), "response should mention dep 4: {out}");
+        let view = crate::crdt_state::read_item("9920_story_scr").expect("CRDT must have story");
+        assert_eq!(
+            view.depends_on,
+            Some(vec![4, 5]),
+            "CRDT should hold exactly [4,5] after replace: {view:?}"
+        );
+    }
+
    #[test]
    fn depends_finds_story_in_any_stage() {
        let tmp = tempfile::TempDir::new().unwrap();
@@ -140,13 +140,14 @@ pub(crate) fn tool_check_criterion(args: &Value, ctx: &AppContext) -> Result<Str

    let root = ctx.state.get_project_root()?;

-    // Best-effort validation: log a warning if no corroborating evidence is found.
-    // Always proceeds regardless of validation outcome (operator override).
+    // Hard gate: reject if no corroborating evidence exists for this criterion.
    if let Ok(contents) = crate::http::workflow::read_story_content(&root, story_id) {
        let ac_text = find_unchecked_criterion_text(&contents, criterion_index).unwrap_or_default();
-        if let Ok(workflow) = ctx.workflow.lock() {
-            validate_criterion_check(&root, story_id, &ac_text, &workflow);
-        }
+        let workflow = ctx
+            .workflow
+            .lock()
+            .map_err(|e| format!("Lock error: {e}"))?;
+        validate_criterion_check(&root, story_id, &ac_text, &workflow)?;
    }

    check_criterion_in_file(&root, story_id, criterion_index)?;
@@ -171,21 +172,21 @@ fn find_unchecked_criterion_text(contents: &str, criterion_index: usize) -> Opti
    None
 }

-/// Run best-effort validation before marking a criterion as checked.
+/// Validate that there is corroborating evidence before marking a criterion done.
 ///
 /// Checks three signals (OR-joined):
 /// - A: feature branch has at least one commit since master
 /// - B: AC text mentions a file path that the branch's commits touched
 /// - C: a recorded test name fuzzy-matches the AC text
 ///
-/// Logs a WARN if none of the signals are satisfied. Always returns `()` so
-/// the caller can proceed regardless (operator override).
+/// Returns `Ok(())` when at least one signal passes, or `Err(message)` describing
+/// what evidence is missing so the agent knows what to do next.
 fn validate_criterion_check(
    project_root: &Path,
    story_id: &str,
    ac_text: &str,
    workflow: &WorkflowState,
-) {
+) -> Result<(), String> {
    let branch = format!("feature/story-{story_id}");

    // ── A: branch has commits vs master ──────────────────────────────────────
@@ -199,8 +200,7 @@ fn validate_criterion_check(
        .unwrap_or_default();

    if !commits.is_empty() {
-        // A passes — branch has real work. Validation satisfied.
-        return;
+        return Ok(());
    }

    // A fails. Check B and C as fallback evidence.
@@ -230,7 +230,7 @@ fn validate_criterion_check(
    });

    if file_mentioned {
-        return;
+        return Ok(());
    }

    // ── C: a recorded test name fuzzy-matches the AC text ────────────────────
@@ -251,15 +251,16 @@ fn validate_criterion_check(
            .any(|word| names.iter().any(|n| n.contains(word)))
    });

-    if !test_match {
-        slog_warn!(
-            "[check_criterion] story '{}': no corroborating evidence for criterion '{}' \
-             — branch '{}' has no commits vs master and no matching files or tests",
-            story_id,
-            ac_text,
-            branch
-        );
+    if test_match {
+        return Ok(());
    }
+
+    Err(format!(
+        "No corroborating evidence for criterion '{ac_text}'. \
+         To proceed: commit your work to '{branch}' (currently has no commits vs master), \
+         add a passing test whose name matches the criterion, \
+         or change a file mentioned in the criterion text."
+    ))
 }

 pub(crate) fn tool_edit_criterion(args: &Value, ctx: &AppContext) -> Result<String, String> {
@@ -587,7 +588,7 @@ mod tests {
    }

    #[test]
-    fn tool_check_criterion_empty_branch_logs_warning() {
+    fn tool_check_criterion_empty_branch_returns_error() {
        let tmp = tempfile::tempdir().unwrap();

        // Init a git repo with an initial commit on master.
@@ -639,21 +640,27 @@ mod tests {
            &ctx,
        );

-        // Validation is best-effort — check still proceeds.
+        // No evidence — must error and NOT mark the criterion.
        assert!(
-            result.is_ok(),
-            "Expected ok despite empty branch: {result:?}"
+            result.is_err(),
+            "Expected error when branch has no commits: {result:?}"
+        );
+        let err = result.unwrap_err();
+        assert!(
+            err.contains("No corroborating evidence"),
+            "Error should describe missing evidence, got: {err}"
+        );
+        assert!(
+            err.contains("feature/story-9997_empty_branch"),
+            "Error should name the branch, got: {err}"
        );

-        // A warning should be in the global log buffer.
-        let warnings = crate::log_buffer::global().get_recent(
-            200,
-            Some("9997_empty_branch"),
-            Some(&crate::log_buffer::LogLevel::Warn),
-        );
+        // Criterion must still be unchecked in the CRDT.
+        let contents = crate::db::read_content("9997_empty_branch")
+            .expect("story content should still be in CRDT");
        assert!(
-            !warnings.is_empty(),
-            "Expected a validation warning for empty branch check_criterion"
+            contents.contains("- [ ] Implement the feature"),
+            "Criterion should remain unchecked after rejected check_criterion"
        );
    }

@@ -691,6 +698,18 @@ mod tests {
        );

        let ctx = test_ctx(tmp.path());
+
+        // Provide signal-C evidence: a test whose name matches "first" from the criterion text.
+        tool_record_tests(
+            &json!({
+                "story_id": "9904_test",
+                "unit": [{"name": "first_criterion_check", "status": "pass"}],
+                "integration": []
+            }),
+            &ctx,
+        )
+        .unwrap();
+
        let result = tool_check_criterion(
            &json!({"story_id": "9904_test", "criterion_index": 0}),
            &ctx,
@@ -212,6 +212,102 @@ mod tests {
        );
    }

+    /// AC4 regression: set [1,2,3] → clear [] → replace [4,5] — CRDT reflects
+    /// each write, and replace never appends.
+    #[test]
+    fn tool_update_story_depends_on_set_clear_replace() {
+        let tmp = tempfile::tempdir().unwrap();
+        crate::crdt_state::init_for_test();
+        setup_story_for_update(
+            tmp.path(),
+            "888_deps_scr",
+            "---\nname: Deps SCR\n---\n\nNo sections.\n",
+        );
+        let ctx = test_ctx(tmp.path());
+
+        // Set to [1, 2, 3].
+        let r = tool_update_story(
+            &json!({"story_id": "888_deps_scr", "front_matter": {"depends_on": [1, 2, 3]}}),
+            &ctx,
+        );
+        assert!(r.is_ok(), "set [1,2,3] should succeed: {r:?}");
+        let view = crate::crdt_state::read_item("888_deps_scr").expect("CRDT must have story");
+        assert_eq!(
+            view.depends_on,
+            Some(vec![1, 2, 3]),
+            "CRDT should hold [1,2,3] after set"
+        );
+
+        // Clear to [].
+        let r = tool_update_story(
+            &json!({"story_id": "888_deps_scr", "front_matter": {"depends_on": []}}),
+            &ctx,
+        );
+        assert!(r.is_ok(), "clear [] should succeed: {r:?}");
+        let view = crate::crdt_state::read_item("888_deps_scr").expect("CRDT must have story");
+        assert_eq!(
+            view.depends_on, None,
+            "CRDT should be None after clearing to []"
+        );
+
+        // Replace with [4, 5] — must not append to previous [1,2,3].
+        let r = tool_update_story(
+            &json!({"story_id": "888_deps_scr", "front_matter": {"depends_on": [4, 5]}}),
+            &ctx,
+        );
+        assert!(r.is_ok(), "replace [4,5] should succeed: {r:?}");
+        let view = crate::crdt_state::read_item("888_deps_scr").expect("CRDT must have story");
+        assert_eq!(
+            view.depends_on,
+            Some(vec![4, 5]),
+            "CRDT should hold exactly [4,5] after replace (not [1,2,3,4,5])"
+        );
+    }
+
+    /// Regression: clearing depends_on must survive a subsequent update to another
+    /// field.  Before the fix, write_story_content would restore the old YAML
+    /// depends_on value into the CRDT register, overwriting the clear.
+    #[test]
+    fn tool_update_story_clear_depends_on_survives_subsequent_update() {
+        let tmp = tempfile::tempdir().unwrap();
+        crate::crdt_state::init_for_test();
+        // Story created WITH depends_on in YAML so write_story_content would
+        // previously restore it.
+        setup_story_for_update(
+            tmp.path(),
+            "888_deps_persist",
+            "---\nname: Deps Persist\ndepends_on: [100, 200]\n---\n\nNo sections.\n",
+        );
+        let ctx = test_ctx(tmp.path());
+
+        // Seed CRDT with the YAML deps (simulates the initial write path).
+        crate::crdt_state::set_depends_on("888_deps_persist", &[100, 200]);
+
+        // Clear deps via update_story.
+        let r = tool_update_story(
+            &json!({"story_id": "888_deps_persist", "front_matter": {"depends_on": []}}),
+            &ctx,
+        );
+        assert!(r.is_ok(), "clear should succeed: {r:?}");
+        let view = crate::crdt_state::read_item("888_deps_persist").expect("CRDT must have story");
+        assert_eq!(view.depends_on, None, "CRDT should be None after clear");
+
+        // Now update a different field — this triggers write_story_content with
+        // the stale YAML (which still has depends_on: [100, 200]).
+        let r = tool_update_story(
+            &json!({"story_id": "888_deps_persist", "name": "Deps Persist Updated"}),
+            &ctx,
+        );
+        assert!(r.is_ok(), "subsequent name update should succeed: {r:?}");
+
+        // The CRDT must still be None — the YAML value must not have been restored.
+        let view = crate::crdt_state::read_item("888_deps_persist").expect("CRDT must have story");
+        assert_eq!(
+            view.depends_on, None,
+            "CRDT depends_on must remain None after unrelated update (write_story_content must not restore YAML value)"
+        );
+    }
+
    #[test]
    fn tool_update_story_depends_on_routes_to_crdt_not_yaml() {
        let tmp = tempfile::tempdir().unwrap();
@@ -17,12 +17,14 @@ pub(crate) fn write_story_content(
    stage: &str,
    content: &str,
 ) {
-    crate::db::write_item_with_content(
-        story_id,
-        stage,
-        content,
-        crate::db::ItemMeta::from_yaml(content),
-    );
+    let mut meta = crate::db::ItemMeta::from_yaml(content);
+    // CRDT is the single source of truth for depends_on.  Never overwrite the
+    // register from YAML here — the typed setter (crdt_state::set_depends_on)
+    // is the only authorised write path.  Passing None leaves the existing
+    // register untouched on update and initialises new items to "" so the
+    // explicit set_depends_on call in each create function takes effect.
+    meta.depends_on = None;
+    crate::db::write_item_with_content(story_id, stage, content, meta);
 }

 /// Determine what stage a story is in (from CRDT).
Author	SHA1	Message	Date
Timmy	e65f6ace84	fix: get_agent_output no longer panics on tool_result content with multi-byte UTF-8 at byte 500 agent_log::format::format_log_entry_as_text was truncating long tool_result strings via the naive byte slice `&content_str[..500]`. When byte 500 fell inside a multi-byte UTF-8 codepoint (box-drawing chars like '─', smart quotes, emoji), the slice panicked, propagating up through the MCP get_agent_output dispatcher and surfacing as an internal-error response. This blocked any diagnostic readout of a coder's session that had emitted tool output containing those chars. Walk back to the nearest char boundary with `is_char_boundary` before slicing. Regression test asserts the formatter doesn't panic on a 599-byte string with a 3-byte '─' straddling byte 500. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>	2026-05-12 17:01:24 +01:00
dave	3891de685c	huskies: merge 888	2026-05-12 15:48:38 +00:00
Timmy	d04facd24f	style: cargo fmt on pty/mod.rs (916 landed with a manually line-broken string literal) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>	2026-05-12 16:41:58 +01:00
dave	734597902f	huskies: merge 915	2026-05-12 15:38:25 +00:00