From e65f6ace84d0e1ce9f6c0351f4826a06b55251c9 Mon Sep 17 00:00:00 2001 From: Timmy Date: Tue, 12 May 2026 17:01:24 +0100 Subject: [PATCH] fix: get_agent_output no longer panics on tool_result content with multi-byte UTF-8 at byte 500 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit agent_log::format::format_log_entry_as_text was truncating long tool_result strings via the naive byte slice `&content_str[..500]`. When byte 500 fell inside a multi-byte UTF-8 codepoint (box-drawing chars like '─', smart quotes, emoji), the slice panicked, propagating up through the MCP get_agent_output dispatcher and surfacing as an internal-error response. This blocked any diagnostic readout of a coder's session that had emitted tool output containing those chars. Walk back to the nearest char boundary with `is_char_boundary` before slicing. Regression test asserts the formatter doesn't panic on a 602-byte string with a 3-byte '─' straddling byte 500. Co-Authored-By: Claude Opus 4.7 (1M context) --- server/src/agent_log/format.rs | 51 +++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/server/src/agent_log/format.rs b/server/src/agent_log/format.rs index 428a2bce..e38a46e4 100644 --- a/server/src/agent_log/format.rs +++ b/server/src/agent_log/format.rs @@ -104,9 +104,19 @@ pub fn format_log_entry_as_text(timestamp: &str, event: &serde_json::Value) -> O None => String::new(), }; let display = if content_str.len() > 500 { + // Walk back to the nearest char boundary so we + // don't panic when the 500-byte mark lands + // inside a multi-byte UTF-8 codepoint (e.g. + // box-drawing chars like '─', smart quotes, + // emoji). `is_char_boundary(0)` is always + // true so the loop terminates. + let mut end = 500; + while !content_str.is_char_boundary(end) { + end -= 1; + } format!( "{}... 
[{} chars total]", - &content_str[..500], + &content_str[..end], content_str.len() ) } else { @@ -129,3 +139,42 @@ pub fn format_log_entry_as_text(timestamp: &str, event: &serde_json::Value) -> O _ => None, } } + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + /// Regression: a tool_result whose content is >500 bytes AND has a + /// multi-byte UTF-8 codepoint straddling byte 500 must not panic. + /// Previously `&content_str[..500]` would slice mid-codepoint and crash + /// the get_agent_output MCP tool. + #[test] + fn tool_result_truncation_handles_multibyte_at_boundary() { + // 499 ASCII filler + a 3-byte '─' (U+2500) starting at byte 499 + + // 100 more ASCII chars. The naive `..500` slice would land inside + // the box-drawing char and panic. + let mut content = "a".repeat(499); + content.push('─'); + content.push_str(&"b".repeat(100)); + assert!(content.len() > 500); + assert!(!content.is_char_boundary(500)); + + let event = json!({ + "type": "agent_json", + "agent_name": "coder-1", + "data": { + "type": "user", + "message": { + "content": [{ "type": "tool_result", "content": content }] + } + } + }); + + let out = format_log_entry_as_text("2026-05-12T15:30:00.000000Z", &event); + assert!(out.is_some(), "tool_result must format without panicking"); + let s = out.unwrap(); + assert!(s.contains("RESULT:")); + assert!(s.contains("chars total")); + } +}