fix: get_agent_output no longer panics on tool_result content with multi-byte UTF-8 at byte 500

agent_log::format::format_log_entry_as_text was truncating long tool_result
strings via the naive byte slice `&content_str[..500]`. When byte 500 fell
inside a multi-byte UTF-8 codepoint (box-drawing chars like '─', smart
quotes, emoji), the slice panicked, propagating up through the MCP
get_agent_output dispatcher and surfacing as an internal-error response.
This blocked any diagnostic readout of a coder's session that had emitted
tool output containing those chars.

Walk back to the nearest char boundary with `is_char_boundary` before
slicing. Regression test asserts the formatter doesn't panic on a 602-byte
string with a 3-byte '─' straddling byte 500.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Timmy
2026-05-12 17:01:24 +01:00
parent 3891de685c
commit e65f6ace84
+50 -1
View File
@@ -104,9 +104,19 @@ pub fn format_log_entry_as_text(timestamp: &str, event: &serde_json::Value) -> O
None => String::new(), None => String::new(),
}; };
let display = if content_str.len() > 500 { let display = if content_str.len() > 500 {
// Walk back to the nearest char boundary so we
// don't panic when the 500-byte mark lands
// inside a multi-byte UTF-8 codepoint (e.g.
// box-drawing chars like '─', smart quotes,
// emoji). `is_char_boundary(0)` is always
// true so the downward walk terminates.
let mut end = 500;
while !content_str.is_char_boundary(end) {
end -= 1;
}
format!( format!(
"{}... [{} chars total]", "{}... [{} chars total]",
&content_str[..500], &content_str[..end],
content_str.len() content_str.len()
) )
} else { } else {
@@ -129,3 +139,42 @@ pub fn format_log_entry_as_text(timestamp: &str, event: &serde_json::Value) -> O
_ => None, _ => None,
} }
} }
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Regression guard for the `tool_result` truncation path.
    ///
    /// Content longer than 500 bytes used to be cut with the raw byte slice
    /// `&content_str[..500]`. If byte 500 sat inside a multi-byte UTF-8
    /// codepoint, that slice panicked and took the get_agent_output MCP
    /// tool down with it. This test feeds exactly such a string through the
    /// formatter and checks that it still produces output.
    #[test]
    fn tool_result_truncation_handles_multibyte_at_boundary() {
        // Byte layout of the payload (602 bytes in total):
        //   0..=498    499 ASCII 'a' filler bytes
        //   499..=501  '─' (U+2500), three bytes in UTF-8
        //   502..=601  100 ASCII 'b' bytes
        // Index 500 is therefore the middle byte of '─', which is exactly
        // where a naive `..500` slice would cut.
        let content = format!("{}─{}", "a".repeat(499), "b".repeat(100));

        // Sanity-check the fixture itself before exercising the formatter.
        assert!(content.len() > 500);
        assert!(!content.is_char_boundary(500));

        let message = json!({
            "content": [{ "type": "tool_result", "content": content }]
        });
        let event = json!({
            "type": "agent_json",
            "agent_name": "coder-1",
            "data": {
                "type": "user",
                "message": message
            }
        });

        let rendered = match format_log_entry_as_text("2026-05-12T15:30:00.000000Z", &event) {
            Some(text) => text,
            None => panic!("tool_result must format without panicking"),
        };

        assert!(rendered.contains("RESULT:"));
        assert!(rendered.contains("chars total"));
    }
}