fix: get_agent_output no longer panics on tool_result content with multi-byte UTF-8 at byte 500
agent_log::format::format_log_entry_as_text was truncating long tool_result strings via the naive byte slice `&content_str[..500]`. When byte 500 fell inside a multi-byte UTF-8 codepoint (box-drawing chars like '─', smart quotes, emoji), the slice panicked, propagating up through the MCP get_agent_output dispatcher and surfacing as an internal-error response. This blocked any diagnostic readout of a coder's session that had emitted tool output containing those chars. Walk back to the nearest char boundary with `is_char_boundary` before slicing. Regression test asserts the formatter doesn't panic on a 602-byte string (499 ASCII bytes, a 3-byte '─', then 100 ASCII bytes) with the '─' straddling byte 500. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -104,9 +104,19 @@ pub fn format_log_entry_as_text(timestamp: &str, event: &serde_json::Value) -> O
|
|||||||
None => String::new(),
|
None => String::new(),
|
||||||
};
|
};
|
||||||
let display = if content_str.len() > 500 {
|
let display = if content_str.len() > 500 {
|
||||||
|
// Walk back to the nearest char boundary so we
|
||||||
|
// don't panic when the 500-byte mark lands
|
||||||
|
// inside a multi-byte UTF-8 codepoint (e.g.
|
||||||
|
// box-drawing chars like '─', smart quotes,
|
||||||
|
// emoji). `is_char_boundary(0)` is always
|
||||||
|
// true so the loop terminates.
|
||||||
|
let mut end = 500;
|
||||||
|
while !content_str.is_char_boundary(end) {
|
||||||
|
end -= 1;
|
||||||
|
}
|
||||||
format!(
|
format!(
|
||||||
"{}... [{} chars total]",
|
"{}... [{} chars total]",
|
||||||
&content_str[..500],
|
&content_str[..end],
|
||||||
content_str.len()
|
content_str.len()
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
@@ -129,3 +139,42 @@ pub fn format_log_entry_as_text(timestamp: &str, event: &serde_json::Value) -> O
|
|||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
/// Regression: a tool_result whose content is >500 bytes AND has a
|
||||||
|
/// multi-byte UTF-8 codepoint straddling byte 500 must not panic.
|
||||||
|
/// Previously `&content_str[..500]` would slice mid-codepoint and crash
|
||||||
|
/// the get_agent_output MCP tool.
|
||||||
|
#[test]
|
||||||
|
fn tool_result_truncation_handles_multibyte_at_boundary() {
|
||||||
|
// 499 ASCII filler + a 3-byte '─' (U+2500) starting at byte index 499 +
|
||||||
|
// 100 more ASCII chars. The naive `..500` slice would land inside
|
||||||
|
// the box-drawing char and panic.
|
||||||
|
let mut content = "a".repeat(499);
|
||||||
|
content.push('─');
|
||||||
|
content.push_str(&"b".repeat(100));
|
||||||
|
assert!(content.len() > 500);
|
||||||
|
assert!(!content.is_char_boundary(500));
|
||||||
|
|
||||||
|
let event = json!({
|
||||||
|
"type": "agent_json",
|
||||||
|
"agent_name": "coder-1",
|
||||||
|
"data": {
|
||||||
|
"type": "user",
|
||||||
|
"message": {
|
||||||
|
"content": [{ "type": "tool_result", "content": content }]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let out = format_log_entry_as_text("2026-05-12T15:30:00.000000Z", &event);
|
||||||
|
assert!(out.is_some(), "tool_result must format without panicking");
|
||||||
|
let s = out.unwrap();
|
||||||
|
assert!(s.contains("RESULT:"));
|
||||||
|
assert!(s.contains("chars total"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user