huskies: merge 927

2026-05-12 17:49:44 +00:00
parent b8945654bf
commit 03a99b3cf1
33 changed files with 119 additions and 25 deletions
@@ -86,6 +86,7 @@ pub(super) fn handle_cost(ctx: &CommandContext) -> Option<String> {
 ///
 /// Agent names like "coder-1", "qa-2", "mergemaster" map to types "coder",
 /// "qa", "mergemaster". If the name ends with `-<digits>`, strip the suffix.
+#[allow(clippy::string_slice)] // pos comes from rfind('-'), so pos+1 is after an ASCII '-' → valid boundary
 pub(super) fn extract_agent_type(agent_name: &str) -> String {
    if let Some(pos) = agent_name.rfind('-') {
        let suffix = &agent_name[pos + 1..];
@@ -240,6 +240,7 @@ fn parse_coverage_output(output: &str, passed: bool) -> String {
 }

 /// Extract a value from lines like `"Rust line coverage: 62.5%"`.
+#[allow(clippy::string_slice)] // starts_with(prefix) guarantees prefix.len() is a char boundary
 fn extract_line_value(output: &str, prefix: &str) -> Option<String> {
    output
        .lines()
@@ -248,6 +249,7 @@ fn extract_line_value(output: &str, prefix: &str) -> Option<String> {
 }

 /// Extract a value from the summary block: `"  Overall:  62.5%"`.
+#[allow(clippy::string_slice)] // starts_with(label) guarantees label.len() is a char boundary
 fn extract_summary_field(output: &str, label: &str) -> Option<String> {
    output
        .lines()
@@ -4,6 +4,7 @@
 //! HEAD, formatted for readability in chat.

 use super::CommandContext;
+use crate::chat::util::truncate_at_char_boundary;
 use std::path::Path;
 use std::process::Command;

@@ -125,18 +126,6 @@ fn run_git(dir: &Path, args: &[&str]) -> String {
        .unwrap_or_default()
 }

-/// Truncate `s` to at most `max_bytes` bytes without splitting a UTF-8 character.
-fn truncate_at_char_boundary(s: &str, max_bytes: usize) -> &str {
-    if s.len() <= max_bytes {
-        return s;
-    }
-    let mut boundary = max_bytes;
-    while !s.is_char_boundary(boundary) {
-        boundary -= 1;
-    }
-    &s[..boundary]
-}
-
 // ---------------------------------------------------------------------------
 // Tests
 // ---------------------------------------------------------------------------
@@ -3,6 +3,7 @@
 use super::CommandContext;

 /// Show compact git status: branch, uncommitted files, ahead/behind remote.
+#[allow(clippy::string_slice)] // line[..2] and line[3..]: git porcelain XY status codes are always ASCII
 pub(super) fn handle_git(ctx: &CommandContext) -> Option<String> {
    use std::process::Command;

@@ -8,6 +8,7 @@ use super::CommandContext;
 /// git diff --stat (files changed with line counts), and extracts key
 /// function/struct/type names added or modified in the implementation.
 /// Returns a friendly message when no merge commit is found.
+#[allow(clippy::string_slice)] // commit_hash is hex (ASCII), min(8) always within bounds
 pub(super) fn handle_overview(ctx: &CommandContext) -> Option<String> {
    let num_str = ctx.args.trim();
    if num_str.is_empty() {
@@ -129,6 +130,7 @@ fn get_commit_stat(root: &std::path::Path, hash: &str) -> String {
 ///
 /// Scans added lines (`+`) for Rust `fn`, `struct`, `enum`, `type`, `trait`,
 /// and `impl` declarations and returns them formatted as `` `Name` (kind) ``.
+#[allow(clippy::string_slice)] // line starts with '+' (ASCII), so &line[1..] is always a valid boundary
 fn extract_diff_symbols(root: &std::path::Path, hash: &str) -> Vec<String> {
    use std::process::Command;
    let output = Command::new("git")
@@ -123,6 +123,7 @@ fn parse_test_counts(output: &str) -> (u64, u64) {
    (total_passed, total_failed)
 }

+#[allow(clippy::string_slice)] // pos from line.find(label) → always a char boundary
 fn extract_count(line: &str, label: &str) -> Option<u64> {
    let pos = line.find(label)?;
    let before = line[..pos].trim_end();
@@ -3,6 +3,7 @@
 use super::CommandContext;

 /// Strip YAML front matter and return a summary of useful fields + the remaining body.
+#[allow(clippy::string_slice)] // indices from find("\n---") on ASCII delimiter; "---" and "\n---" are ASCII-only
 fn strip_front_matter(text: &str) -> (String, String) {
    let trimmed = text.trim_start();
    if !trimmed.starts_with("---") {
@@ -462,7 +462,7 @@ fn status_shows_crdt_done_story_in_done_not_backlog() {
    );

    // Verify it's not in Backlog section specifically.
-    let backlog_section = &output[backlog_pos..done_pos];
+    let backlog_section = output.get(backlog_pos..done_pos).unwrap_or("");
    assert!(
        !backlog_section.contains("503"),
        "503 must not appear in Backlog section: {backlog_section}"
@@ -573,10 +573,16 @@ fn merge_item_failure_snippet_truncated_at_120_chars() {
    );
    // The snippet should not exceed 120 chars plus the ellipsis character.
    let snippet_start = output.find("\u{26D4}").expect("stop sign must be present");
-    let line = output[snippet_start..].lines().next().unwrap_or("");
+    let line = output
+        .get(snippet_start..)
+        .unwrap_or("")
+        .lines()
+        .next()
+        .unwrap_or("");
    // Find the last " — " separator (before the snippet) and take what follows.
    if let Some(sep_pos) = line.rfind(" \u{2014} ") {
-        let snippet = &line[sep_pos + 5..]; // " — " is 5 bytes (space + 3-byte em dash + space)
+        // " — " is 5 bytes (space + 3-byte em dash + space)
+        let snippet = line.get(sep_pos + 5..).unwrap_or("");
        assert!(
            snippet.chars().count() <= 122, // 120 chars + "…" (1 char) + possible trailing
            "snippet should be at most ~121 chars: {snippet}"
@@ -3,7 +3,7 @@
 use async_trait::async_trait;
 use serde::Deserialize;

-use crate::chat::{ChatTransport, MessageId};
+use crate::chat::{ChatTransport, MessageId, util::truncate_at_char_boundary};
 use crate::slog;

 // ── Discord API base URL (overridable for tests) ──────────────────────
@@ -71,7 +71,7 @@ impl ChatTransport for DiscordTransport {

        // Discord messages have a 2000-char limit. Truncate if needed.
        let content = if plain.len() > 2000 {
-            format!("{}…", &plain[..1999])
+            format!("{}…", truncate_at_char_boundary(plain, 1999))
        } else {
            plain.to_string()
        };
@@ -118,7 +118,7 @@ impl ChatTransport for DiscordTransport {
        );

        let content = if plain.len() > 2000 {
-            format!("{}…", &plain[..1999])
+            format!("{}…", truncate_at_char_boundary(plain, 1999))
        } else {
            plain.to_string()
        };
@@ -37,6 +37,7 @@ pub fn mentions_bot(body: &str, formatted_body: Option<&str>, bot_user_id: &Owne
 }

 /// Returns `true` if `haystack` contains `needle` at a word boundary.
+#[allow(clippy::string_slice)] // all indices from find() or abs+needle.len() → always char boundaries
 pub(super) fn contains_word(haystack: &str, needle: &str) -> bool {
    let mut start = 0;
    while let Some(rel) = haystack[start..].find(needle) {
@@ -87,6 +88,7 @@ pub(super) async fn is_reply_to_bot(
 ///
 /// Used in ambient mode to suppress responses when a message is clearly
 /// directed at a different participant (e.g. another bot in the same room).
+#[allow(clippy::string_slice)] // word_end and colon_pos from find() → always char boundaries
 pub fn is_addressed_to_other(body: &str, bot_user_id: &OwnedUserId, bot_name: &str) -> bool {
    let trimmed = body.trim_start();
    let lower = trimmed.to_lowercase();
@@ -101,6 +101,7 @@ fn parse_duration(s: &str) -> Option<u64> {
 ///
 /// Returns a short string like `"load average: 1.23, 0.98, 0.75"` on success,
 /// or `"load: unknown"` on failure.
+#[allow(clippy::string_slice)] // idx comes from output.find("load average") → always a char boundary
 fn get_load_average() -> String {
    let output = std::process::Command::new("uptime")
        .output()
@@ -1,4 +1,5 @@
 //! WhatsApp message formatting — Markdown-to-WhatsApp conversion and message chunking.
+use crate::chat::util::truncate_at_char_boundary;
 use regex::Regex;
 use std::sync::LazyLock;

@@ -24,13 +25,13 @@ pub fn chunk_for_whatsapp(text: &str) -> Vec<String> {
        }

        // Find the best split point within the limit.
-        let window = &remaining[..WHATSAPP_MAX_MESSAGE_LEN];
+        let window = truncate_at_char_boundary(remaining, WHATSAPP_MAX_MESSAGE_LEN);

        // Prefer paragraph boundary.
        let split_pos = window
            .rfind("\n\n")
            .or_else(|| window.rfind('\n'))
-            .unwrap_or(WHATSAPP_MAX_MESSAGE_LEN);
+            .unwrap_or(window.len());

        let (chunk, rest) = remaining.split_at(split_pos);
        let chunk = chunk.trim();
@@ -3,6 +3,23 @@
 //! These functions are transport-agnostic helpers for processing chat messages:
 //! prefix stripping, bot-mention handling, and paragraph buffering.

+/// Truncate `s` to at most `max_bytes` bytes without splitting a UTF-8 codepoint.
+///
+/// Returns `s` unchanged when `s.len() <= max_bytes`. Otherwise returns the prefix that
+/// ends at the largest char boundary ≤ `max_bytes`, which may be the empty string.
+/// Never panics: the cut point is moved back onto a char boundary before slicing.
+pub fn truncate_at_char_boundary(s: &str, max_bytes: usize) -> &str {
+    if s.len() <= max_bytes {
+        return s;
+    }
+    let mut boundary = max_bytes; // max_bytes < s.len() here, so this index is within `s`
+    while !s.is_char_boundary(boundary) {
+        boundary -= 1; // cannot underflow: index 0 is always a char boundary
+    }
+    #[allow(clippy::string_slice)] // boundary is guaranteed to be a char boundary by the loop above
+    &s[..boundary]
+}
+
 /// Returns `true` if the message body is an affirmative permission response.
 ///
 /// Recognised affirmative tokens (case-insensitive): `yes`, `y`, `approve`,
@@ -26,6 +43,7 @@ pub fn is_permission_approval(body: &str) -> bool {

 /// Case-insensitive prefix strip that also requires the match to end at a
 /// word boundary (whitespace, punctuation, or end-of-string).
+#[allow(clippy::string_slice)] // prefix.len() is safe: `get(..prefix.len())` already validated the boundary
 pub fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
    let candidate = text.get(..prefix.len())?;
    if !candidate.eq_ignore_ascii_case(prefix) {
@@ -50,6 +68,7 @@ pub fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
 /// - `DisplayName, rest` → `rest`  (Element tab-completion may insert a comma)
 /// - `DisplayName ⚡️: rest` → `rest`  (display name with emoji)
 /// - `[DisplayName](https://matrix.to/#/@user:server) rest` → `rest`  (Element mention pill)
+#[allow(clippy::string_slice)] // all indices come from str::find / Iterator::find_map → always char boundaries
 pub fn strip_bot_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
    let trimmed = message.trim();

@@ -98,6 +117,7 @@ pub fn strip_bot_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str
 /// emoji from display names (e.g. `Timmy ⚡️`), and Element tab-completion
 /// separators (`:` or `,`).  This function skips all of that and returns a
 /// slice starting at the first ASCII alphanumeric character (the command).
+#[allow(clippy::string_slice)] // byte_skip comes from char_indices → guaranteed char boundary
 fn strip_mention_separator(rest: &str) -> &str {
    let byte_skip = rest
        .char_indices()
@@ -132,6 +152,7 @@ fn is_inside_code_fence(text: &str) -> bool {
 /// block (delimited by ` ``` ` lines) is **not** treated as a paragraph
 /// boundary.  This prevents a blank line inside a code block from splitting
 /// the fence across multiple messages, which would corrupt the rendering.
+#[allow(clippy::string_slice)] // abs_pos comes from str::find → always a char boundary
 pub fn drain_complete_paragraphs(buffer: &mut String) -> Vec<String> {
    let mut paragraphs = Vec::new();
    let mut search_from = 0;
@@ -259,6 +280,47 @@ pub fn normalize_line_breaks(text: &str) -> String {
 mod tests {
    use super::*;

+    // -- truncate_at_char_boundary ------------------------------------------
+
+    #[test]
+    fn truncate_ascii_within_limit() {
+        assert_eq!(truncate_at_char_boundary("hello", 10), "hello"); // 5 bytes < 10: unchanged
+    }
+
+    #[test]
+    fn truncate_ascii_at_exact_limit() {
+        assert_eq!(truncate_at_char_boundary("hello", 5), "hello"); // len == limit: unchanged
+    }
+
+    #[test]
+    fn truncate_ascii_over_limit() {
+        assert_eq!(truncate_at_char_boundary("hello world", 5), "hello"); // cut at byte 5
+    }
+
+    #[test]
+    fn truncate_multibyte_mid_codepoint_snaps_back() {
+        // "héllo": 'é' (U+00E9) spans bytes 1..3; max_bytes=2 lands inside it → snap back to 1.
+        let s = "héllo";
+        assert_eq!(truncate_at_char_boundary(s, 2), "h");
+    }
+
+    #[test]
+    fn truncate_multibyte_on_char_boundary() {
+        // "héllo": h (1 byte) + é (2 bytes) end at byte 3, so max_bytes=3 is already a boundary.
+        let s = "héllo";
+        assert_eq!(truncate_at_char_boundary(s, 3), "hé");
+    }
+
+    #[test]
+    fn truncate_max_bytes_zero_returns_empty() {
+        assert_eq!(truncate_at_char_boundary("hello", 0), ""); // index 0 is always a boundary
+    }
+
+    #[test]
+    fn truncate_max_bytes_greater_than_len() {
+        assert_eq!(truncate_at_char_boundary("hi", 100), "hi"); // limit far beyond len: unchanged
+    }
+
    // -- is_permission_approval ---------------------------------------------

    #[test]