storkit: merge 392_refactor_extract_shared_transport_utilities_from_matrix_module_into_chat_submodule

2026-03-25 14:43:28 +00:00
parent 580ab1ce68
commit 077288e7b7
21 changed files with 344 additions and 326 deletions
@@ -1,4 +1,4 @@
-//! Bot-level command registry for the Matrix bot.
+//! Bot-level command registry shared by all chat transports.
 //!
 //! Commands registered here are handled directly by the bot without invoking
 //! the LLM. The registry is the single source of truth — the `help` command
@@ -18,6 +18,7 @@ mod triage;
 mod unreleased;

 use crate::agents::AgentPool;
+use crate::chat::util::strip_bot_mention;
 use std::collections::HashSet;
 use std::path::Path;
 use std::sync::{Arc, Mutex};
@@ -163,8 +164,8 @@ pub fn commands() -> &'static [BotCommand] {

 /// Try to match a user message against a registered bot command.
 ///
-/// The message is expected to be the raw body text from Matrix (e.g.,
-/// `"@timmy help"`). The bot mention prefix is stripped before matching.
+/// The message is expected to be the raw body text (e.g., `"@timmy help"`).
+/// The bot mention prefix is stripped before matching.
 ///
 /// Returns `Some(response)` if a command matched and was handled, `None`
 /// otherwise (the caller should fall through to the LLM).
@@ -196,51 +197,6 @@ pub fn try_handle_command(dispatch: &CommandDispatch<'_>, message: &str) -> Opti
        .and_then(|c| (c.handler)(&ctx))
 }

-/// Strip the bot mention prefix from a raw message body.
-///
-/// Handles these forms (case-insensitive where applicable):
-/// - `@bot_localpart:server.com rest` → `rest`
-/// - `@bot_localpart rest` → `rest`
-/// - `DisplayName rest` → `rest`
-fn strip_bot_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
-    let trimmed = message.trim();
-
-    // Try full Matrix user ID (e.g. "@timmy:homeserver.local")
-    if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
-        return rest;
-    }
-
-    // Try @localpart (e.g. "@timmy")
-    if let Some(localpart) = bot_user_id.split(':').next()
-        && let Some(rest) = strip_prefix_ci(trimmed, localpart)
-    {
-        return rest;
-    }
-
-    // Try display name (e.g. "Timmy")
-    if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
-        return rest;
-    }
-
-    trimmed
-}
-
-/// Case-insensitive prefix strip that also requires the match to end at a
-/// word boundary (whitespace, punctuation, or end-of-string).
-fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
-    let candidate = text.get(..prefix.len())?;
-    if !candidate.eq_ignore_ascii_case(prefix) {
-        return None;
-    }
-    let rest = &text[prefix.len()..];
-    // Must be at end or followed by non-alphanumeric
-    match rest.chars().next() {
-        None => Some(rest), // exact match, empty remainder
-        Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None, // not a word boundary
-        _ => Some(rest),
-    }
-}
-
 /// Fallback handler for the `htop` command when it is not intercepted by the
 /// async handler in `on_room_message`.  In practice this is never called —
 /// htop is detected and handled before `try_handle_command` is invoked.
@@ -346,55 +302,6 @@ pub(crate) mod tests {
    // Re-export commands() for submodule tests
    pub use super::commands;

-    // -- strip_bot_mention --------------------------------------------------
-
-    #[test]
-    fn strip_mention_full_user_id() {
-        let rest = strip_bot_mention(
-            "@timmy:homeserver.local help",
-            "Timmy",
-            "@timmy:homeserver.local",
-        );
-        assert_eq!(rest.trim(), "help");
-    }
-
-    #[test]
-    fn strip_mention_localpart() {
-        let rest = strip_bot_mention("@timmy help me", "Timmy", "@timmy:homeserver.local");
-        assert_eq!(rest.trim(), "help me");
-    }
-
-    #[test]
-    fn strip_mention_display_name() {
-        let rest = strip_bot_mention("Timmy help", "Timmy", "@timmy:homeserver.local");
-        assert_eq!(rest.trim(), "help");
-    }
-
-    #[test]
-    fn strip_mention_display_name_case_insensitive() {
-        let rest = strip_bot_mention("timmy help", "Timmy", "@timmy:homeserver.local");
-        assert_eq!(rest.trim(), "help");
-    }
-
-    #[test]
-    fn strip_mention_no_match_returns_original() {
-        let rest = strip_bot_mention("hello world", "Timmy", "@timmy:homeserver.local");
-        assert_eq!(rest, "hello world");
-    }
-
-    #[test]
-    fn strip_mention_does_not_match_longer_name() {
-        // "@timmybot" should NOT match "@timmy"
-        let rest = strip_bot_mention("@timmybot help", "Timmy", "@timmy:homeserver.local");
-        assert_eq!(rest, "@timmybot help");
-    }
-
-    #[test]
-    fn strip_mention_comma_after_name() {
-        let rest = strip_bot_mention("@timmy, help", "Timmy", "@timmy:homeserver.local");
-        assert_eq!(rest.trim().trim_start_matches(',').trim(), "help");
-    }
-
    // -- try_handle_command -------------------------------------------------

    #[test]
@@ -427,44 +334,6 @@ pub(crate) mod tests {
        );
    }

-    // -- strip_prefix_ci ----------------------------------------------------
-
-    #[test]
-    fn strip_prefix_ci_basic() {
-        assert_eq!(strip_prefix_ci("Hello world", "hello"), Some(" world"));
-    }
-
-    #[test]
-    fn strip_prefix_ci_no_match() {
-        assert_eq!(strip_prefix_ci("goodbye", "hello"), None);
-    }
-
-    #[test]
-    fn strip_prefix_ci_word_boundary_required() {
-        assert_eq!(strip_prefix_ci("helloworld", "hello"), None);
-    }
-
-    #[test]
-    fn strip_prefix_ci_exact_match() {
-        assert_eq!(strip_prefix_ci("hello", "hello"), Some(""));
-    }
-
-    #[test]
-    fn strip_prefix_ci_multibyte_no_panic_smart_quote() {
-        // "abcde\u{2019}xyz" — U+2019 is 3 bytes starting at byte 5.
-        // A prefix of length 6 (e.g. "abcdef") lands inside the 3-byte char.
-        // Previously this caused: "byte index 6 is not a char boundary".
-        let text = "abcde\u{2019}xyz";
-        assert_eq!(strip_prefix_ci(text, "abcdef"), None);
-    }
-
-    #[test]
-    fn strip_prefix_ci_multibyte_no_panic_emoji() {
-        // U+1F600 is 4 bytes starting at byte 3. Prefix length 4 lands inside it.
-        let text = "abc\u{1F600}def";
-        assert_eq!(strip_prefix_ci(text, "abcd"), None);
-    }
-
    // -- commands registry --------------------------------------------------

    #[test]
@@ -4,7 +4,9 @@
 //! sending and editing messages, allowing the bot logic (commands, htop,
 //! notifications) to work against any chat platform — Matrix, WhatsApp, etc.

+pub mod commands;
 pub mod transport;
+pub mod util;

 use async_trait::async_trait;

@@ -1,8 +1,9 @@
 use crate::agents::AgentPool;
+use crate::chat::ChatTransport;
+use crate::chat::util::drain_complete_paragraphs;
 use crate::http::context::{PermissionDecision, PermissionForward};
 use crate::llm::providers::claude_code::{ClaudeCodeProvider, ClaudeCodeResult};
 use crate::slog;
-use crate::chat::ChatTransport;
 use matrix_sdk::{
    Client,
    config::SyncSettings,
@@ -1362,59 +1363,6 @@ pub fn markdown_to_html(markdown: &str) -> String {
    html_output
 }

-// ---------------------------------------------------------------------------
-// Paragraph buffering helper
-// ---------------------------------------------------------------------------
-
-/// Returns `true` when `text` ends while inside an open fenced code block.
-///
-/// A fenced code block opens and closes on lines that start with ` ``` `
-/// (three or more backticks).  We count the fence markers and return `true`
-/// when the count is odd (a fence was opened but not yet closed).
-fn is_inside_code_fence(text: &str) -> bool {
-    let mut in_fence = false;
-    for line in text.lines() {
-        if line.trim_start().starts_with("```") {
-            in_fence = !in_fence;
-        }
-    }
-    in_fence
-}
-
-/// Drain all complete paragraphs from `buffer` and return them.
-///
-/// A paragraph boundary is a double newline (`\n\n`). Each drained paragraph
-/// is trimmed of surrounding whitespace; empty paragraphs are discarded.
-/// The buffer is left with only the remaining incomplete text.
-///
-/// **Code-fence awareness:** a `\n\n` that occurs *inside* a fenced code
-/// block (delimited by ` ``` ` lines) is **not** treated as a paragraph
-/// boundary.  This prevents a blank line inside a code block from splitting
-/// the fence across multiple Matrix messages, which would corrupt the
-/// rendering of the second half.
-pub fn drain_complete_paragraphs(buffer: &mut String) -> Vec<String> {
-    let mut paragraphs = Vec::new();
-    let mut search_from = 0;
-    loop {
-        let Some(pos) = buffer[search_from..].find("\n\n") else {
-            break;
-        };
-        let abs_pos = search_from + pos;
-        // Only split at this boundary when we are NOT inside a code fence.
-        if is_inside_code_fence(&buffer[..abs_pos]) {
-            // Skip past this \n\n and keep looking for the next boundary.
-            search_from = abs_pos + 2;
-        } else {
-            let chunk = buffer[..abs_pos].trim().to_string();
-            *buffer = buffer[abs_pos + 2..].to_string();
-            search_from = 0;
-            if !chunk.is_empty() {
-                paragraphs.push(chunk);
-            }
-        }
-    }
-    paragraphs
-}

 // ---------------------------------------------------------------------------
 // Tests
@@ -1623,129 +1571,6 @@ mod tests {
        let _cloned = ctx.clone();
    }

-    // -- drain_complete_paragraphs ------------------------------------------
-
-    #[test]
-    fn drain_complete_paragraphs_no_boundary_returns_empty() {
-        let mut buf = "Hello World".to_string();
-        let paras = drain_complete_paragraphs(&mut buf);
-        assert!(paras.is_empty());
-        assert_eq!(buf, "Hello World");
-    }
-
-    #[test]
-    fn drain_complete_paragraphs_single_boundary() {
-        let mut buf = "Paragraph one.\n\nParagraph two.".to_string();
-        let paras = drain_complete_paragraphs(&mut buf);
-        assert_eq!(paras, vec!["Paragraph one."]);
-        assert_eq!(buf, "Paragraph two.");
-    }
-
-    #[test]
-    fn drain_complete_paragraphs_multiple_boundaries() {
-        let mut buf = "A\n\nB\n\nC".to_string();
-        let paras = drain_complete_paragraphs(&mut buf);
-        assert_eq!(paras, vec!["A", "B"]);
-        assert_eq!(buf, "C");
-    }
-
-    #[test]
-    fn drain_complete_paragraphs_trailing_boundary() {
-        let mut buf = "A\n\nB\n\n".to_string();
-        let paras = drain_complete_paragraphs(&mut buf);
-        assert_eq!(paras, vec!["A", "B"]);
-        assert_eq!(buf, "");
-    }
-
-    #[test]
-    fn drain_complete_paragraphs_empty_input() {
-        let mut buf = String::new();
-        let paras = drain_complete_paragraphs(&mut buf);
-        assert!(paras.is_empty());
-        assert_eq!(buf, "");
-    }
-
-    #[test]
-    fn drain_complete_paragraphs_skips_empty_chunks() {
-        // Consecutive double-newlines produce no empty paragraphs.
-        let mut buf = "\n\n\n\nHello".to_string();
-        let paras = drain_complete_paragraphs(&mut buf);
-        assert!(paras.is_empty());
-        assert_eq!(buf, "Hello");
-    }
-
-    #[test]
-    fn drain_complete_paragraphs_trims_whitespace() {
-        let mut buf = "  Hello  \n\n  World  ".to_string();
-        let paras = drain_complete_paragraphs(&mut buf);
-        assert_eq!(paras, vec!["Hello"]);
-        assert_eq!(buf, "  World  ");
-    }
-
-    // -- drain_complete_paragraphs: code-fence awareness -------------------
-
-    #[test]
-    fn drain_complete_paragraphs_code_fence_blank_line_not_split() {
-        // A blank line inside a fenced code block must NOT trigger a split.
-        // Before the fix the function would split at the blank line and the
-        // second half would be sent without the opening fence, breaking rendering.
-        let mut buf =
-            "```rust\nfn foo() {\n    let x = 1;\n\n    let y = 2;\n}\n```\n\nNext paragraph."
-                .to_string();
-        let paras = drain_complete_paragraphs(&mut buf);
-        assert_eq!(
-            paras.len(),
-            1,
-            "code fence with blank line should not be split into multiple messages: {paras:?}"
-        );
-        assert!(
-            paras[0].starts_with("```rust"),
-            "first paragraph should be the code fence: {:?}",
-            paras[0]
-        );
-        assert!(
-            paras[0].contains("let y = 2;"),
-            "code fence should contain content from both sides of the blank line: {:?}",
-            paras[0]
-        );
-        assert_eq!(buf, "Next paragraph.");
-    }
-
-    #[test]
-    fn drain_complete_paragraphs_text_before_and_after_fenced_block() {
-        // Text paragraph, then a code block with an internal blank line, then more text.
-        let mut buf = "Before\n\n```\ncode\n\nmore code\n```\n\nAfter".to_string();
-        let paras = drain_complete_paragraphs(&mut buf);
-        assert_eq!(paras.len(), 2, "expected two paragraphs: {paras:?}");
-        assert_eq!(paras[0], "Before");
-        assert!(
-            paras[1].starts_with("```"),
-            "second paragraph should be the code fence: {:?}",
-            paras[1]
-        );
-        assert!(
-            paras[1].contains("more code"),
-            "code fence content must include the part after the blank line: {:?}",
-            paras[1]
-        );
-        assert_eq!(buf, "After");
-    }
-
-    #[test]
-    fn drain_complete_paragraphs_incremental_simulation() {
-        // Simulate tokens arriving one character at a time.
-        let mut buf = String::new();
-        let mut all_paragraphs = Vec::new();
-
-        for ch in "First para.\n\nSecond para.\n\nThird.".chars() {
-            buf.push(ch);
-            all_paragraphs.extend(drain_complete_paragraphs(&mut buf));
-        }
-
-        assert_eq!(all_paragraphs, vec!["First para.", "Second para."]);
-        assert_eq!(buf, "Third.");
-    }
-
    // -- format_user_prompt -------------------------------------------------

    #[test]
@@ -0,0 +1,7 @@
+//! Re-exports from `crate::chat::commands`.
+//!
+//! The command dispatch infrastructure has moved to `crate::chat::commands` so
+//! it can be shared by all transports. This module re-exports everything for
+//! backwards compatibility with in-tree references.
+
+pub use crate::chat::commands::*;
@@ -18,7 +18,7 @@
 pub mod assign;
 mod bot;
 pub mod commands;
-mod config;
+pub(crate) mod config;
 pub mod delete;
 pub mod htop;
 pub mod rebuild;
@@ -28,7 +28,7 @@ pub mod start;
 pub mod notifications;
 pub mod transport_impl;

-pub use bot::{ConversationEntry, ConversationRole, RoomConversation, drain_complete_paragraphs};
+pub use bot::{ConversationEntry, ConversationRole, RoomConversation};
 pub use config::BotConfig;

 use crate::agents::AgentPool;
@@ -356,14 +356,14 @@ mod tests {

    #[test]
    fn start_command_is_registered() {
-        use crate::chat::transport::matrix::commands::commands;
+        use crate::chat::commands::commands;
        let found = commands().iter().any(|c| c.name == "start");
        assert!(found, "start command must be in the registry");
    }

    #[test]
    fn start_command_appears_in_help() {
-        let result = crate::chat::transport::matrix::commands::tests::try_cmd_addressed(
+        let result = crate::chat::commands::tests::try_cmd_addressed(
            "Timmy",
            "@timmy:homeserver.local",
            "@timmy help",
@@ -378,7 +378,7 @@ mod tests {
    #[test]
    fn start_command_falls_through_to_none_in_registry() {
        // The start handler in the registry returns None (handled async in bot.rs).
-        let result = crate::chat::transport::matrix::commands::tests::try_cmd_addressed(
+        let result = crate::chat::commands::tests::try_cmd_addressed(
            "Timmy",
            "@timmy:homeserver.local",
            "@timmy start 42",
@@ -669,7 +669,7 @@ pub async fn slash_command_receive(
        format!("{} {keyword} {}", ctx.bot_name, payload.text)
    };

-    use crate::chat::transport::matrix::commands::{CommandDispatch, try_handle_command};
+    use crate::chat::commands::{CommandDispatch, try_handle_command};

    let dispatch = CommandDispatch {
        bot_name: &ctx.bot_name,
@@ -702,7 +702,7 @@ async fn handle_incoming_message(
    user: &str,
    message: &str,
 ) {
-    use crate::chat::transport::matrix::commands::{CommandDispatch, try_handle_command};
+    use crate::chat::commands::{CommandDispatch, try_handle_command};

    let dispatch = CommandDispatch {
        bot_name: &ctx.bot_name,
@@ -815,7 +815,7 @@ async fn handle_llm_message(
    user_message: &str,
 ) {
    use crate::llm::providers::claude_code::{ClaudeCodeProvider, ClaudeCodeResult};
-    use crate::chat::transport::matrix::drain_complete_paragraphs;
+    use crate::chat::util::drain_complete_paragraphs;
    use std::sync::atomic::{AtomicBool, Ordering};
    use tokio::sync::watch;

@@ -1471,7 +1471,7 @@ mod tests {
    fn slash_command_dispatches_through_command_registry() {
        // Verify that the synthetic message built by the slash handler
        // correctly dispatches through try_handle_command.
-        use crate::chat::transport::matrix::commands::{CommandDispatch, try_handle_command};
+        use crate::chat::commands::{CommandDispatch, try_handle_command};

        let agents = test_agents();
        let ambient_rooms = test_ambient_rooms();
@@ -1498,7 +1498,7 @@ mod tests {

    #[test]
    fn slash_command_show_passes_args_through_registry() {
-        use crate::chat::transport::matrix::commands::{CommandDispatch, try_handle_command};
+        use crate::chat::commands::{CommandDispatch, try_handle_command};

        let agents = test_agents();
        let ambient_rooms = test_ambient_rooms();
@@ -978,7 +978,7 @@ pub async fn webhook_receive(

 /// Dispatch an incoming WhatsApp message to bot commands.
 async fn handle_incoming_message(ctx: &WhatsAppWebhookContext, sender: &str, message: &str) {
-    use crate::chat::transport::matrix::commands::{CommandDispatch, try_handle_command};
+    use crate::chat::commands::{CommandDispatch, try_handle_command};

    // Allowlist check: when configured, silently ignore unauthorized senders.
    if !ctx.allowed_phones.is_empty()
@@ -1071,7 +1071,7 @@ async fn handle_incoming_message(ctx: &WhatsAppWebhookContext, sender: &str, mes

 /// Forward a message to Claude Code and send the response back via WhatsApp.
 async fn handle_llm_message(ctx: &WhatsAppWebhookContext, sender: &str, user_message: &str) {
-    use crate::chat::transport::matrix::drain_complete_paragraphs;
+    use crate::chat::util::drain_complete_paragraphs;
    use crate::llm::providers::claude_code::{ClaudeCodeProvider, ClaudeCodeResult};
    use std::sync::atomic::{AtomicBool, Ordering};
    use tokio::sync::watch;
@@ -0,0 +1,315 @@
+//! Shared text utilities used by all chat transports.
+//!
+//! These functions are transport-agnostic helpers for processing chat messages:
+//! prefix stripping, bot-mention handling, and paragraph buffering.
+
+/// ASCII-case-insensitive prefix strip: returns the text after `prefix` only when that text is
+/// empty or begins with a char that is not alphanumeric, `-` or `_`; otherwise returns `None`.
+pub fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
+    let candidate = text.get(..prefix.len())?; // None if too short or not on a char boundary
+    if !candidate.eq_ignore_ascii_case(prefix) {
+        return None;
+    }
+    let rest = &text[prefix.len()..];
+    // Must be at end, or followed by a char that is not alphanumeric, '-' or '_'
+    match rest.chars().next() {
+        None => Some(rest), // exact match, empty remainder
+        Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None, // not a word boundary
+        _ => Some(rest),
+    }
+}
+
+/// Strip a leading bot mention from a raw message body, returning the rest un-trimmed.
+///
+/// Forms tried in order on the trimmed message (ASCII case-insensitive; no match → trimmed input):
+/// - `@bot_localpart:server.com rest` → `" rest"`
+/// - `@bot_localpart rest` → `" rest"`
+/// - `DisplayName rest` → `" rest"`
+pub fn strip_bot_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
+    let trimmed = message.trim();
+
+    // Try full Matrix user ID (e.g. "@timmy:homeserver.local")
+    if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
+        return rest;
+    }
+
+    // Try @localpart (e.g. "@timmy"): the part of the user ID before the first ':'
+    if let Some(localpart) = bot_user_id.split(':').next()
+        && let Some(rest) = strip_prefix_ci(trimmed, localpart)
+    {
+        return rest;
+    }
+
+    // Try display name (e.g. "Timmy")
+    if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
+        return rest;
+    }
+
+    trimmed
+}
+
+/// Returns `true` when `text` ends while inside an open fenced code block.
+///
+/// Every line whose first non-whitespace characters are ` ``` ` (so three or
+/// more backticks, with or without an info string) toggles the fence state;
+/// the result is `true` when an odd number of such lines was seen.
+fn is_inside_code_fence(text: &str) -> bool {
+    let mut in_fence = false;
+    for line in text.lines() {
+        if line.trim_start().starts_with("```") {
+            in_fence = !in_fence;
+        }
+    }
+    in_fence
+}
+
+/// Drain all complete paragraphs from `buffer` and return them in order.
+///
+/// A paragraph boundary is a double newline (`\n\n`). Each drained paragraph
+/// is trimmed of surrounding whitespace; empty paragraphs are discarded.
+/// The buffer keeps only the remaining incomplete text, which is left untrimmed.
+///
+/// **Code-fence awareness:** a `\n\n` that occurs *inside* a fenced code
+/// block (delimited by ` ``` ` lines) is **not** treated as a paragraph
+/// boundary.  This prevents a blank line inside a code block from splitting
+/// the fence across multiple messages, which would corrupt the rendering.
+pub fn drain_complete_paragraphs(buffer: &mut String) -> Vec<String> {
+    let mut paragraphs = Vec::new();
+    let mut search_from = 0;
+    loop {
+        let Some(pos) = buffer[search_from..].find("\n\n") else {
+            break;
+        };
+        let abs_pos = search_from + pos;
+        // Only split here when the text before the boundary does NOT end inside a code fence.
+        if is_inside_code_fence(&buffer[..abs_pos]) {
+            // Skip past this \n\n and keep looking for the next boundary.
+            search_from = abs_pos + 2;
+        } else {
+            let chunk = buffer[..abs_pos].trim().to_string();
+            *buffer = buffer[abs_pos + 2..].to_string(); // drop the chunk and its "\n\n" separator
+            search_from = 0; // the buffer was rebuilt, so restart the scan from its beginning
+            if !chunk.is_empty() {
+                paragraphs.push(chunk);
+            }
+        }
+    }
+    paragraphs
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // -- strip_prefix_ci ----------------------------------------------------
+
+    #[test]
+    fn strip_prefix_ci_basic() {
+        assert_eq!(strip_prefix_ci("Hello world", "hello"), Some(" world"));
+    }
+
+    #[test]
+    fn strip_prefix_ci_no_match() {
+        assert_eq!(strip_prefix_ci("goodbye", "hello"), None);
+    }
+
+    #[test]
+    fn strip_prefix_ci_word_boundary_required() {
+        assert_eq!(strip_prefix_ci("helloworld", "hello"), None);
+    }
+
+    #[test]
+    fn strip_prefix_ci_exact_match() {
+        assert_eq!(strip_prefix_ci("hello", "hello"), Some(""));
+    }
+
+    #[test]
+    fn strip_prefix_ci_multibyte_no_panic_smart_quote() {
+        // "abcde\u{2019}xyz" — U+2019 is 3 bytes starting at byte 5.
+        // A prefix of length 6 (e.g. "abcdef") lands inside the 3-byte char.
+        // Regression guard: slicing here once panicked with "byte index 6 is not a char boundary".
+        let text = "abcde\u{2019}xyz";
+        assert_eq!(strip_prefix_ci(text, "abcdef"), None);
+    }
+
+    #[test]
+    fn strip_prefix_ci_multibyte_no_panic_emoji() {
+        // U+1F600 is 4 bytes starting at byte 3; prefix length 4 lands inside it, so expect None.
+        let text = "abc\u{1F600}def";
+        assert_eq!(strip_prefix_ci(text, "abcd"), None);
+    }
+
+    // -- strip_bot_mention --------------------------------------------------
+
+    #[test]
+    fn strip_mention_full_user_id() {
+        let rest = strip_bot_mention(
+            "@timmy:homeserver.local help",
+            "Timmy",
+            "@timmy:homeserver.local",
+        );
+        assert_eq!(rest.trim(), "help");
+    }
+
+    #[test]
+    fn strip_mention_localpart() {
+        let rest = strip_bot_mention("@timmy help me", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(rest.trim(), "help me");
+    }
+
+    #[test]
+    fn strip_mention_display_name() {
+        let rest = strip_bot_mention("Timmy help", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(rest.trim(), "help");
+    }
+
+    #[test]
+    fn strip_mention_display_name_case_insensitive() {
+        let rest = strip_bot_mention("timmy help", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(rest.trim(), "help");
+    }
+
+    #[test]
+    fn strip_mention_no_match_returns_original() {
+        let rest = strip_bot_mention("hello world", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(rest, "hello world");
+    }
+
+    #[test]
+    fn strip_mention_does_not_match_longer_name() {
+        // "@timmybot" should NOT match "@timmy": the following 'b' is not a word boundary.
+        let rest = strip_bot_mention("@timmybot help", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(rest, "@timmybot help");
+    }
+
+    #[test]
+    fn strip_mention_comma_after_name() {
+        let rest = strip_bot_mention("@timmy, help", "Timmy", "@timmy:homeserver.local");
+        assert_eq!(rest.trim().trim_start_matches(',').trim(), "help");
+    }
+
+    // -- drain_complete_paragraphs ------------------------------------------
+
+    #[test]
+    fn drain_complete_paragraphs_no_boundary_returns_empty() {
+        let mut buf = "Hello World".to_string();
+        let paras = drain_complete_paragraphs(&mut buf);
+        assert!(paras.is_empty());
+        assert_eq!(buf, "Hello World");
+    }
+
+    #[test]
+    fn drain_complete_paragraphs_single_boundary() {
+        let mut buf = "Paragraph one.\n\nParagraph two.".to_string();
+        let paras = drain_complete_paragraphs(&mut buf);
+        assert_eq!(paras, vec!["Paragraph one."]);
+        assert_eq!(buf, "Paragraph two.");
+    }
+
+    #[test]
+    fn drain_complete_paragraphs_multiple_boundaries() {
+        let mut buf = "A\n\nB\n\nC".to_string();
+        let paras = drain_complete_paragraphs(&mut buf);
+        assert_eq!(paras, vec!["A", "B"]);
+        assert_eq!(buf, "C");
+    }
+
+    #[test]
+    fn drain_complete_paragraphs_trailing_boundary() {
+        let mut buf = "A\n\nB\n\n".to_string();
+        let paras = drain_complete_paragraphs(&mut buf);
+        assert_eq!(paras, vec!["A", "B"]);
+        assert_eq!(buf, "");
+    }
+
+    #[test]
+    fn drain_complete_paragraphs_empty_input() {
+        let mut buf = String::new();
+        let paras = drain_complete_paragraphs(&mut buf);
+        assert!(paras.is_empty());
+        assert_eq!(buf, "");
+    }
+
+    #[test]
+    fn drain_complete_paragraphs_skips_empty_chunks() {
+        // Consecutive double-newlines produce no empty paragraphs; the empty chunks are discarded.
+        let mut buf = "\n\n\n\nHello".to_string();
+        let paras = drain_complete_paragraphs(&mut buf);
+        assert!(paras.is_empty());
+        assert_eq!(buf, "Hello");
+    }
+
+    #[test]
+    fn drain_complete_paragraphs_trims_whitespace() {
+        let mut buf = "  Hello  \n\n  World  ".to_string();
+        let paras = drain_complete_paragraphs(&mut buf);
+        assert_eq!(paras, vec!["Hello"]);
+        assert_eq!(buf, "  World  ");
+    }
+
+    // -- drain_complete_paragraphs: code-fence awareness -------------------
+
+    #[test]
+    fn drain_complete_paragraphs_code_fence_blank_line_not_split() {
+        // A blank line inside a fenced code block must NOT trigger a split.
+        let mut buf =
+            "```rust\nfn foo() {\n    let x = 1;\n\n    let y = 2;\n}\n```\n\nNext paragraph."
+                .to_string();
+        let paras = drain_complete_paragraphs(&mut buf);
+        assert_eq!(
+            paras.len(),
+            1,
+            "code fence with blank line should not be split into multiple messages: {paras:?}"
+        );
+        assert!(
+            paras[0].starts_with("```rust"),
+            "first paragraph should be the code fence: {:?}",
+            paras[0]
+        );
+        assert!(
+            paras[0].contains("let y = 2;"),
+            "code fence should contain content from both sides of the blank line: {:?}",
+            paras[0]
+        );
+        assert_eq!(buf, "Next paragraph.");
+    }
+
+    #[test]
+    fn drain_complete_paragraphs_text_before_and_after_fenced_block() {
+        // Text paragraph, then a code block with an internal blank line, then more text.
+        let mut buf = "Before\n\n```\ncode\n\nmore code\n```\n\nAfter".to_string();
+        let paras = drain_complete_paragraphs(&mut buf);
+        assert_eq!(paras.len(), 2, "expected two paragraphs: {paras:?}");
+        assert_eq!(paras[0], "Before");
+        assert!(
+            paras[1].starts_with("```"),
+            "second paragraph should be the code fence: {:?}",
+            paras[1]
+        );
+        assert!(
+            paras[1].contains("more code"),
+            "code fence content must include the part after the blank line: {:?}",
+            paras[1]
+        );
+        assert_eq!(buf, "After");
+    }
+
+    #[test]
+    fn drain_complete_paragraphs_incremental_simulation() {
+        // Simulate streaming: push one character at a time and drain after every push.
+        let mut buf = String::new();
+        let mut all_paragraphs = Vec::new();
+
+        for ch in "First para.\n\nSecond para.\n\nThird.".chars() {
+            buf.push(ch);
+            all_paragraphs.extend(drain_complete_paragraphs(&mut buf));
+        }
+
+        assert_eq!(all_paragraphs, vec!["First para.", "Second para."]);
+        assert_eq!(buf, "Third.");
+    }
+}