diff --git a/server/src/chat/transport/matrix/commands/ambient.rs b/server/src/chat/commands/ambient.rs similarity index 100% rename from server/src/chat/transport/matrix/commands/ambient.rs rename to server/src/chat/commands/ambient.rs diff --git a/server/src/chat/transport/matrix/commands/assign.rs b/server/src/chat/commands/assign.rs similarity index 100% rename from server/src/chat/transport/matrix/commands/assign.rs rename to server/src/chat/commands/assign.rs diff --git a/server/src/chat/transport/matrix/commands/cost.rs b/server/src/chat/commands/cost.rs similarity index 100% rename from server/src/chat/transport/matrix/commands/cost.rs rename to server/src/chat/commands/cost.rs diff --git a/server/src/chat/transport/matrix/commands/git.rs b/server/src/chat/commands/git.rs similarity index 100% rename from server/src/chat/transport/matrix/commands/git.rs rename to server/src/chat/commands/git.rs diff --git a/server/src/chat/transport/matrix/commands/help.rs b/server/src/chat/commands/help.rs similarity index 100% rename from server/src/chat/transport/matrix/commands/help.rs rename to server/src/chat/commands/help.rs diff --git a/server/src/chat/transport/matrix/commands/mod.rs b/server/src/chat/commands/mod.rs similarity index 74% rename from server/src/chat/transport/matrix/commands/mod.rs rename to server/src/chat/commands/mod.rs index 08e21a83..21b06b3f 100644 --- a/server/src/chat/transport/matrix/commands/mod.rs +++ b/server/src/chat/commands/mod.rs @@ -1,4 +1,4 @@ -//! Bot-level command registry for the Matrix bot. +//! Bot-level command registry shared by all chat transports. //! //! Commands registered here are handled directly by the bot without invoking //! the LLM. 
The registry is the single source of truth — the `help` command @@ -18,6 +18,7 @@ mod triage; mod unreleased; use crate::agents::AgentPool; +use crate::chat::util::strip_bot_mention; use std::collections::HashSet; use std::path::Path; use std::sync::{Arc, Mutex}; @@ -163,8 +164,8 @@ pub fn commands() -> &'static [BotCommand] { /// Try to match a user message against a registered bot command. /// -/// The message is expected to be the raw body text from Matrix (e.g., -/// `"@timmy help"`). The bot mention prefix is stripped before matching. +/// The message is expected to be the raw body text (e.g., `"@timmy help"`). +/// The bot mention prefix is stripped before matching. /// /// Returns `Some(response)` if a command matched and was handled, `None` /// otherwise (the caller should fall through to the LLM). @@ -196,51 +197,6 @@ pub fn try_handle_command(dispatch: &CommandDispatch<'_>, message: &str) -> Opti .and_then(|c| (c.handler)(&ctx)) } -/// Strip the bot mention prefix from a raw message body. -/// -/// Handles these forms (case-insensitive where applicable): -/// - `@bot_localpart:server.com rest` → `rest` -/// - `@bot_localpart rest` → `rest` -/// - `DisplayName rest` → `rest` -fn strip_bot_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str { - let trimmed = message.trim(); - - // Try full Matrix user ID (e.g. "@timmy:homeserver.local") - if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) { - return rest; - } - - // Try @localpart (e.g. "@timmy") - if let Some(localpart) = bot_user_id.split(':').next() - && let Some(rest) = strip_prefix_ci(trimmed, localpart) - { - return rest; - } - - // Try display name (e.g. "Timmy") - if let Some(rest) = strip_prefix_ci(trimmed, bot_name) { - return rest; - } - - trimmed -} - -/// Case-insensitive prefix strip that also requires the match to end at a -/// word boundary (whitespace, punctuation, or end-of-string). 
-fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> { - let candidate = text.get(..prefix.len())?; - if !candidate.eq_ignore_ascii_case(prefix) { - return None; - } - let rest = &text[prefix.len()..]; - // Must be at end or followed by non-alphanumeric - match rest.chars().next() { - None => Some(rest), // exact match, empty remainder - Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None, // not a word boundary - _ => Some(rest), - } -} - /// Fallback handler for the `htop` command when it is not intercepted by the /// async handler in `on_room_message`. In practice this is never called — /// htop is detected and handled before `try_handle_command` is invoked. @@ -346,55 +302,6 @@ pub(crate) mod tests { // Re-export commands() for submodule tests pub use super::commands; - // -- strip_bot_mention -------------------------------------------------- - - #[test] - fn strip_mention_full_user_id() { - let rest = strip_bot_mention( - "@timmy:homeserver.local help", - "Timmy", - "@timmy:homeserver.local", - ); - assert_eq!(rest.trim(), "help"); - } - - #[test] - fn strip_mention_localpart() { - let rest = strip_bot_mention("@timmy help me", "Timmy", "@timmy:homeserver.local"); - assert_eq!(rest.trim(), "help me"); - } - - #[test] - fn strip_mention_display_name() { - let rest = strip_bot_mention("Timmy help", "Timmy", "@timmy:homeserver.local"); - assert_eq!(rest.trim(), "help"); - } - - #[test] - fn strip_mention_display_name_case_insensitive() { - let rest = strip_bot_mention("timmy help", "Timmy", "@timmy:homeserver.local"); - assert_eq!(rest.trim(), "help"); - } - - #[test] - fn strip_mention_no_match_returns_original() { - let rest = strip_bot_mention("hello world", "Timmy", "@timmy:homeserver.local"); - assert_eq!(rest, "hello world"); - } - - #[test] - fn strip_mention_does_not_match_longer_name() { - // "@timmybot" should NOT match "@timmy" - let rest = strip_bot_mention("@timmybot help", "Timmy", "@timmy:homeserver.local"); - 
assert_eq!(rest, "@timmybot help"); - } - - #[test] - fn strip_mention_comma_after_name() { - let rest = strip_bot_mention("@timmy, help", "Timmy", "@timmy:homeserver.local"); - assert_eq!(rest.trim().trim_start_matches(',').trim(), "help"); - } - // -- try_handle_command ------------------------------------------------- #[test] @@ -427,44 +334,6 @@ pub(crate) mod tests { ); } - // -- strip_prefix_ci ---------------------------------------------------- - - #[test] - fn strip_prefix_ci_basic() { - assert_eq!(strip_prefix_ci("Hello world", "hello"), Some(" world")); - } - - #[test] - fn strip_prefix_ci_no_match() { - assert_eq!(strip_prefix_ci("goodbye", "hello"), None); - } - - #[test] - fn strip_prefix_ci_word_boundary_required() { - assert_eq!(strip_prefix_ci("helloworld", "hello"), None); - } - - #[test] - fn strip_prefix_ci_exact_match() { - assert_eq!(strip_prefix_ci("hello", "hello"), Some("")); - } - - #[test] - fn strip_prefix_ci_multibyte_no_panic_smart_quote() { - // "abcde\u{2019}xyz" — U+2019 is 3 bytes starting at byte 5. - // A prefix of length 6 (e.g. "abcdef") lands inside the 3-byte char. - // Previously this caused: "byte index 6 is not a char boundary". - let text = "abcde\u{2019}xyz"; - assert_eq!(strip_prefix_ci(text, "abcdef"), None); - } - - #[test] - fn strip_prefix_ci_multibyte_no_panic_emoji() { - // U+1F600 is 4 bytes starting at byte 3. Prefix length 4 lands inside it. 
- let text = "abc\u{1F600}def"; - assert_eq!(strip_prefix_ci(text, "abcd"), None); - } - // -- commands registry -------------------------------------------------- #[test] diff --git a/server/src/chat/transport/matrix/commands/move_story.rs b/server/src/chat/commands/move_story.rs similarity index 100% rename from server/src/chat/transport/matrix/commands/move_story.rs rename to server/src/chat/commands/move_story.rs diff --git a/server/src/chat/transport/matrix/commands/overview.rs b/server/src/chat/commands/overview.rs similarity index 100% rename from server/src/chat/transport/matrix/commands/overview.rs rename to server/src/chat/commands/overview.rs diff --git a/server/src/chat/transport/matrix/commands/show.rs b/server/src/chat/commands/show.rs similarity index 100% rename from server/src/chat/transport/matrix/commands/show.rs rename to server/src/chat/commands/show.rs diff --git a/server/src/chat/transport/matrix/commands/status.rs b/server/src/chat/commands/status.rs similarity index 100% rename from server/src/chat/transport/matrix/commands/status.rs rename to server/src/chat/commands/status.rs diff --git a/server/src/chat/transport/matrix/commands/triage.rs b/server/src/chat/commands/triage.rs similarity index 100% rename from server/src/chat/transport/matrix/commands/triage.rs rename to server/src/chat/commands/triage.rs diff --git a/server/src/chat/transport/matrix/commands/unreleased.rs b/server/src/chat/commands/unreleased.rs similarity index 100% rename from server/src/chat/transport/matrix/commands/unreleased.rs rename to server/src/chat/commands/unreleased.rs diff --git a/server/src/chat/mod.rs b/server/src/chat/mod.rs index 39e485e1..c96afcee 100644 --- a/server/src/chat/mod.rs +++ b/server/src/chat/mod.rs @@ -4,7 +4,9 @@ //! sending and editing messages, allowing the bot logic (commands, htop, //! notifications) to work against any chat platform — Matrix, WhatsApp, etc. 
+pub mod commands; pub mod transport; +pub mod util; use async_trait::async_trait; diff --git a/server/src/chat/transport/matrix/bot.rs b/server/src/chat/transport/matrix/bot.rs index 6bbeacac..188c6d2a 100644 --- a/server/src/chat/transport/matrix/bot.rs +++ b/server/src/chat/transport/matrix/bot.rs @@ -1,8 +1,9 @@ use crate::agents::AgentPool; +use crate::chat::ChatTransport; +use crate::chat::util::drain_complete_paragraphs; use crate::http::context::{PermissionDecision, PermissionForward}; use crate::llm::providers::claude_code::{ClaudeCodeProvider, ClaudeCodeResult}; use crate::slog; -use crate::chat::ChatTransport; use matrix_sdk::{ Client, config::SyncSettings, @@ -1362,59 +1363,6 @@ pub fn markdown_to_html(markdown: &str) -> String { html_output } -// --------------------------------------------------------------------------- -// Paragraph buffering helper -// --------------------------------------------------------------------------- - -/// Returns `true` when `text` ends while inside an open fenced code block. -/// -/// A fenced code block opens and closes on lines that start with ` ``` ` -/// (three or more backticks). We count the fence markers and return `true` -/// when the count is odd (a fence was opened but not yet closed). -fn is_inside_code_fence(text: &str) -> bool { - let mut in_fence = false; - for line in text.lines() { - if line.trim_start().starts_with("```") { - in_fence = !in_fence; - } - } - in_fence -} - -/// Drain all complete paragraphs from `buffer` and return them. -/// -/// A paragraph boundary is a double newline (`\n\n`). Each drained paragraph -/// is trimmed of surrounding whitespace; empty paragraphs are discarded. -/// The buffer is left with only the remaining incomplete text. -/// -/// **Code-fence awareness:** a `\n\n` that occurs *inside* a fenced code -/// block (delimited by ` ``` ` lines) is **not** treated as a paragraph -/// boundary. 
This prevents a blank line inside a code block from splitting -/// the fence across multiple Matrix messages, which would corrupt the -/// rendering of the second half. -pub fn drain_complete_paragraphs(buffer: &mut String) -> Vec<String> { - let mut paragraphs = Vec::new(); - let mut search_from = 0; - loop { - let Some(pos) = buffer[search_from..].find("\n\n") else { - break; - }; - let abs_pos = search_from + pos; - // Only split at this boundary when we are NOT inside a code fence. - if is_inside_code_fence(&buffer[..abs_pos]) { - // Skip past this \n\n and keep looking for the next boundary. - search_from = abs_pos + 2; - } else { - let chunk = buffer[..abs_pos].trim().to_string(); - *buffer = buffer[abs_pos + 2..].to_string(); - search_from = 0; - if !chunk.is_empty() { - paragraphs.push(chunk); - } - } - } - paragraphs -} // --------------------------------------------------------------------------- // Tests @@ -1623,129 +1571,6 @@ mod tests { let _cloned = ctx.clone(); } - // -- drain_complete_paragraphs ------------------------------------------ - - #[test] - fn drain_complete_paragraphs_no_boundary_returns_empty() { - let mut buf = "Hello World".to_string(); - let paras = drain_complete_paragraphs(&mut buf); - assert!(paras.is_empty()); - assert_eq!(buf, "Hello World"); - } - - #[test] - fn drain_complete_paragraphs_single_boundary() { - let mut buf = "Paragraph one.\n\nParagraph two.".to_string(); - let paras = drain_complete_paragraphs(&mut buf); - assert_eq!(paras, vec!["Paragraph one."]); - assert_eq!(buf, "Paragraph two."); - } - - #[test] - fn drain_complete_paragraphs_multiple_boundaries() { - let mut buf = "A\n\nB\n\nC".to_string(); - let paras = drain_complete_paragraphs(&mut buf); - assert_eq!(paras, vec!["A", "B"]); - assert_eq!(buf, "C"); - } - - #[test] - fn drain_complete_paragraphs_trailing_boundary() { - let mut buf = "A\n\nB\n\n".to_string(); - let paras = drain_complete_paragraphs(&mut buf); - assert_eq!(paras, vec!["A", "B"]); - assert_eq!(buf, 
""); - } - - #[test] - fn drain_complete_paragraphs_empty_input() { - let mut buf = String::new(); - let paras = drain_complete_paragraphs(&mut buf); - assert!(paras.is_empty()); - assert_eq!(buf, ""); - } - - #[test] - fn drain_complete_paragraphs_skips_empty_chunks() { - // Consecutive double-newlines produce no empty paragraphs. - let mut buf = "\n\n\n\nHello".to_string(); - let paras = drain_complete_paragraphs(&mut buf); - assert!(paras.is_empty()); - assert_eq!(buf, "Hello"); - } - - #[test] - fn drain_complete_paragraphs_trims_whitespace() { - let mut buf = " Hello \n\n World ".to_string(); - let paras = drain_complete_paragraphs(&mut buf); - assert_eq!(paras, vec!["Hello"]); - assert_eq!(buf, " World "); - } - - // -- drain_complete_paragraphs: code-fence awareness ------------------- - - #[test] - fn drain_complete_paragraphs_code_fence_blank_line_not_split() { - // A blank line inside a fenced code block must NOT trigger a split. - // Before the fix the function would split at the blank line and the - // second half would be sent without the opening fence, breaking rendering. - let mut buf = - "```rust\nfn foo() {\n let x = 1;\n\n let y = 2;\n}\n```\n\nNext paragraph." - .to_string(); - let paras = drain_complete_paragraphs(&mut buf); - assert_eq!( - paras.len(), - 1, - "code fence with blank line should not be split into multiple messages: {paras:?}" - ); - assert!( - paras[0].starts_with("```rust"), - "first paragraph should be the code fence: {:?}", - paras[0] - ); - assert!( - paras[0].contains("let y = 2;"), - "code fence should contain content from both sides of the blank line: {:?}", - paras[0] - ); - assert_eq!(buf, "Next paragraph."); - } - - #[test] - fn drain_complete_paragraphs_text_before_and_after_fenced_block() { - // Text paragraph, then a code block with an internal blank line, then more text. 
- let mut buf = "Before\n\n```\ncode\n\nmore code\n```\n\nAfter".to_string(); - let paras = drain_complete_paragraphs(&mut buf); - assert_eq!(paras.len(), 2, "expected two paragraphs: {paras:?}"); - assert_eq!(paras[0], "Before"); - assert!( - paras[1].starts_with("```"), - "second paragraph should be the code fence: {:?}", - paras[1] - ); - assert!( - paras[1].contains("more code"), - "code fence content must include the part after the blank line: {:?}", - paras[1] - ); - assert_eq!(buf, "After"); - } - - #[test] - fn drain_complete_paragraphs_incremental_simulation() { - // Simulate tokens arriving one character at a time. - let mut buf = String::new(); - let mut all_paragraphs = Vec::new(); - - for ch in "First para.\n\nSecond para.\n\nThird.".chars() { - buf.push(ch); - all_paragraphs.extend(drain_complete_paragraphs(&mut buf)); - } - - assert_eq!(all_paragraphs, vec!["First para.", "Second para."]); - assert_eq!(buf, "Third."); - } - // -- format_user_prompt ------------------------------------------------- #[test] diff --git a/server/src/chat/transport/matrix/commands.rs b/server/src/chat/transport/matrix/commands.rs new file mode 100644 index 00000000..79304ac1 --- /dev/null +++ b/server/src/chat/transport/matrix/commands.rs @@ -0,0 +1,7 @@ +//! Re-exports from `crate::chat::commands`. +//! +//! The command dispatch infrastructure has moved to `crate::chat::commands` so +//! it can be shared by all transports. This module re-exports everything for +//! backwards compatibility with in-tree references. 
+ +pub use crate::chat::commands::*; diff --git a/server/src/chat/transport/matrix/mod.rs b/server/src/chat/transport/matrix/mod.rs index 1f20a844..801a3744 100644 --- a/server/src/chat/transport/matrix/mod.rs +++ b/server/src/chat/transport/matrix/mod.rs @@ -18,7 +18,7 @@ pub mod assign; mod bot; pub mod commands; -mod config; +pub(crate) mod config; pub mod delete; pub mod htop; pub mod rebuild; @@ -28,7 +28,7 @@ pub mod start; pub mod notifications; pub mod transport_impl; -pub use bot::{ConversationEntry, ConversationRole, RoomConversation, drain_complete_paragraphs}; +pub use bot::{ConversationEntry, ConversationRole, RoomConversation}; pub use config::BotConfig; use crate::agents::AgentPool; diff --git a/server/src/chat/transport/matrix/start.rs b/server/src/chat/transport/matrix/start.rs index 637bb9e0..683b176a 100644 --- a/server/src/chat/transport/matrix/start.rs +++ b/server/src/chat/transport/matrix/start.rs @@ -356,14 +356,14 @@ mod tests { #[test] fn start_command_is_registered() { - use crate::chat::transport::matrix::commands::commands; + use crate::chat::commands::commands; let found = commands().iter().any(|c| c.name == "start"); assert!(found, "start command must be in the registry"); } #[test] fn start_command_appears_in_help() { - let result = crate::chat::transport::matrix::commands::tests::try_cmd_addressed( + let result = crate::chat::commands::tests::try_cmd_addressed( "Timmy", "@timmy:homeserver.local", "@timmy help", @@ -378,7 +378,7 @@ mod tests { #[test] fn start_command_falls_through_to_none_in_registry() { // The start handler in the registry returns None (handled async in bot.rs). 
- let result = crate::chat::transport::matrix::commands::tests::try_cmd_addressed( + let result = crate::chat::commands::tests::try_cmd_addressed( "Timmy", "@timmy:homeserver.local", "@timmy start 42", diff --git a/server/src/chat/transport/slack.rs b/server/src/chat/transport/slack.rs index be2ca876..7a1f0f1a 100644 --- a/server/src/chat/transport/slack.rs +++ b/server/src/chat/transport/slack.rs @@ -669,7 +669,7 @@ pub async fn slash_command_receive( format!("{} {keyword} {}", ctx.bot_name, payload.text) }; - use crate::chat::transport::matrix::commands::{CommandDispatch, try_handle_command}; + use crate::chat::commands::{CommandDispatch, try_handle_command}; let dispatch = CommandDispatch { bot_name: &ctx.bot_name, @@ -702,7 +702,7 @@ async fn handle_incoming_message( user: &str, message: &str, ) { - use crate::chat::transport::matrix::commands::{CommandDispatch, try_handle_command}; + use crate::chat::commands::{CommandDispatch, try_handle_command}; let dispatch = CommandDispatch { bot_name: &ctx.bot_name, @@ -815,7 +815,7 @@ async fn handle_llm_message( user_message: &str, ) { use crate::llm::providers::claude_code::{ClaudeCodeProvider, ClaudeCodeResult}; - use crate::chat::transport::matrix::drain_complete_paragraphs; + use crate::chat::util::drain_complete_paragraphs; use std::sync::atomic::{AtomicBool, Ordering}; use tokio::sync::watch; @@ -1471,7 +1471,7 @@ mod tests { fn slash_command_dispatches_through_command_registry() { // Verify that the synthetic message built by the slash handler // correctly dispatches through try_handle_command. 
- use crate::chat::transport::matrix::commands::{CommandDispatch, try_handle_command}; + use crate::chat::commands::{CommandDispatch, try_handle_command}; let agents = test_agents(); let ambient_rooms = test_ambient_rooms(); @@ -1498,7 +1498,7 @@ mod tests { #[test] fn slash_command_show_passes_args_through_registry() { - use crate::chat::transport::matrix::commands::{CommandDispatch, try_handle_command}; + use crate::chat::commands::{CommandDispatch, try_handle_command}; let agents = test_agents(); let ambient_rooms = test_ambient_rooms(); diff --git a/server/src/chat/transport/whatsapp.rs b/server/src/chat/transport/whatsapp.rs index 11edda17..21083720 100644 --- a/server/src/chat/transport/whatsapp.rs +++ b/server/src/chat/transport/whatsapp.rs @@ -978,7 +978,7 @@ pub async fn webhook_receive( /// Dispatch an incoming WhatsApp message to bot commands. async fn handle_incoming_message(ctx: &WhatsAppWebhookContext, sender: &str, message: &str) { - use crate::chat::transport::matrix::commands::{CommandDispatch, try_handle_command}; + use crate::chat::commands::{CommandDispatch, try_handle_command}; // Allowlist check: when configured, silently ignore unauthorized senders. if !ctx.allowed_phones.is_empty() @@ -1071,7 +1071,7 @@ async fn handle_incoming_message(ctx: &WhatsAppWebhookContext, sender: &str, mes /// Forward a message to Claude Code and send the response back via WhatsApp. async fn handle_llm_message(ctx: &WhatsAppWebhookContext, sender: &str, user_message: &str) { - use crate::chat::transport::matrix::drain_complete_paragraphs; + use crate::chat::util::drain_complete_paragraphs; use crate::llm::providers::claude_code::{ClaudeCodeProvider, ClaudeCodeResult}; use std::sync::atomic::{AtomicBool, Ordering}; use tokio::sync::watch; diff --git a/server/src/chat/util.rs b/server/src/chat/util.rs new file mode 100644 index 00000000..0b9e20c6 --- /dev/null +++ b/server/src/chat/util.rs @@ -0,0 +1,315 @@ +//! Shared text utilities used by all chat transports. +//! 
+//! These functions are transport-agnostic helpers for processing chat messages: +//! prefix stripping, bot-mention handling, and paragraph buffering. + +/// Case-insensitive prefix strip that also requires the match to end at a +/// word boundary (whitespace, punctuation, or end-of-string). +pub fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> { + let candidate = text.get(..prefix.len())?; + if !candidate.eq_ignore_ascii_case(prefix) { + return None; + } + let rest = &text[prefix.len()..]; + // Must be at end or followed by non-alphanumeric + match rest.chars().next() { + None => Some(rest), // exact match, empty remainder + Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None, // not a word boundary + _ => Some(rest), + } +} + +/// Strip the bot mention prefix from a raw message body. +/// +/// Handles these forms (case-insensitive where applicable): +/// - `@bot_localpart:server.com rest` → `rest` +/// - `@bot_localpart rest` → `rest` +/// - `DisplayName rest` → `rest` +pub fn strip_bot_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str { + let trimmed = message.trim(); + + // Try full Matrix user ID (e.g. "@timmy:homeserver.local") + if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) { + return rest; + } + + // Try @localpart (e.g. "@timmy") + if let Some(localpart) = bot_user_id.split(':').next() + && let Some(rest) = strip_prefix_ci(trimmed, localpart) + { + return rest; + } + + // Try display name (e.g. "Timmy") + if let Some(rest) = strip_prefix_ci(trimmed, bot_name) { + return rest; + } + + trimmed +} + +/// Returns `true` when `text` ends while inside an open fenced code block. +/// +/// A fenced code block opens and closes on lines that start with ` ``` ` +/// (three or more backticks). We count the fence markers and return `true` +/// when the count is odd (a fence was opened but not yet closed). 
+fn is_inside_code_fence(text: &str) -> bool { + let mut in_fence = false; + for line in text.lines() { + if line.trim_start().starts_with("```") { + in_fence = !in_fence; + } + } + in_fence +} + +/// Drain all complete paragraphs from `buffer` and return them. +/// +/// A paragraph boundary is a double newline (`\n\n`). Each drained paragraph +/// is trimmed of surrounding whitespace; empty paragraphs are discarded. +/// The buffer is left with only the remaining incomplete text. +/// +/// **Code-fence awareness:** a `\n\n` that occurs *inside* a fenced code +/// block (delimited by ` ``` ` lines) is **not** treated as a paragraph +/// boundary. This prevents a blank line inside a code block from splitting +/// the fence across multiple messages, which would corrupt the rendering. +pub fn drain_complete_paragraphs(buffer: &mut String) -> Vec<String> { + let mut paragraphs = Vec::new(); + let mut search_from = 0; + loop { + let Some(pos) = buffer[search_from..].find("\n\n") else { + break; + }; + let abs_pos = search_from + pos; + // Only split at this boundary when we are NOT inside a code fence. + if is_inside_code_fence(&buffer[..abs_pos]) { + // Skip past this \n\n and keep looking for the next boundary. 
+ search_from = abs_pos + 2; + } else { + let chunk = buffer[..abs_pos].trim().to_string(); + *buffer = buffer[abs_pos + 2..].to_string(); + search_from = 0; + if !chunk.is_empty() { + paragraphs.push(chunk); + } + } + } + paragraphs +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + // -- strip_prefix_ci ---------------------------------------------------- + + #[test] + fn strip_prefix_ci_basic() { + assert_eq!(strip_prefix_ci("Hello world", "hello"), Some(" world")); + } + + #[test] + fn strip_prefix_ci_no_match() { + assert_eq!(strip_prefix_ci("goodbye", "hello"), None); + } + + #[test] + fn strip_prefix_ci_word_boundary_required() { + assert_eq!(strip_prefix_ci("helloworld", "hello"), None); + } + + #[test] + fn strip_prefix_ci_exact_match() { + assert_eq!(strip_prefix_ci("hello", "hello"), Some("")); + } + + #[test] + fn strip_prefix_ci_multibyte_no_panic_smart_quote() { + // "abcde\u{2019}xyz" — U+2019 is 3 bytes starting at byte 5. + // A prefix of length 6 (e.g. "abcdef") lands inside the 3-byte char. + // Previously this caused: "byte index 6 is not a char boundary". + let text = "abcde\u{2019}xyz"; + assert_eq!(strip_prefix_ci(text, "abcdef"), None); + } + + #[test] + fn strip_prefix_ci_multibyte_no_panic_emoji() { + // U+1F600 is 4 bytes starting at byte 3. Prefix length 4 lands inside it. 
+ let text = "abc\u{1F600}def"; + assert_eq!(strip_prefix_ci(text, "abcd"), None); + } + + // -- strip_bot_mention -------------------------------------------------- + + #[test] + fn strip_mention_full_user_id() { + let rest = strip_bot_mention( + "@timmy:homeserver.local help", + "Timmy", + "@timmy:homeserver.local", + ); + assert_eq!(rest.trim(), "help"); + } + + #[test] + fn strip_mention_localpart() { + let rest = strip_bot_mention("@timmy help me", "Timmy", "@timmy:homeserver.local"); + assert_eq!(rest.trim(), "help me"); + } + + #[test] + fn strip_mention_display_name() { + let rest = strip_bot_mention("Timmy help", "Timmy", "@timmy:homeserver.local"); + assert_eq!(rest.trim(), "help"); + } + + #[test] + fn strip_mention_display_name_case_insensitive() { + let rest = strip_bot_mention("timmy help", "Timmy", "@timmy:homeserver.local"); + assert_eq!(rest.trim(), "help"); + } + + #[test] + fn strip_mention_no_match_returns_original() { + let rest = strip_bot_mention("hello world", "Timmy", "@timmy:homeserver.local"); + assert_eq!(rest, "hello world"); + } + + #[test] + fn strip_mention_does_not_match_longer_name() { + // "@timmybot" should NOT match "@timmy" + let rest = strip_bot_mention("@timmybot help", "Timmy", "@timmy:homeserver.local"); + assert_eq!(rest, "@timmybot help"); + } + + #[test] + fn strip_mention_comma_after_name() { + let rest = strip_bot_mention("@timmy, help", "Timmy", "@timmy:homeserver.local"); + assert_eq!(rest.trim().trim_start_matches(',').trim(), "help"); + } + + // -- drain_complete_paragraphs ------------------------------------------ + + #[test] + fn drain_complete_paragraphs_no_boundary_returns_empty() { + let mut buf = "Hello World".to_string(); + let paras = drain_complete_paragraphs(&mut buf); + assert!(paras.is_empty()); + assert_eq!(buf, "Hello World"); + } + + #[test] + fn drain_complete_paragraphs_single_boundary() { + let mut buf = "Paragraph one.\n\nParagraph two.".to_string(); + let paras = drain_complete_paragraphs(&mut 
buf); + assert_eq!(paras, vec!["Paragraph one."]); + assert_eq!(buf, "Paragraph two."); + } + + #[test] + fn drain_complete_paragraphs_multiple_boundaries() { + let mut buf = "A\n\nB\n\nC".to_string(); + let paras = drain_complete_paragraphs(&mut buf); + assert_eq!(paras, vec!["A", "B"]); + assert_eq!(buf, "C"); + } + + #[test] + fn drain_complete_paragraphs_trailing_boundary() { + let mut buf = "A\n\nB\n\n".to_string(); + let paras = drain_complete_paragraphs(&mut buf); + assert_eq!(paras, vec!["A", "B"]); + assert_eq!(buf, ""); + } + + #[test] + fn drain_complete_paragraphs_empty_input() { + let mut buf = String::new(); + let paras = drain_complete_paragraphs(&mut buf); + assert!(paras.is_empty()); + assert_eq!(buf, ""); + } + + #[test] + fn drain_complete_paragraphs_skips_empty_chunks() { + // Consecutive double-newlines produce no empty paragraphs. + let mut buf = "\n\n\n\nHello".to_string(); + let paras = drain_complete_paragraphs(&mut buf); + assert!(paras.is_empty()); + assert_eq!(buf, "Hello"); + } + + #[test] + fn drain_complete_paragraphs_trims_whitespace() { + let mut buf = " Hello \n\n World ".to_string(); + let paras = drain_complete_paragraphs(&mut buf); + assert_eq!(paras, vec!["Hello"]); + assert_eq!(buf, " World "); + } + + // -- drain_complete_paragraphs: code-fence awareness ------------------- + + #[test] + fn drain_complete_paragraphs_code_fence_blank_line_not_split() { + // A blank line inside a fenced code block must NOT trigger a split. + let mut buf = + "```rust\nfn foo() {\n let x = 1;\n\n let y = 2;\n}\n```\n\nNext paragraph." 
+ .to_string(); + let paras = drain_complete_paragraphs(&mut buf); + assert_eq!( + paras.len(), + 1, + "code fence with blank line should not be split into multiple messages: {paras:?}" + ); + assert!( + paras[0].starts_with("```rust"), + "first paragraph should be the code fence: {:?}", + paras[0] + ); + assert!( + paras[0].contains("let y = 2;"), + "code fence should contain content from both sides of the blank line: {:?}", + paras[0] + ); + assert_eq!(buf, "Next paragraph."); + } + + #[test] + fn drain_complete_paragraphs_text_before_and_after_fenced_block() { + // Text paragraph, then a code block with an internal blank line, then more text. + let mut buf = "Before\n\n```\ncode\n\nmore code\n```\n\nAfter".to_string(); + let paras = drain_complete_paragraphs(&mut buf); + assert_eq!(paras.len(), 2, "expected two paragraphs: {paras:?}"); + assert_eq!(paras[0], "Before"); + assert!( + paras[1].starts_with("```"), + "second paragraph should be the code fence: {:?}", + paras[1] + ); + assert!( + paras[1].contains("more code"), + "code fence content must include the part after the blank line: {:?}", + paras[1] + ); + assert_eq!(buf, "After"); + } + + #[test] + fn drain_complete_paragraphs_incremental_simulation() { + // Simulate tokens arriving one character at a time. + let mut buf = String::new(); + let mut all_paragraphs = Vec::new(); + + for ch in "First para.\n\nSecond para.\n\nThird.".chars() { + buf.push(ch); + all_paragraphs.extend(drain_complete_paragraphs(&mut buf)); + } + + assert_eq!(all_paragraphs, vec!["First para.", "Second para."]); + assert_eq!(buf, "Third."); + } +} diff --git a/server/src/http/bot_command.rs b/server/src/http/bot_command.rs index d8dbc51a..d7212f87 100644 --- a/server/src/http/bot_command.rs +++ b/server/src/http/bot_command.rs @@ -10,7 +10,7 @@ //! (it clears local session state and message history) and is not routed here. 
use crate::http::context::{AppContext, OpenApiResult}; -use crate::chat::transport::matrix::commands::CommandDispatch; +use crate::chat::commands::CommandDispatch; use poem::http::StatusCode; use poem_openapi::{Object, OpenApi, Tags, payload::Json}; use serde::{Deserialize, Serialize}; @@ -112,7 +112,7 @@ fn dispatch_sync( format!("{bot_name} {cmd} {args}") }; - match crate::chat::transport::matrix::commands::try_handle_command(&dispatch, &synthetic) { + match crate::chat::commands::try_handle_command(&dispatch, &synthetic) { Some(response) => response, None => { // Command exists in the registry but its fallback handler returns None