storkit: merge 392_refactor_extract_shared_transport_utilities_from_matrix_module_into_chat_submodule
This commit is contained in:
+4
-135
@@ -1,4 +1,4 @@
|
||||
//! Bot-level command registry for the Matrix bot.
|
||||
//! Bot-level command registry shared by all chat transports.
|
||||
//!
|
||||
//! Commands registered here are handled directly by the bot without invoking
|
||||
//! the LLM. The registry is the single source of truth — the `help` command
|
||||
@@ -18,6 +18,7 @@ mod triage;
|
||||
mod unreleased;
|
||||
|
||||
use crate::agents::AgentPool;
|
||||
use crate::chat::util::strip_bot_mention;
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
use std::sync::{Arc, Mutex};
|
||||
@@ -163,8 +164,8 @@ pub fn commands() -> &'static [BotCommand] {
|
||||
|
||||
/// Try to match a user message against a registered bot command.
|
||||
///
|
||||
/// The message is expected to be the raw body text from Matrix (e.g.,
|
||||
/// `"@timmy help"`). The bot mention prefix is stripped before matching.
|
||||
/// The message is expected to be the raw body text (e.g., `"@timmy help"`).
|
||||
/// The bot mention prefix is stripped before matching.
|
||||
///
|
||||
/// Returns `Some(response)` if a command matched and was handled, `None`
|
||||
/// otherwise (the caller should fall through to the LLM).
|
||||
@@ -196,51 +197,6 @@ pub fn try_handle_command(dispatch: &CommandDispatch<'_>, message: &str) -> Opti
|
||||
.and_then(|c| (c.handler)(&ctx))
|
||||
}
|
||||
|
||||
/// Strip the bot mention prefix from a raw message body.
|
||||
///
|
||||
/// Handles these forms (case-insensitive where applicable):
|
||||
/// - `@bot_localpart:server.com rest` → `rest`
|
||||
/// - `@bot_localpart rest` → `rest`
|
||||
/// - `DisplayName rest` → `rest`
|
||||
fn strip_bot_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
|
||||
let trimmed = message.trim();
|
||||
|
||||
// Try full Matrix user ID (e.g. "@timmy:homeserver.local")
|
||||
if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
|
||||
return rest;
|
||||
}
|
||||
|
||||
// Try @localpart (e.g. "@timmy")
|
||||
if let Some(localpart) = bot_user_id.split(':').next()
|
||||
&& let Some(rest) = strip_prefix_ci(trimmed, localpart)
|
||||
{
|
||||
return rest;
|
||||
}
|
||||
|
||||
// Try display name (e.g. "Timmy")
|
||||
if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
|
||||
return rest;
|
||||
}
|
||||
|
||||
trimmed
|
||||
}
|
||||
|
||||
/// Strip `prefix` from the front of `text`, ignoring ASCII case.
///
/// The prefix only counts as matched when it ends at a word boundary: the
/// remainder must be empty, or must start with a character that is not
/// alphanumeric, `-`, or `_`.
///
/// Returns the remainder on a match. Returns `None` when the prefix does not
/// match, when it is followed by a word character, or when `prefix.len()`
/// lies past the end of `text` or inside a multi-byte character.
fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
    let cut = prefix.len();

    // `is_char_boundary` is false both beyond the end of `text` and in the
    // middle of a multi-byte character, so the split below cannot panic.
    if !text.is_char_boundary(cut) {
        return None;
    }

    let (head, tail) = text.split_at(cut);
    if !head.eq_ignore_ascii_case(prefix) {
        return None;
    }

    match tail.chars().next() {
        // The prefix is only the start of a longer word.
        Some(c) if c.is_alphanumeric() || matches!(c, '-' | '_') => None,
        // End of input, whitespace, or punctuation.
        _ => Some(tail),
    }
}
|
||||
|
||||
/// Fallback handler for the `htop` command when it is not intercepted by the
|
||||
/// async handler in `on_room_message`. In practice this is never called —
|
||||
/// htop is detected and handled before `try_handle_command` is invoked.
|
||||
@@ -346,55 +302,6 @@ pub(crate) mod tests {
|
||||
// Re-export commands() for submodule tests
|
||||
pub use super::commands;
|
||||
|
||||
// -- strip_bot_mention --------------------------------------------------
|
||||
|
||||
#[test]
fn strip_mention_full_user_id() {
    let stripped = strip_bot_mention(
        "@timmy:homeserver.local help",
        "Timmy",
        "@timmy:homeserver.local",
    );
    assert_eq!(stripped.trim(), "help");
}

#[test]
fn strip_mention_localpart() {
    let stripped = strip_bot_mention("@timmy help me", "Timmy", "@timmy:homeserver.local");
    assert_eq!(stripped.trim(), "help me");
}

#[test]
fn strip_mention_display_name() {
    let stripped = strip_bot_mention("Timmy help", "Timmy", "@timmy:homeserver.local");
    assert_eq!(stripped.trim(), "help");
}

#[test]
fn strip_mention_display_name_case_insensitive() {
    let stripped = strip_bot_mention("timmy help", "Timmy", "@timmy:homeserver.local");
    assert_eq!(stripped.trim(), "help");
}

#[test]
fn strip_mention_no_match_returns_original() {
    let stripped = strip_bot_mention("hello world", "Timmy", "@timmy:homeserver.local");
    assert_eq!(stripped, "hello world");
}

#[test]
fn strip_mention_does_not_match_longer_name() {
    // The localpart "@timmy" is only the start of "@timmybot", so the
    // message must come back untouched.
    let stripped = strip_bot_mention("@timmybot help", "Timmy", "@timmy:homeserver.local");
    assert_eq!(stripped, "@timmybot help");
}

#[test]
fn strip_mention_comma_after_name() {
    // The comma is left on the remainder; peel it off before comparing.
    let stripped = strip_bot_mention("@timmy, help", "Timmy", "@timmy:homeserver.local");
    let command = stripped.trim().trim_start_matches(',').trim();
    assert_eq!(command, "help");
}
|
||||
|
||||
// -- try_handle_command -------------------------------------------------
|
||||
|
||||
#[test]
|
||||
@@ -427,44 +334,6 @@ pub(crate) mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
// -- strip_prefix_ci ----------------------------------------------------
|
||||
|
||||
#[test]
fn strip_prefix_ci_basic() {
    let remainder = strip_prefix_ci("Hello world", "hello");
    assert_eq!(remainder, Some(" world"));
}

#[test]
fn strip_prefix_ci_no_match() {
    let remainder = strip_prefix_ci("goodbye", "hello");
    assert_eq!(remainder, None);
}

#[test]
fn strip_prefix_ci_word_boundary_required() {
    let remainder = strip_prefix_ci("helloworld", "hello");
    assert_eq!(remainder, None);
}

#[test]
fn strip_prefix_ci_exact_match() {
    let remainder = strip_prefix_ci("hello", "hello");
    assert_eq!(remainder, Some(""));
}

#[test]
fn strip_prefix_ci_multibyte_no_panic_smart_quote() {
    // U+2019 occupies bytes 5..8, so a six-byte prefix ends inside it.
    // This input used to panic with "byte index 6 is not a char boundary".
    let remainder = strip_prefix_ci("abcde\u{2019}xyz", "abcdef");
    assert_eq!(remainder, None);
}

#[test]
fn strip_prefix_ci_multibyte_no_panic_emoji() {
    // U+1F600 occupies bytes 3..7, so a four-byte prefix ends inside it.
    let remainder = strip_prefix_ci("abc\u{1F600}def", "abcd");
    assert_eq!(remainder, None);
}
|
||||
|
||||
// -- commands registry --------------------------------------------------
|
||||
|
||||
#[test]
|
||||
@@ -4,7 +4,9 @@
|
||||
//! sending and editing messages, allowing the bot logic (commands, htop,
|
||||
//! notifications) to work against any chat platform — Matrix, WhatsApp, etc.
|
||||
|
||||
pub mod commands;
|
||||
pub mod transport;
|
||||
pub mod util;
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
use crate::agents::AgentPool;
|
||||
use crate::chat::ChatTransport;
|
||||
use crate::chat::util::drain_complete_paragraphs;
|
||||
use crate::http::context::{PermissionDecision, PermissionForward};
|
||||
use crate::llm::providers::claude_code::{ClaudeCodeProvider, ClaudeCodeResult};
|
||||
use crate::slog;
|
||||
use crate::chat::ChatTransport;
|
||||
use matrix_sdk::{
|
||||
Client,
|
||||
config::SyncSettings,
|
||||
@@ -1362,59 +1363,6 @@ pub fn markdown_to_html(markdown: &str) -> String {
|
||||
html_output
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Paragraph buffering helper
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Reports whether `text` stops in the middle of a fenced code block.
///
/// Every line whose first non-whitespace characters are three backticks is
/// treated as a fence marker that either opens or closes a block. An odd
/// number of markers means the last fence was opened but never closed.
fn is_inside_code_fence(text: &str) -> bool {
    let fence_markers = text
        .lines()
        .filter(|line| line.trim_start().starts_with("```"))
        .count();
    fence_markers % 2 == 1
}
|
||||
|
||||
/// Drain all complete paragraphs from `buffer` and return them.
|
||||
///
|
||||
/// A paragraph boundary is a double newline (`\n\n`). Each drained paragraph
|
||||
/// is trimmed of surrounding whitespace; empty paragraphs are discarded.
|
||||
/// The buffer is left with only the remaining incomplete text.
|
||||
///
|
||||
/// **Code-fence awareness:** a `\n\n` that occurs *inside* a fenced code
|
||||
/// block (delimited by ` ``` ` lines) is **not** treated as a paragraph
|
||||
/// boundary. This prevents a blank line inside a code block from splitting
|
||||
/// the fence across multiple Matrix messages, which would corrupt the
|
||||
/// rendering of the second half.
|
||||
pub fn drain_complete_paragraphs(buffer: &mut String) -> Vec<String> {
|
||||
let mut paragraphs = Vec::new();
|
||||
let mut search_from = 0;
|
||||
loop {
|
||||
let Some(pos) = buffer[search_from..].find("\n\n") else {
|
||||
break;
|
||||
};
|
||||
let abs_pos = search_from + pos;
|
||||
// Only split at this boundary when we are NOT inside a code fence.
|
||||
if is_inside_code_fence(&buffer[..abs_pos]) {
|
||||
// Skip past this \n\n and keep looking for the next boundary.
|
||||
search_from = abs_pos + 2;
|
||||
} else {
|
||||
let chunk = buffer[..abs_pos].trim().to_string();
|
||||
*buffer = buffer[abs_pos + 2..].to_string();
|
||||
search_from = 0;
|
||||
if !chunk.is_empty() {
|
||||
paragraphs.push(chunk);
|
||||
}
|
||||
}
|
||||
}
|
||||
paragraphs
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
@@ -1623,129 +1571,6 @@ mod tests {
|
||||
let _cloned = ctx.clone();
|
||||
}
|
||||
|
||||
// -- drain_complete_paragraphs ------------------------------------------
|
||||
|
||||
#[test]
fn drain_complete_paragraphs_no_boundary_returns_empty() {
    let mut buffer = String::from("Hello World");
    let paras = drain_complete_paragraphs(&mut buffer);
    assert!(paras.is_empty());
    assert_eq!(buffer, "Hello World");
}

#[test]
fn drain_complete_paragraphs_single_boundary() {
    let mut buffer = String::from("Paragraph one.\n\nParagraph two.");
    let paras = drain_complete_paragraphs(&mut buffer);
    assert_eq!(paras, vec!["Paragraph one."]);
    assert_eq!(buffer, "Paragraph two.");
}

#[test]
fn drain_complete_paragraphs_multiple_boundaries() {
    let mut buffer = String::from("A\n\nB\n\nC");
    let paras = drain_complete_paragraphs(&mut buffer);
    assert_eq!(paras, vec!["A", "B"]);
    assert_eq!(buffer, "C");
}

#[test]
fn drain_complete_paragraphs_trailing_boundary() {
    let mut buffer = String::from("A\n\nB\n\n");
    let paras = drain_complete_paragraphs(&mut buffer);
    assert_eq!(paras, vec!["A", "B"]);
    assert_eq!(buffer, "");
}

#[test]
fn drain_complete_paragraphs_empty_input() {
    let mut buffer = String::new();
    let paras = drain_complete_paragraphs(&mut buffer);
    assert!(paras.is_empty());
    assert_eq!(buffer, "");
}

#[test]
fn drain_complete_paragraphs_skips_empty_chunks() {
    // Back-to-back separators must not yield empty paragraphs.
    let mut buffer = String::from("\n\n\n\nHello");
    let paras = drain_complete_paragraphs(&mut buffer);
    assert!(paras.is_empty());
    assert_eq!(buffer, "Hello");
}

#[test]
fn drain_complete_paragraphs_trims_whitespace() {
    // Drained paragraphs are trimmed; the leftover buffer is not.
    let mut buffer = String::from(" Hello \n\n World ");
    let paras = drain_complete_paragraphs(&mut buffer);
    assert_eq!(paras, vec!["Hello"]);
    assert_eq!(buffer, " World ");
}

#[test]
fn drain_complete_paragraphs_code_fence_blank_line_not_split() {
    // The blank line sits inside the fence, so the only valid split point is
    // the one after the closing fence. Splitting earlier would send the
    // second half without its opening fence and break rendering.
    let mut buffer = String::from(
        "```rust\nfn foo() {\n let x = 1;\n\n let y = 2;\n}\n```\n\nNext paragraph.",
    );
    let paras = drain_complete_paragraphs(&mut buffer);
    assert_eq!(
        paras.len(),
        1,
        "code fence with blank line should not be split into multiple messages: {paras:?}"
    );
    let fence = &paras[0];
    assert!(
        fence.starts_with("```rust"),
        "first paragraph should be the code fence: {:?}",
        fence
    );
    assert!(
        fence.contains("let y = 2;"),
        "code fence should contain content from both sides of the blank line: {:?}",
        fence
    );
    assert_eq!(buffer, "Next paragraph.");
}

#[test]
fn drain_complete_paragraphs_text_before_and_after_fenced_block() {
    // Plain text, then a fenced block containing a blank line, then more text.
    let mut buffer = String::from("Before\n\n```\ncode\n\nmore code\n```\n\nAfter");
    let paras = drain_complete_paragraphs(&mut buffer);
    assert_eq!(paras.len(), 2, "expected two paragraphs: {paras:?}");
    assert_eq!(paras[0], "Before");
    let fence = &paras[1];
    assert!(
        fence.starts_with("```"),
        "second paragraph should be the code fence: {:?}",
        fence
    );
    assert!(
        fence.contains("more code"),
        "code fence content must include the part after the blank line: {:?}",
        fence
    );
    assert_eq!(buffer, "After");
}

#[test]
fn drain_complete_paragraphs_incremental_simulation() {
    // Feed the text one character at a time, draining after every push.
    let mut buffer = String::new();
    let mut collected = Vec::new();

    for ch in "First para.\n\nSecond para.\n\nThird.".chars() {
        buffer.push(ch);
        let ready = drain_complete_paragraphs(&mut buffer);
        collected.extend(ready);
    }

    assert_eq!(collected, vec!["First para.", "Second para."]);
    assert_eq!(buffer, "Third.");
}
|
||||
|
||||
// -- format_user_prompt -------------------------------------------------
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
//! Re-exports from `crate::chat::commands`.
|
||||
//!
|
||||
//! The command dispatch infrastructure has moved to `crate::chat::commands` so
|
||||
//! it can be shared by all transports. This module re-exports everything for
|
||||
//! backwards compatibility with in-tree references.
|
||||
|
||||
pub use crate::chat::commands::*;
|
||||
@@ -18,7 +18,7 @@
|
||||
pub mod assign;
|
||||
mod bot;
|
||||
pub mod commands;
|
||||
mod config;
|
||||
pub(crate) mod config;
|
||||
pub mod delete;
|
||||
pub mod htop;
|
||||
pub mod rebuild;
|
||||
@@ -28,7 +28,7 @@ pub mod start;
|
||||
pub mod notifications;
|
||||
pub mod transport_impl;
|
||||
|
||||
pub use bot::{ConversationEntry, ConversationRole, RoomConversation, drain_complete_paragraphs};
|
||||
pub use bot::{ConversationEntry, ConversationRole, RoomConversation};
|
||||
pub use config::BotConfig;
|
||||
|
||||
use crate::agents::AgentPool;
|
||||
|
||||
@@ -356,14 +356,14 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn start_command_is_registered() {
|
||||
use crate::chat::transport::matrix::commands::commands;
|
||||
use crate::chat::commands::commands;
|
||||
let found = commands().iter().any(|c| c.name == "start");
|
||||
assert!(found, "start command must be in the registry");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn start_command_appears_in_help() {
|
||||
let result = crate::chat::transport::matrix::commands::tests::try_cmd_addressed(
|
||||
let result = crate::chat::commands::tests::try_cmd_addressed(
|
||||
"Timmy",
|
||||
"@timmy:homeserver.local",
|
||||
"@timmy help",
|
||||
@@ -378,7 +378,7 @@ mod tests {
|
||||
#[test]
|
||||
fn start_command_falls_through_to_none_in_registry() {
|
||||
// The start handler in the registry returns None (handled async in bot.rs).
|
||||
let result = crate::chat::transport::matrix::commands::tests::try_cmd_addressed(
|
||||
let result = crate::chat::commands::tests::try_cmd_addressed(
|
||||
"Timmy",
|
||||
"@timmy:homeserver.local",
|
||||
"@timmy start 42",
|
||||
|
||||
@@ -669,7 +669,7 @@ pub async fn slash_command_receive(
|
||||
format!("{} {keyword} {}", ctx.bot_name, payload.text)
|
||||
};
|
||||
|
||||
use crate::chat::transport::matrix::commands::{CommandDispatch, try_handle_command};
|
||||
use crate::chat::commands::{CommandDispatch, try_handle_command};
|
||||
|
||||
let dispatch = CommandDispatch {
|
||||
bot_name: &ctx.bot_name,
|
||||
@@ -702,7 +702,7 @@ async fn handle_incoming_message(
|
||||
user: &str,
|
||||
message: &str,
|
||||
) {
|
||||
use crate::chat::transport::matrix::commands::{CommandDispatch, try_handle_command};
|
||||
use crate::chat::commands::{CommandDispatch, try_handle_command};
|
||||
|
||||
let dispatch = CommandDispatch {
|
||||
bot_name: &ctx.bot_name,
|
||||
@@ -815,7 +815,7 @@ async fn handle_llm_message(
|
||||
user_message: &str,
|
||||
) {
|
||||
use crate::llm::providers::claude_code::{ClaudeCodeProvider, ClaudeCodeResult};
|
||||
use crate::chat::transport::matrix::drain_complete_paragraphs;
|
||||
use crate::chat::util::drain_complete_paragraphs;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use tokio::sync::watch;
|
||||
|
||||
@@ -1471,7 +1471,7 @@ mod tests {
|
||||
fn slash_command_dispatches_through_command_registry() {
|
||||
// Verify that the synthetic message built by the slash handler
|
||||
// correctly dispatches through try_handle_command.
|
||||
use crate::chat::transport::matrix::commands::{CommandDispatch, try_handle_command};
|
||||
use crate::chat::commands::{CommandDispatch, try_handle_command};
|
||||
|
||||
let agents = test_agents();
|
||||
let ambient_rooms = test_ambient_rooms();
|
||||
@@ -1498,7 +1498,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn slash_command_show_passes_args_through_registry() {
|
||||
use crate::chat::transport::matrix::commands::{CommandDispatch, try_handle_command};
|
||||
use crate::chat::commands::{CommandDispatch, try_handle_command};
|
||||
|
||||
let agents = test_agents();
|
||||
let ambient_rooms = test_ambient_rooms();
|
||||
|
||||
@@ -978,7 +978,7 @@ pub async fn webhook_receive(
|
||||
|
||||
/// Dispatch an incoming WhatsApp message to bot commands.
|
||||
async fn handle_incoming_message(ctx: &WhatsAppWebhookContext, sender: &str, message: &str) {
|
||||
use crate::chat::transport::matrix::commands::{CommandDispatch, try_handle_command};
|
||||
use crate::chat::commands::{CommandDispatch, try_handle_command};
|
||||
|
||||
// Allowlist check: when configured, silently ignore unauthorized senders.
|
||||
if !ctx.allowed_phones.is_empty()
|
||||
@@ -1071,7 +1071,7 @@ async fn handle_incoming_message(ctx: &WhatsAppWebhookContext, sender: &str, mes
|
||||
|
||||
/// Forward a message to Claude Code and send the response back via WhatsApp.
|
||||
async fn handle_llm_message(ctx: &WhatsAppWebhookContext, sender: &str, user_message: &str) {
|
||||
use crate::chat::transport::matrix::drain_complete_paragraphs;
|
||||
use crate::chat::util::drain_complete_paragraphs;
|
||||
use crate::llm::providers::claude_code::{ClaudeCodeProvider, ClaudeCodeResult};
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use tokio::sync::watch;
|
||||
|
||||
@@ -0,0 +1,315 @@
|
||||
//! Shared text utilities used by all chat transports.
|
||||
//!
|
||||
//! These functions are transport-agnostic helpers for processing chat messages:
|
||||
//! prefix stripping, bot-mention handling, and paragraph buffering.
|
||||
|
||||
/// Strip `prefix` from the front of `text`, ignoring ASCII case.
///
/// The prefix only counts as matched when it ends at a word boundary: the
/// remainder must be empty, or must start with a character that is not
/// alphanumeric, `-`, or `_`.
///
/// Returns the remainder on a match. Returns `None` when the prefix does not
/// match, when it is followed by a word character, or when `prefix.len()`
/// lies past the end of `text` or inside a multi-byte character.
pub fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
    let cut = prefix.len();

    // `is_char_boundary` is false both beyond the end of `text` and in the
    // middle of a multi-byte character, so the split below cannot panic.
    if !text.is_char_boundary(cut) {
        return None;
    }

    let (head, tail) = text.split_at(cut);
    if !head.eq_ignore_ascii_case(prefix) {
        return None;
    }

    match tail.chars().next() {
        // The prefix is only the start of a longer word.
        Some(c) if c.is_alphanumeric() || matches!(c, '-' | '_') => None,
        // End of input, whitespace, or punctuation.
        _ => Some(tail),
    }
}
|
||||
|
||||
/// Strip the bot mention prefix from a raw message body.
|
||||
///
|
||||
/// Handles these forms (case-insensitive where applicable):
|
||||
/// - `@bot_localpart:server.com rest` → `rest`
|
||||
/// - `@bot_localpart rest` → `rest`
|
||||
/// - `DisplayName rest` → `rest`
|
||||
pub fn strip_bot_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
|
||||
let trimmed = message.trim();
|
||||
|
||||
// Try full Matrix user ID (e.g. "@timmy:homeserver.local")
|
||||
if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
|
||||
return rest;
|
||||
}
|
||||
|
||||
// Try @localpart (e.g. "@timmy")
|
||||
if let Some(localpart) = bot_user_id.split(':').next()
|
||||
&& let Some(rest) = strip_prefix_ci(trimmed, localpart)
|
||||
{
|
||||
return rest;
|
||||
}
|
||||
|
||||
// Try display name (e.g. "Timmy")
|
||||
if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
|
||||
return rest;
|
||||
}
|
||||
|
||||
trimmed
|
||||
}
|
||||
|
||||
/// Reports whether `text` stops in the middle of a fenced code block.
///
/// Every line whose first non-whitespace characters are three backticks is
/// treated as a fence marker that either opens or closes a block. An odd
/// number of markers means the last fence was opened but never closed.
fn is_inside_code_fence(text: &str) -> bool {
    let fence_markers = text
        .lines()
        .filter(|line| line.trim_start().starts_with("```"))
        .count();
    fence_markers % 2 == 1
}
|
||||
|
||||
/// Drain all complete paragraphs from `buffer` and return them.
|
||||
///
|
||||
/// A paragraph boundary is a double newline (`\n\n`). Each drained paragraph
|
||||
/// is trimmed of surrounding whitespace; empty paragraphs are discarded.
|
||||
/// The buffer is left with only the remaining incomplete text.
|
||||
///
|
||||
/// **Code-fence awareness:** a `\n\n` that occurs *inside* a fenced code
|
||||
/// block (delimited by ` ``` ` lines) is **not** treated as a paragraph
|
||||
/// boundary. This prevents a blank line inside a code block from splitting
|
||||
/// the fence across multiple messages, which would corrupt the rendering.
|
||||
pub fn drain_complete_paragraphs(buffer: &mut String) -> Vec<String> {
|
||||
let mut paragraphs = Vec::new();
|
||||
let mut search_from = 0;
|
||||
loop {
|
||||
let Some(pos) = buffer[search_from..].find("\n\n") else {
|
||||
break;
|
||||
};
|
||||
let abs_pos = search_from + pos;
|
||||
// Only split at this boundary when we are NOT inside a code fence.
|
||||
if is_inside_code_fence(&buffer[..abs_pos]) {
|
||||
// Skip past this \n\n and keep looking for the next boundary.
|
||||
search_from = abs_pos + 2;
|
||||
} else {
|
||||
let chunk = buffer[..abs_pos].trim().to_string();
|
||||
*buffer = buffer[abs_pos + 2..].to_string();
|
||||
search_from = 0;
|
||||
if !chunk.is_empty() {
|
||||
paragraphs.push(chunk);
|
||||
}
|
||||
}
|
||||
}
|
||||
paragraphs
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Display name of the bot used by the mention tests.
    const BOT_NAME: &str = "Timmy";
    /// Full user ID of the bot used by the mention tests.
    const BOT_USER_ID: &str = "@timmy:homeserver.local";

    /// Strip the test bot's mention from `message`.
    fn strip(message: &str) -> &str {
        strip_bot_mention(message, BOT_NAME, BOT_USER_ID)
    }

    /// Run `drain_complete_paragraphs` on a fresh buffer holding `input` and
    /// return the drained paragraphs together with what is left in the buffer.
    fn drain(input: &str) -> (Vec<String>, String) {
        let mut buffer = String::from(input);
        let paragraphs = drain_complete_paragraphs(&mut buffer);
        (paragraphs, buffer)
    }

    // -- strip_prefix_ci ----------------------------------------------------

    #[test]
    fn strip_prefix_ci_basic() {
        let remainder = strip_prefix_ci("Hello world", "hello");
        assert_eq!(remainder, Some(" world"));
    }

    #[test]
    fn strip_prefix_ci_no_match() {
        let remainder = strip_prefix_ci("goodbye", "hello");
        assert_eq!(remainder, None);
    }

    #[test]
    fn strip_prefix_ci_word_boundary_required() {
        let remainder = strip_prefix_ci("helloworld", "hello");
        assert_eq!(remainder, None);
    }

    #[test]
    fn strip_prefix_ci_exact_match() {
        let remainder = strip_prefix_ci("hello", "hello");
        assert_eq!(remainder, Some(""));
    }

    #[test]
    fn strip_prefix_ci_multibyte_no_panic_smart_quote() {
        // U+2019 occupies bytes 5..8, so a six-byte prefix ends inside it.
        // This input used to panic with "byte index 6 is not a char boundary".
        let remainder = strip_prefix_ci("abcde\u{2019}xyz", "abcdef");
        assert_eq!(remainder, None);
    }

    #[test]
    fn strip_prefix_ci_multibyte_no_panic_emoji() {
        // U+1F600 occupies bytes 3..7, so a four-byte prefix ends inside it.
        let remainder = strip_prefix_ci("abc\u{1F600}def", "abcd");
        assert_eq!(remainder, None);
    }

    // -- strip_bot_mention --------------------------------------------------

    #[test]
    fn strip_mention_full_user_id() {
        assert_eq!(strip("@timmy:homeserver.local help").trim(), "help");
    }

    #[test]
    fn strip_mention_localpart() {
        assert_eq!(strip("@timmy help me").trim(), "help me");
    }

    #[test]
    fn strip_mention_display_name() {
        assert_eq!(strip("Timmy help").trim(), "help");
    }

    #[test]
    fn strip_mention_display_name_case_insensitive() {
        assert_eq!(strip("timmy help").trim(), "help");
    }

    #[test]
    fn strip_mention_no_match_returns_original() {
        assert_eq!(strip("hello world"), "hello world");
    }

    #[test]
    fn strip_mention_does_not_match_longer_name() {
        // The localpart "@timmy" is only the start of "@timmybot", so the
        // message must come back untouched.
        assert_eq!(strip("@timmybot help"), "@timmybot help");
    }

    #[test]
    fn strip_mention_comma_after_name() {
        // The comma is left on the remainder; peel it off before comparing.
        let command = strip("@timmy, help").trim().trim_start_matches(',').trim();
        assert_eq!(command, "help");
    }

    // -- drain_complete_paragraphs ------------------------------------------

    #[test]
    fn drain_complete_paragraphs_no_boundary_returns_empty() {
        let (paras, rest) = drain("Hello World");
        assert!(paras.is_empty());
        assert_eq!(rest, "Hello World");
    }

    #[test]
    fn drain_complete_paragraphs_single_boundary() {
        let (paras, rest) = drain("Paragraph one.\n\nParagraph two.");
        assert_eq!(paras, vec!["Paragraph one."]);
        assert_eq!(rest, "Paragraph two.");
    }

    #[test]
    fn drain_complete_paragraphs_multiple_boundaries() {
        let (paras, rest) = drain("A\n\nB\n\nC");
        assert_eq!(paras, vec!["A", "B"]);
        assert_eq!(rest, "C");
    }

    #[test]
    fn drain_complete_paragraphs_trailing_boundary() {
        let (paras, rest) = drain("A\n\nB\n\n");
        assert_eq!(paras, vec!["A", "B"]);
        assert_eq!(rest, "");
    }

    #[test]
    fn drain_complete_paragraphs_empty_input() {
        let (paras, rest) = drain("");
        assert!(paras.is_empty());
        assert_eq!(rest, "");
    }

    #[test]
    fn drain_complete_paragraphs_skips_empty_chunks() {
        // Back-to-back separators must not yield empty paragraphs.
        let (paras, rest) = drain("\n\n\n\nHello");
        assert!(paras.is_empty());
        assert_eq!(rest, "Hello");
    }

    #[test]
    fn drain_complete_paragraphs_trims_whitespace() {
        // Drained paragraphs are trimmed; the leftover buffer is not.
        let (paras, rest) = drain(" Hello \n\n World ");
        assert_eq!(paras, vec!["Hello"]);
        assert_eq!(rest, " World ");
    }

    // -- drain_complete_paragraphs: code-fence awareness -------------------

    #[test]
    fn drain_complete_paragraphs_code_fence_blank_line_not_split() {
        // The blank line sits inside the fence, so the only valid split point
        // is the one after the closing fence.
        let (paras, rest) = drain(
            "```rust\nfn foo() {\n let x = 1;\n\n let y = 2;\n}\n```\n\nNext paragraph.",
        );
        assert_eq!(
            paras.len(),
            1,
            "code fence with blank line should not be split into multiple messages: {paras:?}"
        );
        let fence = &paras[0];
        assert!(
            fence.starts_with("```rust"),
            "first paragraph should be the code fence: {:?}",
            fence
        );
        assert!(
            fence.contains("let y = 2;"),
            "code fence should contain content from both sides of the blank line: {:?}",
            fence
        );
        assert_eq!(rest, "Next paragraph.");
    }

    #[test]
    fn drain_complete_paragraphs_text_before_and_after_fenced_block() {
        // Plain text, then a fenced block containing a blank line, then more text.
        let (paras, rest) = drain("Before\n\n```\ncode\n\nmore code\n```\n\nAfter");
        assert_eq!(paras.len(), 2, "expected two paragraphs: {paras:?}");
        assert_eq!(paras[0], "Before");
        let fence = &paras[1];
        assert!(
            fence.starts_with("```"),
            "second paragraph should be the code fence: {:?}",
            fence
        );
        assert!(
            fence.contains("more code"),
            "code fence content must include the part after the blank line: {:?}",
            fence
        );
        assert_eq!(rest, "After");
    }

    #[test]
    fn drain_complete_paragraphs_incremental_simulation() {
        // Feed the text one character at a time, draining after every push.
        let mut buffer = String::new();
        let mut collected = Vec::new();

        for ch in "First para.\n\nSecond para.\n\nThird.".chars() {
            buffer.push(ch);
            let ready = drain_complete_paragraphs(&mut buffer);
            collected.extend(ready);
        }

        assert_eq!(collected, vec!["First para.", "Second para."]);
        assert_eq!(buffer, "Third.");
    }
}
|
||||
@@ -10,7 +10,7 @@
|
||||
//! (it clears local session state and message history) and is not routed here.
|
||||
|
||||
use crate::http::context::{AppContext, OpenApiResult};
|
||||
use crate::chat::transport::matrix::commands::CommandDispatch;
|
||||
use crate::chat::commands::CommandDispatch;
|
||||
use poem::http::StatusCode;
|
||||
use poem_openapi::{Object, OpenApi, Tags, payload::Json};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -112,7 +112,7 @@ fn dispatch_sync(
|
||||
format!("{bot_name} {cmd} {args}")
|
||||
};
|
||||
|
||||
match crate::chat::transport::matrix::commands::try_handle_command(&dispatch, &synthetic) {
|
||||
match crate::chat::commands::try_handle_command(&dispatch, &synthetic) {
|
||||
Some(response) => response,
|
||||
None => {
|
||||
// Command exists in the registry but its fallback handler returns None
|
||||
|
||||
Reference in New Issue
Block a user