//! Shared text utilities used by all chat transports.
//!
//! These functions are transport-agnostic helpers for processing chat messages:
//! prefix stripping, bot-mention handling, and paragraph buffering.

/// Case-insensitive prefix strip that also requires the match to end at a
/// word boundary.
///
/// The comparison ignores ASCII case only (`eq_ignore_ascii_case`). The match
/// is accepted when the remainder is empty or starts with a character that is
/// neither alphanumeric nor `-` nor `_`.
///
/// Returns the text following the prefix (separator included), or `None` when
/// the prefix does not match, `text` is shorter than `prefix`, or
/// `prefix.len()` does not fall on a character boundary of `text`.
pub fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
    // `get` (rather than slicing) yields `None` instead of panicking when the
    // cut lands past the end of `text` or inside a multi-byte character.
    let candidate = text.get(..prefix.len())?;
    if !candidate.eq_ignore_ascii_case(prefix) {
        return None;
    }
    // Safe to slice: `get` above proved `prefix.len()` is a char boundary.
    let rest = &text[prefix.len()..];
    match rest.chars().next() {
        // The word continues (e.g. "@timmybot" vs "@timmy"): not a boundary.
        Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None,
        // End of string, or a separator follows.
        _ => Some(rest),
    }
}

/// Strip the bot mention prefix from a raw message body.
///
/// The message is trimmed first, then these forms are tried in order
/// (ASCII case-insensitive, each must end at a word boundary):
/// - the full user ID, e.g. `@bot_localpart:server.com rest`
/// - the part of the user ID before the first `:`, e.g. `@bot_localpart rest`
/// - the display name, e.g. `DisplayName rest`
///
/// The returned slice starts immediately after the matched mention, so any
/// separator that followed it (a space, `,`, `:` …) is still present; callers
/// that want the bare text should trim it themselves. When nothing matches,
/// the trimmed message is returned unchanged.
pub fn strip_bot_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
    let trimmed = message.trim();
    // `split` always yields at least one item, so the fallback is never used;
    // it only avoids an `unwrap`.
    let localpart = bot_user_id.split(':').next().unwrap_or(bot_user_id);

    // Most specific candidate first, so the full ID wins over its localpart.
    [bot_user_id, localpart, bot_name]
        .iter()
        .find_map(|&candidate| strip_prefix_ci(trimmed, candidate))
        .unwrap_or(trimmed)
}

/// Returns `true` when `text` ends while inside an open fenced code block.
///
/// A fence line is a line whose first non-whitespace characters are a run of
/// three or more backticks. The first such line opens a block; the block is
/// closed by the next fence line whose backtick run is at least as long as the
/// opening one. Shorter runs inside an open block are treated as content, so a
/// ` ``` ` example nested inside a ` ```` ` block does not close it.
fn is_inside_code_fence(text: &str) -> bool {
    // Backtick count of the currently open fence, `None` when outside a block.
    let mut open_len: Option<usize> = None;
    for line in text.lines() {
        let ticks = line
            .trim_start()
            .bytes()
            .take_while(|&b| b == b'`')
            .count();
        if ticks < 3 {
            continue;
        }
        match open_len {
            None => open_len = Some(ticks),
            Some(opening) if ticks >= opening => open_len = None,
            // Too short to close the open fence: part of the block's content.
            Some(_) => {}
        }
    }
    open_len.is_some()
}

/// Drain all complete paragraphs from `buffer` and return them.
///
/// A paragraph boundary is a double newline (`\n\n`). Each drained paragraph
/// is trimmed of surrounding whitespace; empty paragraphs are discarded.
/// The buffer is left with only the remaining incomplete text.
///
/// **Code-fence awareness:** a `\n\n` that occurs *inside* a fenced code
/// block (delimited by ` ``` ` lines) is **not** treated as a paragraph
/// boundary. This prevents a blank line inside a code block from splitting
/// the fence across multiple messages, which would corrupt the rendering.
pub fn drain_complete_paragraphs(buffer: &mut String) -> Vec<String> {
    let mut paragraphs = Vec::new();
    let mut search_from = 0;

    while let Some(pos) = buffer[search_from..].find("\n\n") {
        let abs_pos = search_from + pos;

        if is_inside_code_fence(&buffer[..abs_pos]) {
            // Blank line inside a code block: keep looking past it.
            search_from = abs_pos + 2;
            continue;
        }

        let chunk = buffer[..abs_pos].trim();
        if !chunk.is_empty() {
            paragraphs.push(chunk.to_string());
        }
        // Remove the paragraph and its separator in place; `abs_pos + 2` is a
        // char boundary because the separator is two ASCII bytes.
        buffer.drain(..abs_pos + 2);
        search_from = 0;
    }

    paragraphs
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    // -- strip_prefix_ci ----------------------------------------------------

    #[test]
    fn strip_prefix_ci_basic() {
        assert_eq!(strip_prefix_ci("Hello world", "hello"), Some(" world"));
    }

    #[test]
    fn strip_prefix_ci_no_match() {
        assert_eq!(strip_prefix_ci("goodbye", "hello"), None);
    }

    #[test]
    fn strip_prefix_ci_word_boundary_required() {
        assert_eq!(strip_prefix_ci("helloworld", "hello"), None);
    }

    #[test]
    fn strip_prefix_ci_exact_match() {
        assert_eq!(strip_prefix_ci("hello", "hello"), Some(""));
    }

    #[test]
    fn strip_prefix_ci_multibyte_no_panic_smart_quote() {
        // "abcde\u{2019}xyz" — U+2019 is 3 bytes starting at byte 5.
        // A prefix of length 6 (e.g. "abcdef") lands inside the 3-byte char.
        // Previously this caused: "byte index 6 is not a char boundary".
        let text = "abcde\u{2019}xyz";
        assert_eq!(strip_prefix_ci(text, "abcdef"), None);
    }

    #[test]
    fn strip_prefix_ci_multibyte_no_panic_emoji() {
        // U+1F600 is 4 bytes starting at byte 3. Prefix length 4 lands inside it.
        let text = "abc\u{1F600}def";
        assert_eq!(strip_prefix_ci(text, "abcd"), None);
    }

    // -- strip_bot_mention --------------------------------------------------

    #[test]
    fn strip_mention_full_user_id() {
        let rest = strip_bot_mention(
            "@timmy:homeserver.local help",
            "Timmy",
            "@timmy:homeserver.local",
        );
        assert_eq!(rest.trim(), "help");
    }

    #[test]
    fn strip_mention_localpart() {
        let rest = strip_bot_mention("@timmy help me", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest.trim(), "help me");
    }

    #[test]
    fn strip_mention_display_name() {
        let rest = strip_bot_mention("Timmy help", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest.trim(), "help");
    }

    #[test]
    fn strip_mention_display_name_case_insensitive() {
        let rest = strip_bot_mention("timmy help", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest.trim(), "help");
    }

    #[test]
    fn strip_mention_no_match_returns_original() {
        let rest = strip_bot_mention("hello world", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest, "hello world");
    }

    #[test]
    fn strip_mention_does_not_match_longer_name() {
        // "@timmybot" should NOT match "@timmy"
        let rest = strip_bot_mention("@timmybot help", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest, "@timmybot help");
    }

    #[test]
    fn strip_mention_comma_after_name() {
        let rest = strip_bot_mention("@timmy, help", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest.trim().trim_start_matches(',').trim(), "help");
    }

    // -- drain_complete_paragraphs ------------------------------------------

    #[test]
    fn drain_complete_paragraphs_no_boundary_returns_empty() {
        let mut buf = "Hello World".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert!(paras.is_empty());
        assert_eq!(buf, "Hello World");
    }

    #[test]
    fn drain_complete_paragraphs_single_boundary() {
        let mut buf = "Paragraph one.\n\nParagraph two.".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras, vec!["Paragraph one."]);
        assert_eq!(buf, "Paragraph two.");
    }

    #[test]
    fn drain_complete_paragraphs_multiple_boundaries() {
        let mut buf = "A\n\nB\n\nC".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras, vec!["A", "B"]);
        assert_eq!(buf, "C");
    }

    #[test]
    fn drain_complete_paragraphs_trailing_boundary() {
        let mut buf = "A\n\nB\n\n".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras, vec!["A", "B"]);
        assert_eq!(buf, "");
    }

    #[test]
    fn drain_complete_paragraphs_empty_input() {
        let mut buf = String::new();
        let paras = drain_complete_paragraphs(&mut buf);
        assert!(paras.is_empty());
        assert_eq!(buf, "");
    }

    #[test]
    fn drain_complete_paragraphs_skips_empty_chunks() {
        // Consecutive double-newlines produce no empty paragraphs.
        let mut buf = "\n\n\n\nHello".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert!(paras.is_empty());
        assert_eq!(buf, "Hello");
    }

    #[test]
    fn drain_complete_paragraphs_trims_whitespace() {
        let mut buf = " Hello \n\n World ".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras, vec!["Hello"]);
        assert_eq!(buf, " World ");
    }

    // -- drain_complete_paragraphs: code-fence awareness -------------------

    #[test]
    fn drain_complete_paragraphs_code_fence_blank_line_not_split() {
        // A blank line inside a fenced code block must NOT trigger a split.
        let mut buf = "```rust\nfn foo() {\n let x = 1;\n\n let y = 2;\n}\n```\n\nNext paragraph."
            .to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(
            paras.len(),
            1,
            "code fence with blank line should not be split into multiple messages: {paras:?}"
        );
        assert!(
            paras[0].starts_with("```rust"),
            "first paragraph should be the code fence: {:?}",
            paras[0]
        );
        assert!(
            paras[0].contains("let y = 2;"),
            "code fence should contain content from both sides of the blank line: {:?}",
            paras[0]
        );
        assert_eq!(buf, "Next paragraph.");
    }

    #[test]
    fn drain_complete_paragraphs_text_before_and_after_fenced_block() {
        // Text paragraph, then a code block with an internal blank line, then more text.
        let mut buf = "Before\n\n```\ncode\n\nmore code\n```\n\nAfter".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras.len(), 2, "expected two paragraphs: {paras:?}");
        assert_eq!(paras[0], "Before");
        assert!(
            paras[1].starts_with("```"),
            "second paragraph should be the code fence: {:?}",
            paras[1]
        );
        assert!(
            paras[1].contains("more code"),
            "code fence content must include the part after the blank line: {:?}",
            paras[1]
        );
        assert_eq!(buf, "After");
    }

    #[test]
    fn drain_complete_paragraphs_nested_shorter_fence_not_split() {
        // A ``` example nested inside a ```` block must not close the outer
        // block, so the blank line between "code" and "more" is not a boundary.
        let mut buf = "````md\n```\ncode\n\nmore\n```\n````\n\nAfter".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras.len(), 1, "nested fence must stay in one piece: {paras:?}");
        assert!(
            paras[0].starts_with("````md") && paras[0].ends_with("````"),
            "paragraph should be the whole outer fence: {:?}",
            paras[0]
        );
        assert_eq!(buf, "After");
    }

    #[test]
    fn drain_complete_paragraphs_incremental_simulation() {
        // Simulate tokens arriving one character at a time.
        let mut buf = String::new();
        let mut all_paragraphs = Vec::new();
        for ch in "First para.\n\nSecond para.\n\nThird.".chars() {
            buf.push(ch);
            all_paragraphs.extend(drain_complete_paragraphs(&mut buf));
        }
        assert_eq!(all_paragraphs, vec!["First para.", "Second para."]);
        assert_eq!(buf, "Third.");
    }
}