//! Shared text utilities used by all chat transports.
//!
//! These functions are transport-agnostic helpers for processing chat messages:
//! prefix stripping, bot-mention handling, and paragraph buffering.

/// Returns `true` if the message body is an affirmative permission response.
///
/// Recognised affirmative tokens (case-insensitive): `yes`, `y`, `approve`,
/// `allow`, `ok`. Anything else — including ambiguous text — is treated as
/// denial (fail-closed).
///
/// A single leading `@mention` token (e.g. `"@timmy yes"`) is skipped before
/// checking, so the bot name does not interfere with the result. What remains
/// must be exactly one token: multi-word replies such as `"definitely not ok"`
/// are denied even when they happen to end in an affirmative word.
pub fn is_permission_approval(body: &str) -> bool {
    const AFFIRMATIVE: [&str; 5] = ["yes", "y", "approve", "allow", "ok"];

    let mut tokens = body.split_whitespace().peekable();

    // Skip a leading @mention (e.g. "@timmy" or "@timmy:") if present.
    if tokens.peek().is_some_and(|token| token.starts_with('@')) {
        tokens.next();
    }

    // Fail closed: approve only when the reply is a single affirmative token.
    match (tokens.next(), tokens.next()) {
        (Some(answer), None) => AFFIRMATIVE
            .iter()
            .any(|word| answer.eq_ignore_ascii_case(word)),
        _ => false,
    }
}

/// Case-insensitive (ASCII) prefix strip that also requires the match to end
/// at a word boundary.
///
/// Returns the remainder of `text` after `prefix`, or `None` when `text` does
/// not start with `prefix` or when the character right after the prefix is
/// alphanumeric, `-`, or `_` (i.e. the prefix is only part of a longer word).
/// End-of-string counts as a boundary.
pub fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
    // `get` returns `None` instead of panicking when `prefix.len()` is past the
    // end of `text` or lands inside a multi-byte character.
    let candidate = text.get(..prefix.len())?;
    if !candidate.eq_ignore_ascii_case(prefix) {
        return None;
    }

    let rest = &text[prefix.len()..];
    match rest.chars().next() {
        // Prefix is glued to more word characters: not a word boundary.
        Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None,
        // End of string, whitespace, or punctuation.
        _ => Some(rest),
    }
}

/// Strip the bot mention prefix from a raw message body.
///
/// Handles these forms (case-insensitive where applicable):
/// - `@bot_localpart:server.com rest` → `rest`
/// - `@bot_localpart rest` → `rest`
/// - `DisplayName rest` → `rest`
/// - `DisplayName: rest` → `rest` (Element tab-completion inserts a colon)
/// - `DisplayName, rest` → `rest` (Element tab-completion may insert a comma)
/// - `DisplayName ⚡️: rest` → `rest` (display name with emoji)
/// - `[DisplayName](https://matrix.to/#/@user:server) rest` → `rest` (Element mention pill)
///
/// When no form matches, the trimmed message is returned unchanged. Empty
/// `bot_name` / `bot_user_id` values never match anything.
pub fn strip_bot_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
    let trimmed = message.trim();

    // Element Markdown mention pill:
    // "[DisplayName](https://matrix.to/#/@user:server) rest"
    if let Some(rest) = strip_mention_pill(trimmed, bot_user_id) {
        return strip_mention_separator(rest);
    }

    // Plain-text forms, most specific first: full Matrix user ID
    // ("@timmy:homeserver.local"), then "@localpart", then the display name.
    let localpart = bot_user_id.split(':').next().unwrap_or(bot_user_id);
    [bot_user_id, localpart, bot_name]
        .iter()
        // An empty candidate would "match" any message that starts with
        // punctuation and make us eat that punctuation.
        .filter(|candidate| !candidate.is_empty())
        .find_map(|candidate| strip_prefix_ci(trimmed, candidate))
        .map_or(trimmed, strip_mention_separator)
}

/// Match a leading Element mention pill that targets `bot_user_id`.
///
/// Returns the text following the closing `)` of the pill, or `None` when
/// `text` does not start with a pill or the pill mentions a different user.
fn strip_mention_pill<'a>(text: &'a str, bot_user_id: &str) -> Option<&'a str> {
    const PILL_URL_OPEN: &str = "](https://matrix.to/#/";

    if bot_user_id.is_empty() || !text.starts_with('[') {
        return None;
    }
    let label_end = text.find(PILL_URL_OPEN)?;
    // "@user:server) rest"
    let after_url_prefix = &text[label_end + PILL_URL_OPEN.len()..];
    let (mentioned_id, rest) = after_url_prefix.split_once(')')?;
    mentioned_id
        .eq_ignore_ascii_case(bot_user_id)
        .then_some(rest)
}

/// Strip decoration between a bot mention and the command text.
///
/// After the bot name/ID is stripped, what remains may include whitespace,
/// emoji from display names (e.g. `Timmy ⚡️`), and Element tab-completion
/// separators (`:` or `,`). This function skips all of that and returns a
/// slice starting at the first alphanumeric character (the command).
///
/// The check is Unicode-aware so that text in non-Latin scripts
/// (e.g. `привет`) is kept intact rather than skipped as decoration.
fn strip_mention_separator(rest: &str) -> &str {
    let start = rest
        .char_indices()
        .find(|(_, c)| c.is_alphanumeric())
        .map_or(rest.len(), |(index, _)| index);
    &rest[start..]
}

/// Returns `true` when `text` ends while inside an open fenced code block.
///
/// A fence marker is any line whose first non-whitespace characters are
/// three backticks. An odd number of markers means a fence was opened but
/// not yet closed.
fn is_inside_code_fence(text: &str) -> bool {
    let fence_markers = text
        .lines()
        .filter(|line| line.trim_start().starts_with("```"))
        .count();
    fence_markers % 2 == 1
}

/// Drain all complete paragraphs from `buffer` and return them.
///
/// A paragraph boundary is a double newline (`\n\n`). Each drained paragraph
/// is trimmed of surrounding whitespace; empty paragraphs are discarded.
/// The buffer is left with only the remaining incomplete text.
///
/// **Code-fence awareness:** a `\n\n` that occurs *inside* a fenced code
/// block (delimited by ` ``` ` lines) is **not** treated as a paragraph
/// boundary. This prevents a blank line inside a code block from splitting
/// the fence across multiple messages, which would corrupt the rendering.
pub fn drain_complete_paragraphs(buffer: &mut String) -> Vec<String> {
    let mut paragraphs = Vec::new();
    // Byte offset where the not-yet-emitted text starts.
    let mut consumed = 0;
    // Byte offset from which to look for the next candidate boundary.
    let mut search_from = 0;

    while let Some(offset) = buffer[search_from..].find("\n\n") {
        let boundary = search_from + offset;
        if is_inside_code_fence(&buffer[consumed..boundary]) {
            // Blank line inside a fence: keep scanning for a later boundary.
            search_from = boundary + 2;
        } else {
            let chunk = buffer[consumed..boundary].trim();
            if !chunk.is_empty() {
                paragraphs.push(chunk.to_string());
            }
            consumed = boundary + 2;
            search_from = consumed;
        }
    }

    // Remove everything that was emitted in one go instead of reallocating
    // the buffer once per paragraph.
    buffer.drain(..consumed);
    paragraphs
}

/// Normalize single newlines between prose lines to double newlines.
///
/// LLMs sometimes output text with single newlines between sentences, e.g.:
/// ```text
/// Sentence one.
/// Sentence two.
/// ```
///
/// In Markdown a single newline is a *soft break* and may render as a space
/// (or nothing), causing sentences to appear joined ("sentence one.Sentence
/// two"). This function converts single newlines between non-empty prose
/// lines into double newlines (paragraph breaks) so they render correctly.
///
/// Single newlines are **preserved** (not doubled) when either the preceding
/// or following line is a structured Markdown element:
/// - Bullet list items (`- `, `* `, `+ `)
/// - Ordered list items (`1. `, `2. `, …)
/// - ATX headings (`#`, `##`, …)
/// - Table rows (`|`)
/// - Code fence delimiters (`` ``` ``)
/// - Horizontal rules (three or more of `-`, `*`, `_`)
///
/// Content inside fenced code blocks is also preserved verbatim.
pub fn normalize_line_breaks(text: &str) -> String {
    /// Returns `true` for lines that Markdown treats as block structure.
    fn is_structured_line(line: &str) -> bool {
        let trimmed = line.trim_start();
        if trimmed.is_empty() {
            return false;
        }

        // Headings, table rows, bullets and fence delimiters.
        let has_block_marker = trimmed.starts_with('#')
            || trimmed.starts_with('|')
            || ["- ", "* ", "+ ", "```"]
                .iter()
                .any(|marker| trimmed.starts_with(*marker));
        if has_block_marker {
            return true;
        }

        // Ordered list: one or more digits followed by ". "
        let digit_count = trimmed.bytes().take_while(u8::is_ascii_digit).count();
        if digit_count > 0 && trimmed[digit_count..].starts_with(". ") {
            return true;
        }

        // Horizontal rules: lines made entirely of -, *, or _ (at least 3 chars).
        let only_rule_chars = trimmed
            .chars()
            .all(|c| matches!(c, '-' | '*' | '_' | ' '));
        let rule_char_count = trimmed.chars().filter(|c| !c.is_whitespace()).count();
        only_rule_chars && rule_char_count >= 3
    }

    let mut output = String::with_capacity(text.len() + text.len() / 8);
    let mut in_code_fence = false;
    let mut previous: Option<&str> = None;

    for line in text.split('\n') {
        if line.trim_start().starts_with("```") {
            in_code_fence = !in_code_fence;
        }

        if let Some(prev_line) = previous {
            // Re-insert the newline that `split` removed.
            output.push('\n');

            // Add a blank separator when both the current and previous lines
            // are non-empty prose (not inside a code fence, not structured
            // Markdown).
            let both_prose = !in_code_fence
                && !line.is_empty()
                && !prev_line.is_empty()
                && !is_structured_line(line)
                && !is_structured_line(prev_line);
            if both_prose {
                output.push('\n');
            }
        }

        output.push_str(line);
        previous = Some(line);
    }

    output
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    // -- is_permission_approval ---------------------------------------------

    #[test]
    fn is_permission_approval_accepts_yes_variants() {
        assert!(is_permission_approval("yes"));
        assert!(is_permission_approval("Yes"));
        assert!(is_permission_approval("YES"));
        assert!(is_permission_approval("y"));
        assert!(is_permission_approval("Y"));
        assert!(is_permission_approval("approve"));
        assert!(is_permission_approval("allow"));
        assert!(is_permission_approval("ok"));
        assert!(is_permission_approval("OK"));
    }

    #[test]
    fn is_permission_approval_denies_no_and_other() {
        assert!(!is_permission_approval("no"));
        assert!(!is_permission_approval("No"));
        assert!(!is_permission_approval("n"));
        assert!(!is_permission_approval("deny"));
        assert!(!is_permission_approval("reject"));
        assert!(!is_permission_approval("maybe"));
        assert!(!is_permission_approval(""));
        assert!(!is_permission_approval("yes please do it"));
    }

    #[test]
    fn is_permission_approval_denies_sentence_ending_in_affirmative() {
        // Ambiguous multi-word replies must fail closed even when the last
        // word is an affirmative token.
        assert!(!is_permission_approval("definitely not ok"));
        assert!(!is_permission_approval("no, do not allow"));
        assert!(!is_permission_approval("@timmy not ok"));
    }

    #[test]
    fn is_permission_approval_strips_at_mention_prefix() {
        assert!(is_permission_approval("@timmy yes"));
        assert!(!is_permission_approval("@timmy no"));
    }

    #[test]
    fn is_permission_approval_handles_whitespace() {
        assert!(is_permission_approval(" yes "));
        assert!(is_permission_approval("\tyes\n"));
    }

    // -- strip_prefix_ci ----------------------------------------------------

    #[test]
    fn strip_prefix_ci_basic() {
        assert_eq!(strip_prefix_ci("Hello world", "hello"), Some(" world"));
    }

    #[test]
    fn strip_prefix_ci_no_match() {
        assert_eq!(strip_prefix_ci("goodbye", "hello"), None);
    }

    #[test]
    fn strip_prefix_ci_word_boundary_required() {
        assert_eq!(strip_prefix_ci("helloworld", "hello"), None);
    }

    #[test]
    fn strip_prefix_ci_exact_match() {
        assert_eq!(strip_prefix_ci("hello", "hello"), Some(""));
    }

    #[test]
    fn strip_prefix_ci_multibyte_no_panic_smart_quote() {
        // "abcde\u{2019}xyz" — U+2019 is 3 bytes starting at byte 5.
        // A prefix of length 6 (e.g. "abcdef") lands inside the 3-byte char.
        // Previously this caused: "byte index 6 is not a char boundary".
        let text = "abcde\u{2019}xyz";
        assert_eq!(strip_prefix_ci(text, "abcdef"), None);
    }

    #[test]
    fn strip_prefix_ci_multibyte_no_panic_emoji() {
        // U+1F600 is 4 bytes starting at byte 3. Prefix length 4 lands inside it.
        let text = "abc\u{1F600}def";
        assert_eq!(strip_prefix_ci(text, "abcd"), None);
    }

    // -- strip_bot_mention --------------------------------------------------

    #[test]
    fn strip_mention_full_user_id() {
        let rest = strip_bot_mention(
            "@timmy:homeserver.local help",
            "Timmy",
            "@timmy:homeserver.local",
        );
        assert_eq!(rest.trim(), "help");
    }

    #[test]
    fn strip_mention_localpart() {
        let rest = strip_bot_mention("@timmy help me", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest.trim(), "help me");
    }

    #[test]
    fn strip_mention_display_name() {
        let rest = strip_bot_mention("Timmy help", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest.trim(), "help");
    }

    #[test]
    fn strip_mention_display_name_case_insensitive() {
        let rest = strip_bot_mention("timmy help", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest.trim(), "help");
    }

    #[test]
    fn strip_mention_no_match_returns_original() {
        let rest = strip_bot_mention("hello world", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest, "hello world");
    }

    #[test]
    fn strip_mention_does_not_match_longer_name() {
        // "@timmybot" should NOT match "@timmy"
        let rest = strip_bot_mention("@timmybot help", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest, "@timmybot help");
    }

    #[test]
    fn strip_mention_comma_after_name() {
        let rest = strip_bot_mention("@timmy, help", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest.trim(), "help");
    }

    #[test]
    fn strip_mention_colon_separator_element_tab_completion() {
        // Element tab-completes display names with a trailing ": "
        let rest = strip_bot_mention(
            "timmy ⚡️: ambient on",
            "timmy ⚡️",
            "@timmy:homeserver.local",
        );
        assert_eq!(rest, "ambient on");
    }

    #[test]
    fn strip_mention_emoji_display_name_no_separator() {
        // Display name with emoji, no separator
        let rest = strip_bot_mention("timmy ⚡️ ambient on", "timmy ⚡️", "@timmy:homeserver.local");
        assert_eq!(rest, "ambient on");
    }

    #[test]
    fn strip_mention_colon_after_localpart() {
        // Element may also produce "@timmy: help"
        let rest = strip_bot_mention("@timmy: help", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest, "help");
    }

    #[test]
    fn strip_mention_short_name_emoji_suffix_in_body() {
        // bot_name is "Timmy" (no emoji) but Element mention pill puts
        // "Timmy ⚡️ status" in the body — the emoji is part of the display
        // name as set on the Matrix server, not in bot.toml.
        let rest = strip_bot_mention("Timmy ⚡️ status", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest, "status");
    }

    #[test]
    fn strip_mention_preserves_non_ascii_message() {
        // Text that does not start with an ASCII letter must not be eaten as
        // if it were decoration.
        let rest = strip_bot_mention("@timmy: привет мир", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest, "привет мир");
    }

    #[test]
    fn strip_mention_empty_bot_identity_is_noop() {
        // Empty name/ID must not match as an empty prefix and strip the
        // leading punctuation of an ordinary message.
        let rest = strip_bot_mention("!status", "", "");
        assert_eq!(rest, "!status");
    }

    #[test]
    fn strip_mention_element_markdown_pill_format() {
        // Element sends "[DisplayName](https://matrix.to/#/@user:server) command"
        // when a user uses the @ autocomplete mention pill.
        let rest = strip_bot_mention(
            "[Timmy](https://matrix.to/#/@timmy:homeserver.local) status",
            "Timmy",
            "@timmy:homeserver.local",
        );
        assert_eq!(rest, "status");
    }

    #[test]
    fn strip_mention_element_markdown_pill_with_emoji_display_name() {
        let rest = strip_bot_mention(
            "[timmy ⚡️](https://matrix.to/#/@timmy:homeserver.local) ambient on",
            "timmy ⚡️",
            "@timmy:homeserver.local",
        );
        assert_eq!(rest, "ambient on");
    }

    #[test]
    fn strip_mention_element_markdown_pill_wrong_user_id_no_strip() {
        // Pill for a different user should not be stripped.
        let rest = strip_bot_mention(
            "[Other](https://matrix.to/#/@other:homeserver.local) status",
            "Timmy",
            "@timmy:homeserver.local",
        );
        assert_eq!(
            rest,
            "[Other](https://matrix.to/#/@other:homeserver.local) status"
        );
    }

    #[test]
    fn strip_mention_element_markdown_pill_no_trailing_command() {
        // Pill with no command after it returns empty string (handled by callers).
        let rest = strip_bot_mention(
            "[Timmy](https://matrix.to/#/@timmy:homeserver.local)",
            "Timmy",
            "@timmy:homeserver.local",
        );
        assert_eq!(rest, "");
    }

    // -- drain_complete_paragraphs ------------------------------------------

    #[test]
    fn drain_complete_paragraphs_no_boundary_returns_empty() {
        let mut buf = "Hello World".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert!(paras.is_empty());
        assert_eq!(buf, "Hello World");
    }

    #[test]
    fn drain_complete_paragraphs_single_boundary() {
        let mut buf = "Paragraph one.\n\nParagraph two.".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras, vec!["Paragraph one."]);
        assert_eq!(buf, "Paragraph two.");
    }

    #[test]
    fn drain_complete_paragraphs_multiple_boundaries() {
        let mut buf = "A\n\nB\n\nC".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras, vec!["A", "B"]);
        assert_eq!(buf, "C");
    }

    #[test]
    fn drain_complete_paragraphs_trailing_boundary() {
        let mut buf = "A\n\nB\n\n".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras, vec!["A", "B"]);
        assert_eq!(buf, "");
    }

    #[test]
    fn drain_complete_paragraphs_empty_input() {
        let mut buf = String::new();
        let paras = drain_complete_paragraphs(&mut buf);
        assert!(paras.is_empty());
        assert_eq!(buf, "");
    }

    #[test]
    fn drain_complete_paragraphs_skips_empty_chunks() {
        // Consecutive double-newlines produce no empty paragraphs.
        let mut buf = "\n\n\n\nHello".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert!(paras.is_empty());
        assert_eq!(buf, "Hello");
    }

    #[test]
    fn drain_complete_paragraphs_trims_whitespace() {
        let mut buf = " Hello \n\n World ".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras, vec!["Hello"]);
        assert_eq!(buf, " World ");
    }

    // -- drain_complete_paragraphs: code-fence awareness -------------------

    #[test]
    fn drain_complete_paragraphs_code_fence_blank_line_not_split() {
        // A blank line inside a fenced code block must NOT trigger a split.
        let mut buf = "```rust\nfn foo() {\n let x = 1;\n\n let y = 2;\n}\n```\n\nNext paragraph."
            .to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(
            paras.len(),
            1,
            "code fence with blank line should not be split into multiple messages: {paras:?}"
        );
        assert!(
            paras[0].starts_with("```rust"),
            "first paragraph should be the code fence: {:?}",
            paras[0]
        );
        assert!(
            paras[0].contains("let y = 2;"),
            "code fence should contain content from both sides of the blank line: {:?}",
            paras[0]
        );
        assert_eq!(buf, "Next paragraph.");
    }

    #[test]
    fn drain_complete_paragraphs_text_before_and_after_fenced_block() {
        // Text paragraph, then a code block with an internal blank line, then more text.
        let mut buf = "Before\n\n```\ncode\n\nmore code\n```\n\nAfter".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras.len(), 2, "expected two paragraphs: {paras:?}");
        assert_eq!(paras[0], "Before");
        assert!(
            paras[1].starts_with("```"),
            "second paragraph should be the code fence: {:?}",
            paras[1]
        );
        assert!(
            paras[1].contains("more code"),
            "code fence content must include the part after the blank line: {:?}",
            paras[1]
        );
        assert_eq!(buf, "After");
    }

    #[test]
    fn drain_complete_paragraphs_incremental_simulation() {
        // Simulate tokens arriving one character at a time.
        let mut buf = String::new();
        let mut all_paragraphs = Vec::new();
        for ch in "First para.\n\nSecond para.\n\nThird.".chars() {
            buf.push(ch);
            all_paragraphs.extend(drain_complete_paragraphs(&mut buf));
        }
        assert_eq!(all_paragraphs, vec!["First para.", "Second para."]);
        assert_eq!(buf, "Third.");
    }

    // -- normalize_line_breaks -----------------------------------------------

    #[test]
    fn normalize_prose_single_newline_becomes_double() {
        let input = "Sentence one.\nSentence two.";
        let output = normalize_line_breaks(input);
        assert_eq!(output, "Sentence one.\n\nSentence two.");
    }

    #[test]
    fn normalize_existing_double_newline_unchanged() {
        let input = "Paragraph one.\n\nParagraph two.";
        let output = normalize_line_breaks(input);
        assert_eq!(output, "Paragraph one.\n\nParagraph two.");
    }

    #[test]
    fn normalize_bullet_list_single_newlines_preserved() {
        let input = "- item one\n- item two\n- item three";
        let output = normalize_line_breaks(input);
        assert_eq!(output, "- item one\n- item two\n- item three");
    }

    #[test]
    fn normalize_heading_single_newline_preserved() {
        let input = "# My Heading\nSome text below.";
        let output = normalize_line_breaks(input);
        assert_eq!(output, "# My Heading\nSome text below.");
    }

    #[test]
    fn normalize_table_rows_single_newlines_preserved() {
        let input = "| Col A | Col B |\n| --- | --- |\n| val1 | val2 |";
        let output = normalize_line_breaks(input);
        assert_eq!(output, "| Col A | Col B |\n| --- | --- |\n| val1 | val2 |");
    }

    #[test]
    fn normalize_code_block_content_preserved_verbatim() {
        let input = "```rust\nlet x = 1;\nlet y = 2;\n```";
        let output = normalize_line_breaks(input);
        assert_eq!(output, input);
    }

    #[test]
    fn normalize_code_block_with_blank_line_inside_preserved() {
        let input = "```\nfn foo() {\n let x = 1;\n\n let y = 2;\n}\n```";
        let output = normalize_line_breaks(input);
        assert_eq!(output, input);
    }

    #[test]
    fn normalize_mixed_prose_and_code_block() {
        let input = "First sentence.\nSecond sentence.\n\n```rust\nlet x = 1;\nlet y = 2;\n```\n\nThird sentence.\nFourth sentence.";
        let output = normalize_line_breaks(input);
        // Prose sentences before and after the code block get doubled.
        // The code block itself is preserved.
        assert!(
            output.contains("First sentence.\n\nSecond sentence."),
            "prose before code: {output}"
        );
        assert!(
            output.contains("```rust\nlet x = 1;\nlet y = 2;\n```"),
            "code block preserved: {output}"
        );
        assert!(
            output.contains("Third sentence.\n\nFourth sentence."),
            "prose after code: {output}"
        );
    }

    #[test]
    fn normalize_ordered_list_single_newlines_preserved() {
        let input = "1. First item\n2. Second item\n3. Third item";
        let output = normalize_line_breaks(input);
        assert_eq!(output, "1. First item\n2. Second item\n3. Third item");
    }

    #[test]
    fn normalize_empty_string_unchanged() {
        assert_eq!(normalize_line_breaks(""), "");
    }

    #[test]
    fn normalize_single_line_unchanged() {
        assert_eq!(normalize_line_breaks("Hello."), "Hello.");
    }

    #[test]
    fn normalize_prose_then_bullet_no_extra_blank() {
        // When prose is followed by a bullet item, no extra blank is inserted
        // because the bullet line is structured.
        let input = "Some prose.\n- bullet item";
        let output = normalize_line_breaks(input);
        assert_eq!(output, "Some prose.\n- bullet item");
    }
}