2026-03-25 14:43:28 +00:00
|
|
|
//! Shared text utilities used by all chat transports.
|
|
|
|
|
//!
|
|
|
|
|
//! These functions are transport-agnostic helpers for processing chat messages:
|
|
|
|
|
//! prefix stripping, bot-mention handling, and paragraph buffering.
|
|
|
|
|
|
2026-03-28 19:44:00 +00:00
|
|
|
/// Returns `true` if the message body is an affirmative permission response.
///
/// Recognised affirmative tokens (case-insensitive): `yes`, `y`, `approve`,
/// `allow`, `ok`. The reply must consist of exactly one such token; anything
/// else — including ambiguous or multi-word text such as `"do not allow"` or
/// `"yes please do it"` — is treated as denial (fail-closed).
///
/// A single leading `@mention` token (e.g. `"@timmy yes"`) is stripped before
/// checking, so the bot name does not interfere with the result. Surrounding
/// whitespace is ignored.
pub fn is_permission_approval(body: &str) -> bool {
    const AFFIRMATIVE: [&str; 5] = ["yes", "y", "approve", "allow", "ok"];

    let reply = body.trim();

    // Only a first token that really starts with '@' is treated as a mention.
    // Everything after that token is the answer; a bare mention with nothing
    // after it carries no answer at all.
    let answer = match reply.strip_prefix('@') {
        Some(after_at) => after_at
            .split_once(char::is_whitespace)
            .map_or("", |(_mention, answer)| answer.trim_start()),
        None => reply,
    };

    // Compare the WHOLE answer, not just its last word: "do not allow" must
    // never be read as "allow".
    AFFIRMATIVE
        .iter()
        .any(|token| answer.eq_ignore_ascii_case(token))
}
|
|
|
|
|
|
2026-03-25 14:43:28 +00:00
|
|
|
/// Removes `prefix` from the front of `text`, ignoring ASCII case.
///
/// The match only counts when it stops at a word boundary: the remainder must
/// be empty or start with a character that is neither alphanumeric nor one of
/// `-` / `_`. On success the remainder (possibly empty) is returned; otherwise
/// `None`.
pub fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
    let cut = prefix.len();

    // `get` yields `None` both when `text` is shorter than the prefix and when
    // `cut` would land inside a multi-byte character, so slicing at `cut`
    // below cannot panic.
    let head = text.get(..cut)?;
    let tail = &text[cut..];

    // The prefix is only a whole word if the next character does not continue it.
    let continues_word = tail.starts_with(|c: char| c.is_alphanumeric() || c == '-' || c == '_');

    (head.eq_ignore_ascii_case(prefix) && !continues_word).then_some(tail)
}
|
|
|
|
|
|
|
|
|
|
/// Strip the bot mention prefix from a raw message body.
|
|
|
|
|
///
|
|
|
|
|
/// Handles these forms (case-insensitive where applicable):
|
|
|
|
|
/// - `@bot_localpart:server.com rest` → `rest`
|
|
|
|
|
/// - `@bot_localpart rest` → `rest`
|
|
|
|
|
/// - `DisplayName rest` → `rest`
|
2026-03-31 10:14:53 +00:00
|
|
|
/// - `DisplayName: rest` → `rest` (Element tab-completion inserts a colon)
|
|
|
|
|
/// - `DisplayName, rest` → `rest` (Element tab-completion may insert a comma)
|
|
|
|
|
/// - `DisplayName ⚡️: rest` → `rest` (display name with emoji)
|
2026-04-03 09:57:25 +00:00
|
|
|
/// - `[DisplayName](https://matrix.to/#/@user:server) rest` → `rest` (Element mention pill)
|
2026-03-25 14:43:28 +00:00
|
|
|
pub fn strip_bot_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
|
|
|
|
|
let trimmed = message.trim();
|
|
|
|
|
|
2026-04-03 09:57:25 +00:00
|
|
|
// Try Element Markdown mention pill format:
|
|
|
|
|
// "[DisplayName](https://matrix.to/#/@user:server) rest"
|
2026-04-03 12:56:39 +00:00
|
|
|
if trimmed.starts_with('[')
|
|
|
|
|
&& let Some(after_label) = trimmed.find("](https://matrix.to/#/")
|
|
|
|
|
{
|
|
|
|
|
let url_start = after_label + 2; // skip "]("
|
|
|
|
|
let url_content = &trimmed[url_start..]; // "https://matrix.to/#/@user:server) rest"
|
|
|
|
|
if let Some(close_paren) = url_content.find(')') {
|
|
|
|
|
let url = &url_content[..close_paren]; // "https://matrix.to/#/@user:server"
|
|
|
|
|
let matrix_prefix = "https://matrix.to/#/";
|
2026-04-03 16:12:52 +01:00
|
|
|
if let Some(mentioned_id) = url.strip_prefix(matrix_prefix)
|
|
|
|
|
&& mentioned_id.eq_ignore_ascii_case(bot_user_id)
|
|
|
|
|
{
|
|
|
|
|
let rest = &url_content[close_paren + 1..];
|
|
|
|
|
return strip_mention_separator(rest);
|
2026-04-03 09:57:25 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-25 14:43:28 +00:00
|
|
|
// Try full Matrix user ID (e.g. "@timmy:homeserver.local")
|
|
|
|
|
if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
|
2026-03-31 10:14:53 +00:00
|
|
|
return strip_mention_separator(rest);
|
2026-03-25 14:43:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Try @localpart (e.g. "@timmy")
|
|
|
|
|
if let Some(localpart) = bot_user_id.split(':').next()
|
|
|
|
|
&& let Some(rest) = strip_prefix_ci(trimmed, localpart)
|
|
|
|
|
{
|
2026-03-31 10:14:53 +00:00
|
|
|
return strip_mention_separator(rest);
|
2026-03-25 14:43:28 +00:00
|
|
|
}
|
|
|
|
|
|
2026-03-31 10:14:53 +00:00
|
|
|
// Try display name (e.g. "Timmy" or "timmy ⚡️")
|
2026-03-25 14:43:28 +00:00
|
|
|
if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
|
2026-03-31 10:14:53 +00:00
|
|
|
return strip_mention_separator(rest);
|
2026-03-25 14:43:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
trimmed
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-02 18:06:52 +00:00
|
|
|
/// Strip decoration between a bot mention and the command text.
///
/// After the bot name/ID is stripped, what remains may include whitespace,
/// emoji from display names (e.g. `Timmy ⚡️`), and Element tab-completion
/// separators (`:` or `,`). This function skips all of that and returns a
/// slice starting at the first alphanumeric character (the command).
///
/// "Alphanumeric" is the Unicode notion (`char::is_alphanumeric`), not just
/// ASCII, so text that begins with a non-ASCII letter (e.g. `été`, `привет`)
/// is kept intact instead of losing its leading characters. Emoji and
/// variation selectors are not alphanumeric and are still skipped.
///
/// Returns an empty slice when `rest` contains no alphanumeric character.
fn strip_mention_separator(rest: &str) -> &str {
    rest.trim_start_matches(|c: char| !c.is_alphanumeric())
}
|
|
|
|
|
|
2026-03-25 14:43:28 +00:00
|
|
|
/// Reports whether `text` stops in the middle of a fenced code block.
///
/// Every line whose first non-whitespace characters are ` ``` ` (three or
/// more backticks) is a fence marker that alternately opens and closes a
/// block. An odd number of markers therefore means the last fence that was
/// opened has not been closed yet.
fn is_inside_code_fence(text: &str) -> bool {
    let fence_markers = text
        .lines()
        .filter(|line| line.trim_start().starts_with("```"))
        .count();
    fence_markers % 2 == 1
}
|
|
|
|
|
|
|
|
|
|
/// Drain all complete paragraphs from `buffer` and return them.
|
|
|
|
|
///
|
|
|
|
|
/// A paragraph boundary is a double newline (`\n\n`). Each drained paragraph
|
|
|
|
|
/// is trimmed of surrounding whitespace; empty paragraphs are discarded.
|
|
|
|
|
/// The buffer is left with only the remaining incomplete text.
|
|
|
|
|
///
|
|
|
|
|
/// **Code-fence awareness:** a `\n\n` that occurs *inside* a fenced code
|
|
|
|
|
/// block (delimited by ` ``` ` lines) is **not** treated as a paragraph
|
|
|
|
|
/// boundary. This prevents a blank line inside a code block from splitting
|
|
|
|
|
/// the fence across multiple messages, which would corrupt the rendering.
|
|
|
|
|
pub fn drain_complete_paragraphs(buffer: &mut String) -> Vec<String> {
|
|
|
|
|
let mut paragraphs = Vec::new();
|
|
|
|
|
let mut search_from = 0;
|
|
|
|
|
loop {
|
|
|
|
|
let Some(pos) = buffer[search_from..].find("\n\n") else {
|
|
|
|
|
break;
|
|
|
|
|
};
|
|
|
|
|
let abs_pos = search_from + pos;
|
|
|
|
|
// Only split at this boundary when we are NOT inside a code fence.
|
|
|
|
|
if is_inside_code_fence(&buffer[..abs_pos]) {
|
|
|
|
|
// Skip past this \n\n and keep looking for the next boundary.
|
|
|
|
|
search_from = abs_pos + 2;
|
|
|
|
|
} else {
|
|
|
|
|
let chunk = buffer[..abs_pos].trim().to_string();
|
|
|
|
|
*buffer = buffer[abs_pos + 2..].to_string();
|
|
|
|
|
search_from = 0;
|
|
|
|
|
if !chunk.is_empty() {
|
|
|
|
|
paragraphs.push(chunk);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
paragraphs
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-28 10:39:13 +00:00
|
|
|
/// Normalize single newlines between prose lines to double newlines.
///
/// LLMs sometimes output text with single newlines between sentences, e.g.:
/// ```text
/// Sentence one.
/// Sentence two.
/// ```
///
/// In Markdown a single newline is a *soft break* and may render as a space
/// (or nothing), causing sentences to appear joined ("sentence one.Sentence
/// two"). This function converts single newlines between non-empty prose
/// lines into double newlines (paragraph breaks) so they render correctly.
///
/// Single newlines are **preserved** (not doubled) when either the preceding
/// or following line is a structured Markdown element:
/// - Bullet list items (`- `, `* `, `+ `)
/// - Ordered list items (`1. `, `2. `, …)
/// - Table rows (`|`)
/// - Code fence delimiters (`` ``` ``)
/// - Horizontal rules (three or more of `-`, `*`, `_`, optionally spaced)
///
/// ATX headings (`#`, `##`, …) are the exception: a blank line is always
/// inserted between a heading and an adjacent non-empty line, both before and
/// after the heading, unless one is already there.
///
/// Fenced code blocks are preserved verbatim: nothing is inserted before the
/// opening fence line, between lines inside the block, or before the closing
/// fence line — even when a line inside the block starts with `#`.
pub fn normalize_line_breaks(text: &str) -> String {
    /// `true` for a line that opens or closes a fenced code block.
    fn is_fence_delimiter(line: &str) -> bool {
        line.trim_start().starts_with("```")
    }

    /// `true` for a line whose first non-whitespace character is `#`.
    fn is_heading(line: &str) -> bool {
        line.trim_start().starts_with('#')
    }

    /// `true` for lines that are Markdown structure rather than prose.
    fn is_structured_line(line: &str) -> bool {
        let trimmed = line.trim_start();
        if trimmed.is_empty() {
            return false;
        }
        if trimmed.starts_with('#')
            || trimmed.starts_with("- ")
            || trimmed.starts_with("* ")
            || trimmed.starts_with("+ ")
            || trimmed.starts_with('|')
            || trimmed.starts_with("```")
        {
            return true;
        }
        // Ordered list: one or more digits followed by ". "
        let after_digits = trimmed.trim_start_matches(|c: char| c.is_ascii_digit());
        if after_digits.len() < trimmed.len() && after_digits.starts_with(". ") {
            return true;
        }
        // Horizontal rules: lines made entirely of -, *, or _ (at least 3 chars),
        // optionally separated by spaces.
        let all_hr_chars = trimmed.chars().all(|c| matches!(c, '-' | '*' | '_' | ' '));
        let hr_char_count = trimmed.chars().filter(|c| !c.is_whitespace()).count();
        all_hr_chars && hr_char_count >= 3
    }

    let lines: Vec<&str> = text.split('\n').collect();
    let mut result: Vec<&str> = Vec::with_capacity(lines.len() * 2);
    let mut in_code_fence = false;

    for (i, &line) in lines.iter().enumerate() {
        let is_fence = is_fence_delimiter(line);
        if is_fence {
            in_code_fence = !in_code_fence;
        }

        // Emit verbatim: the first line (nothing precedes it), lines inside a
        // code block, and BOTH fence delimiter lines. The line before a closing
        // fence is code, so the heading rule below must never see it —
        // otherwise a `# comment` at the end of a code block would get a blank
        // line inserted inside the block.
        if i == 0 || in_code_fence || is_fence {
            result.push(line);
            continue;
        }

        let prev_line = lines[i - 1];

        // Insert a blank separator when both lines are non-empty and either:
        //   1. both are prose (standard prose-to-prose rule), or
        //   2. the current line is an ATX heading (blank line *before* it), or
        //   3. the previous line is an ATX heading (blank line *after* it).
        // Headings need the separation because many Matrix clients (including
        // Element X) swallow a single newline next to a heading and render the
        // heading text joined to the adjacent content, without heading
        // formatting.
        let both_prose = !is_structured_line(line) && !is_structured_line(prev_line);
        let should_double = !line.is_empty()
            && !prev_line.is_empty()
            && (both_prose || is_heading(line) || is_heading(prev_line));

        if should_double {
            result.push("");
        }
        result.push(line);
    }

    result.join("\n")
}
|
|
|
|
|
|
2026-03-25 14:43:28 +00:00
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
// Tests
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Matrix user ID of the bot used throughout the mention-stripping tests.
    const BOT_ID: &str = "@timmy:homeserver.local";

    /// Runs `drain_complete_paragraphs` on a fresh buffer holding `input` and
    /// returns the drained paragraphs together with what is left in the buffer.
    fn drain(input: &str) -> (Vec<String>, String) {
        let mut buf = input.to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        (paras, buf)
    }

    // -- is_permission_approval ---------------------------------------------

    #[test]
    fn is_permission_approval_accepts_yes_variants() {
        let affirmative = ["yes", "Yes", "YES", "y", "Y", "approve", "allow", "ok", "OK"];
        for reply in affirmative {
            assert!(is_permission_approval(reply));
        }
    }

    #[test]
    fn is_permission_approval_denies_no_and_other() {
        let negative = [
            "no",
            "No",
            "n",
            "deny",
            "reject",
            "maybe",
            "",
            "yes please do it",
        ];
        for reply in negative {
            assert!(!is_permission_approval(reply));
        }
    }

    #[test]
    fn is_permission_approval_strips_at_mention_prefix() {
        assert!(is_permission_approval("@timmy yes"));
        assert!(!is_permission_approval("@timmy no"));
    }

    #[test]
    fn is_permission_approval_handles_whitespace() {
        assert!(is_permission_approval(" yes "));
        assert!(is_permission_approval("\tyes\n"));
    }

    // -- strip_prefix_ci ----------------------------------------------------

    #[test]
    fn strip_prefix_ci_basic() {
        let stripped = strip_prefix_ci("Hello world", "hello");
        assert_eq!(stripped, Some(" world"));
    }

    #[test]
    fn strip_prefix_ci_no_match() {
        let stripped = strip_prefix_ci("goodbye", "hello");
        assert_eq!(stripped, None);
    }

    #[test]
    fn strip_prefix_ci_word_boundary_required() {
        let stripped = strip_prefix_ci("helloworld", "hello");
        assert_eq!(stripped, None);
    }

    #[test]
    fn strip_prefix_ci_exact_match() {
        let stripped = strip_prefix_ci("hello", "hello");
        assert_eq!(stripped, Some(""));
    }

    #[test]
    fn strip_prefix_ci_multibyte_no_panic_smart_quote() {
        // In "abcde\u{2019}xyz" the 3-byte U+2019 starts at byte 5, so a
        // 6-byte prefix such as "abcdef" ends inside that character. This used
        // to panic with "byte index 6 is not a char boundary".
        let text = "abcde\u{2019}xyz";
        assert_eq!(strip_prefix_ci(text, "abcdef"), None);
    }

    #[test]
    fn strip_prefix_ci_multibyte_no_panic_emoji() {
        // The 4-byte U+1F600 starts at byte 3; a 4-byte prefix ends inside it.
        let text = "abc\u{1F600}def";
        assert_eq!(strip_prefix_ci(text, "abcd"), None);
    }

    // -- strip_bot_mention --------------------------------------------------

    #[test]
    fn strip_mention_full_user_id() {
        let rest = strip_bot_mention("@timmy:homeserver.local help", "Timmy", BOT_ID);
        assert_eq!(rest.trim(), "help");
    }

    #[test]
    fn strip_mention_localpart() {
        let rest = strip_bot_mention("@timmy help me", "Timmy", BOT_ID);
        assert_eq!(rest.trim(), "help me");
    }

    #[test]
    fn strip_mention_display_name() {
        let rest = strip_bot_mention("Timmy help", "Timmy", BOT_ID);
        assert_eq!(rest.trim(), "help");
    }

    #[test]
    fn strip_mention_display_name_case_insensitive() {
        let rest = strip_bot_mention("timmy help", "Timmy", BOT_ID);
        assert_eq!(rest.trim(), "help");
    }

    #[test]
    fn strip_mention_no_match_returns_original() {
        let rest = strip_bot_mention("hello world", "Timmy", BOT_ID);
        assert_eq!(rest, "hello world");
    }

    #[test]
    fn strip_mention_does_not_match_longer_name() {
        // "@timmybot" must NOT be taken for "@timmy".
        let rest = strip_bot_mention("@timmybot help", "Timmy", BOT_ID);
        assert_eq!(rest, "@timmybot help");
    }

    #[test]
    fn strip_mention_comma_after_name() {
        let rest = strip_bot_mention("@timmy, help", "Timmy", BOT_ID);
        assert_eq!(rest.trim(), "help");
    }

    #[test]
    fn strip_mention_colon_separator_element_tab_completion() {
        // Element tab-completion appends ": " to the display name.
        let rest = strip_bot_mention("timmy ⚡️: ambient on", "timmy ⚡️", BOT_ID);
        assert_eq!(rest, "ambient on");
    }

    #[test]
    fn strip_mention_emoji_display_name_no_separator() {
        // Emoji display name followed directly by the command.
        let rest = strip_bot_mention("timmy ⚡️ ambient on", "timmy ⚡️", BOT_ID);
        assert_eq!(rest, "ambient on");
    }

    #[test]
    fn strip_mention_colon_after_localpart() {
        // Element can also produce "@timmy: help".
        let rest = strip_bot_mention("@timmy: help", "Timmy", BOT_ID);
        assert_eq!(rest, "help");
    }

    #[test]
    fn strip_mention_short_name_emoji_suffix_in_body() {
        // bot_name is plain "Timmy", but the body carries "Timmy ⚡️ status":
        // the emoji belongs to the display name set on the Matrix server,
        // not to the name configured in bot.toml.
        let rest = strip_bot_mention("Timmy ⚡️ status", "Timmy", BOT_ID);
        assert_eq!(rest, "status");
    }

    #[test]
    fn strip_mention_element_markdown_pill_format() {
        // Using the @ autocomplete mention pill makes Element send
        // "[DisplayName](https://matrix.to/#/@user:server) command".
        let body = "[Timmy](https://matrix.to/#/@timmy:homeserver.local) status";
        let rest = strip_bot_mention(body, "Timmy", BOT_ID);
        assert_eq!(rest, "status");
    }

    #[test]
    fn strip_mention_element_markdown_pill_with_emoji_display_name() {
        let body = "[timmy ⚡️](https://matrix.to/#/@timmy:homeserver.local) ambient on";
        let rest = strip_bot_mention(body, "timmy ⚡️", BOT_ID);
        assert_eq!(rest, "ambient on");
    }

    #[test]
    fn strip_mention_element_markdown_pill_wrong_user_id_no_strip() {
        // A pill that points at another user must be left alone.
        let body = "[Other](https://matrix.to/#/@other:homeserver.local) status";
        let rest = strip_bot_mention(body, "Timmy", BOT_ID);
        assert_eq!(
            rest,
            "[Other](https://matrix.to/#/@other:homeserver.local) status"
        );
    }

    #[test]
    fn strip_mention_element_markdown_pill_no_trailing_command() {
        // A bare pill yields an empty string; callers deal with that.
        let body = "[Timmy](https://matrix.to/#/@timmy:homeserver.local)";
        let rest = strip_bot_mention(body, "Timmy", BOT_ID);
        assert_eq!(rest, "");
    }

    // -- drain_complete_paragraphs ------------------------------------------

    #[test]
    fn drain_complete_paragraphs_no_boundary_returns_empty() {
        let (paras, buf) = drain("Hello World");
        assert!(paras.is_empty());
        assert_eq!(buf, "Hello World");
    }

    #[test]
    fn drain_complete_paragraphs_single_boundary() {
        let (paras, buf) = drain("Paragraph one.\n\nParagraph two.");
        assert_eq!(paras, vec!["Paragraph one."]);
        assert_eq!(buf, "Paragraph two.");
    }

    #[test]
    fn drain_complete_paragraphs_multiple_boundaries() {
        let (paras, buf) = drain("A\n\nB\n\nC");
        assert_eq!(paras, vec!["A", "B"]);
        assert_eq!(buf, "C");
    }

    #[test]
    fn drain_complete_paragraphs_trailing_boundary() {
        let (paras, buf) = drain("A\n\nB\n\n");
        assert_eq!(paras, vec!["A", "B"]);
        assert_eq!(buf, "");
    }

    #[test]
    fn drain_complete_paragraphs_empty_input() {
        let (paras, buf) = drain("");
        assert!(paras.is_empty());
        assert_eq!(buf, "");
    }

    #[test]
    fn drain_complete_paragraphs_skips_empty_chunks() {
        // Back-to-back double newlines must not yield empty paragraphs.
        let (paras, buf) = drain("\n\n\n\nHello");
        assert!(paras.is_empty());
        assert_eq!(buf, "Hello");
    }

    #[test]
    fn drain_complete_paragraphs_trims_whitespace() {
        let (paras, buf) = drain(" Hello \n\n World ");
        assert_eq!(paras, vec!["Hello"]);
        assert_eq!(buf, " World ");
    }

    // -- drain_complete_paragraphs: code-fence awareness -------------------

    #[test]
    fn drain_complete_paragraphs_code_fence_blank_line_not_split() {
        // A blank line inside a fenced code block must NOT cause a split.
        let (paras, buf) = drain(
            "```rust\nfn foo() {\n let x = 1;\n\n let y = 2;\n}\n```\n\nNext paragraph.",
        );
        assert_eq!(
            paras.len(),
            1,
            "code fence with blank line should not be split into multiple messages: {paras:?}"
        );
        let fence = &paras[0];
        assert!(
            fence.starts_with("```rust"),
            "first paragraph should be the code fence: {:?}",
            fence
        );
        assert!(
            fence.contains("let y = 2;"),
            "code fence should contain content from both sides of the blank line: {:?}",
            fence
        );
        assert_eq!(buf, "Next paragraph.");
    }

    #[test]
    fn drain_complete_paragraphs_text_before_and_after_fenced_block() {
        // Prose, then a code block containing a blank line, then more prose.
        let (paras, buf) = drain("Before\n\n```\ncode\n\nmore code\n```\n\nAfter");
        assert_eq!(paras.len(), 2, "expected two paragraphs: {paras:?}");
        assert_eq!(paras[0], "Before");
        let fence = &paras[1];
        assert!(
            fence.starts_with("```"),
            "second paragraph should be the code fence: {:?}",
            fence
        );
        assert!(
            fence.contains("more code"),
            "code fence content must include the part after the blank line: {:?}",
            fence
        );
        assert_eq!(buf, "After");
    }

    #[test]
    fn drain_complete_paragraphs_incremental_simulation() {
        // Feed the text one character at a time, as streamed tokens would arrive.
        let mut buf = String::new();
        let mut all_paragraphs = Vec::new();

        for ch in "First para.\n\nSecond para.\n\nThird.".chars() {
            buf.push(ch);
            let newly_complete = drain_complete_paragraphs(&mut buf);
            all_paragraphs.extend(newly_complete);
        }

        assert_eq!(all_paragraphs, vec!["First para.", "Second para."]);
        assert_eq!(buf, "Third.");
    }

    // -- normalize_line_breaks -----------------------------------------------

    #[test]
    fn normalize_prose_single_newline_becomes_double() {
        let output = normalize_line_breaks("Sentence one.\nSentence two.");
        assert_eq!(output, "Sentence one.\n\nSentence two.");
    }

    #[test]
    fn normalize_existing_double_newline_unchanged() {
        let output = normalize_line_breaks("Paragraph one.\n\nParagraph two.");
        assert_eq!(output, "Paragraph one.\n\nParagraph two.");
    }

    #[test]
    fn normalize_bullet_list_single_newlines_preserved() {
        let output = normalize_line_breaks("- item one\n- item two\n- item three");
        assert_eq!(output, "- item one\n- item two\n- item three");
    }

    #[test]
    fn normalize_heading_followed_by_prose_gets_blank_line() {
        // A heading needs a blank line after it so Matrix clients render it
        // visually separated from the paragraph that follows.
        let output = normalize_line_breaks("# My Heading\nSome text below.");
        assert_eq!(output, "# My Heading\n\nSome text below.");
    }

    #[test]
    fn normalize_prose_before_heading_gets_blank_line() {
        // Prose directly above a heading gets a blank line in between.
        let output = normalize_line_breaks("Some intro text.\n## Section");
        assert_eq!(output, "Some intro text.\n\n## Section");
    }

    #[test]
    fn normalize_heading_surrounded_by_prose_gets_blank_lines_both_sides() {
        let output = normalize_line_breaks("Intro.\n## Heading\nContent.");
        assert_eq!(output, "Intro.\n\n## Heading\n\nContent.");
    }

    #[test]
    fn normalize_consecutive_headings_separated_by_blank_lines() {
        let output = normalize_line_breaks("## Section 1\n## Section 2");
        assert_eq!(output, "## Section 1\n\n## Section 2");
    }

    #[test]
    fn normalize_heading_already_separated_by_blank_line_unchanged() {
        // An existing blank line is enough; no second one is added.
        let output = normalize_line_breaks("# Heading\n\nContent.");
        assert_eq!(output, "# Heading\n\nContent.");
    }

    #[test]
    fn normalize_table_rows_single_newlines_preserved() {
        let output = normalize_line_breaks("| Col A | Col B |\n| --- | --- |\n| val1 | val2 |");
        assert_eq!(output, "| Col A | Col B |\n| --- | --- |\n| val1 | val2 |");
    }

    #[test]
    fn normalize_code_block_content_preserved_verbatim() {
        let input = "```rust\nlet x = 1;\nlet y = 2;\n```";
        assert_eq!(normalize_line_breaks(input), input);
    }

    #[test]
    fn normalize_code_block_with_blank_line_inside_preserved() {
        let input = "```\nfn foo() {\n let x = 1;\n\n let y = 2;\n}\n```";
        assert_eq!(normalize_line_breaks(input), input);
    }

    #[test]
    fn normalize_mixed_prose_and_code_block() {
        let input = "First sentence.\nSecond sentence.\n\n```rust\nlet x = 1;\nlet y = 2;\n```\n\nThird sentence.\nFourth sentence.";
        let output = normalize_line_breaks(input);
        // Prose on either side of the code block is doubled, while the code
        // block itself comes through untouched.
        assert!(
            output.contains("First sentence.\n\nSecond sentence."),
            "prose before code: {output}"
        );
        assert!(
            output.contains("```rust\nlet x = 1;\nlet y = 2;\n```"),
            "code block preserved: {output}"
        );
        assert!(
            output.contains("Third sentence.\n\nFourth sentence."),
            "prose after code: {output}"
        );
    }

    #[test]
    fn normalize_ordered_list_single_newlines_preserved() {
        let output = normalize_line_breaks("1. First item\n2. Second item\n3. Third item");
        assert_eq!(output, "1. First item\n2. Second item\n3. Third item");
    }

    #[test]
    fn normalize_empty_string_unchanged() {
        assert_eq!(normalize_line_breaks(""), "");
    }

    #[test]
    fn normalize_single_line_unchanged() {
        assert_eq!(normalize_line_breaks("Hello."), "Hello.");
    }

    #[test]
    fn normalize_prose_then_bullet_no_extra_blank() {
        // The bullet line is structured, so prose followed by a bullet item
        // keeps its single newline.
        let output = normalize_line_breaks("Some prose.\n- bullet item");
        assert_eq!(output, "Some prose.\n- bullet item");
    }
}
|