server/src/chat/util.rs

//! Shared text utilities used by all chat transports.
//!
//! These functions are transport-agnostic helpers for processing chat messages:
//! prefix stripping, bot-mention handling, paragraph buffering, and line-break
//! normalization.

/// Strips `prefix` from the start of `text`, comparing ASCII letters
/// case-insensitively, and only when the prefix ends on a word boundary.
///
/// Returns the remainder of `text` after the prefix (possibly empty, and
/// still carrying any leading whitespace or punctuation), or `None` when:
/// - `text` is shorter than `prefix`, or `prefix.len()` falls inside a
///   multi-byte character of `text`;
/// - the leading bytes of `text` differ from `prefix` ignoring ASCII case;
/// - the character right after the prefix is alphanumeric, `-`, or `_`,
///   i.e. the prefix is only the beginning of a longer word.
pub fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
    let cut = prefix.len();
    // `get` yields `None` instead of panicking when `cut` is out of range or
    // not on a char boundary.
    let head = text.get(..cut)?;
    // Safe to index: `cut` was just proven to be an in-range char boundary.
    let tail = &text[cut..];

    let continues_word = matches!(
        tail.chars().next(),
        Some(c) if c.is_alphanumeric() || c == '-' || c == '_'
    );

    if head.eq_ignore_ascii_case(prefix) && !continues_word {
        Some(tail)
    } else {
        None
    }
}

/// Strip the bot mention prefix from a raw message body.
///
/// Handles these forms (case-insensitive where applicable):
/// - `@bot_localpart:server.com rest` → `rest`
/// - `@bot_localpart rest` → `rest`
/// - `DisplayName rest` → `rest`
pub fn strip_bot_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
    let trimmed = message.trim();

    // Try full Matrix user ID (e.g. "@timmy:homeserver.local")
    if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
        return rest;
    }

    // Try @localpart (e.g. "@timmy")
    if let Some(localpart) = bot_user_id.split(':').next()
        && let Some(rest) = strip_prefix_ci(trimmed, localpart)
    {
        return rest;
    }

    // Try display name (e.g. "Timmy")
    if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
        return rest;
    }

    trimmed
}

/// Returns `true` when `text` ends while inside an open fenced code block.
///
/// A fence marker is any line whose first non-whitespace characters are
/// three backticks. Every marker toggles the open/closed state, so the
/// result is `true` exactly when `text` contains an odd number of marker
/// lines. Applied to a slice that starts on a line boundary, this same value
/// tells whether that slice flips the fence state of the text before it.
fn is_inside_code_fence(text: &str) -> bool {
    let fence_markers = text
        .lines()
        .filter(|line| line.trim_start().starts_with("```"))
        .count();
    fence_markers % 2 == 1
}

/// Drain all complete paragraphs from `buffer` and return them.
///
/// A paragraph boundary is a double newline (`\n\n`). Each drained paragraph
/// is trimmed of surrounding whitespace; empty paragraphs are discarded.
/// The drained text and its boundary are removed from `buffer` in place,
/// leaving only the remaining incomplete text (which is not trimmed).
///
/// **Code-fence awareness:** a `\n\n` that occurs *inside* a fenced code
/// block (delimited by ` ``` ` lines) is **not** treated as a paragraph
/// boundary.  This prevents a blank line inside a code block from splitting
/// the fence across multiple messages, which would corrupt the rendering.
/// A fence that is still open at the end of `buffer` therefore keeps all of
/// its text in the buffer until the closing marker arrives.
pub fn drain_complete_paragraphs(buffer: &mut String) -> Vec<String> {
    let mut paragraphs = Vec::new();
    // Start of the part of `buffer` not yet examined. It is either 0 or the
    // byte right after a skipped `\n\n`, so it always sits on a line start.
    let mut scan_from = 0;
    // Whether `buffer[..scan_from]` ends inside an open code fence.
    let mut fence_open = false;

    while let Some(offset) = buffer[scan_from..].find("\n\n") {
        let boundary = scan_from + offset;

        // Only the lines since the previous candidate are new, so fold their
        // fence parity into the running state instead of rescanning the whole
        // prefix at every blank line.
        fence_open ^= is_inside_code_fence(&buffer[scan_from..boundary]);

        if fence_open {
            // Blank line inside a fence: not a boundary, keep looking.
            scan_from = boundary + 2;
            continue;
        }

        let paragraph = buffer[..boundary].trim();
        if !paragraph.is_empty() {
            paragraphs.push(paragraph.to_owned());
        }
        // Remove the paragraph and its `\n\n` in place rather than
        // reallocating the remainder of the buffer.
        buffer.drain(..boundary + 2);
        scan_from = 0;
    }

    paragraphs
}

/// Normalize single newlines between prose lines to double newlines.
///
/// LLMs sometimes output text with single newlines between sentences, e.g.:
/// ```text
/// Sentence one.
/// Sentence two.
/// ```
///
/// In Markdown a single newline is a *soft break* and may render as a space
/// (or nothing), causing sentences to appear joined ("sentence one.Sentence
/// two").  This function converts single newlines between non-blank prose
/// lines into double newlines (paragraph breaks) so they render correctly.
///
/// Single newlines are **preserved** (not doubled) when either the preceding
/// or following line is a structured Markdown element:
/// - Bullet list items (`- `, `* `, `+ `)
/// - Ordered list items (`1. `, `2. `, …)
/// - ATX headings (`#`, `##`, …)
/// - Table rows (`|`)
/// - Code fence delimiters (`` ``` ``)
/// - Horizontal rules (three or more `-`, `*` or `_`, optionally separated
///   by spaces)
///
/// Content inside fenced code blocks is also preserved verbatim.
///
/// A line that is empty or contains only whitespace (including the `\r`
/// left behind when CRLF input is split on `\n`) counts as blank: it already
/// separates paragraphs, so no extra blank line is inserted next to it.
///
/// Returns the normalized text; all original lines are kept unchanged and in
/// order, only empty separator lines are added.
pub fn normalize_line_breaks(text: &str) -> String {
    /// Returns `true` when `line` is a Markdown construct whose neighbouring
    /// single newlines must be left alone.
    fn is_structured_line(line: &str) -> bool {
        let trimmed = line.trim_start();
        if trimmed.is_empty() {
            return false;
        }
        if trimmed.starts_with('#')
            || trimmed.starts_with("- ")
            || trimmed.starts_with("* ")
            || trimmed.starts_with("+ ")
            || trimmed.starts_with('|')
            || trimmed.starts_with("```")
        {
            return true;
        }
        // Ordered list: one or more digits followed by ". "
        let after_digits = trimmed.trim_start_matches(|c: char| c.is_ascii_digit());
        if after_digits.len() < trimmed.len() && after_digits.starts_with(". ") {
            return true;
        }
        // Horizontal rules: lines made entirely of -, *, or _ (at least 3 chars),
        // with optional spaces in between.
        let all_hr_chars = trimmed
            .chars()
            .all(|c| matches!(c, '-' | '*' | '_' | ' '));
        let hr_char_count = trimmed.chars().filter(|c| !c.is_whitespace()).count();
        all_hr_chars && hr_char_count >= 3
    }

    /// Returns `true` for lines that act as a paragraph separator: empty, or
    /// whitespace only (e.g. trailing spaces, or `\r` from CRLF input).
    fn is_blank(line: &str) -> bool {
        line.trim().is_empty()
    }

    let lines: Vec<&str> = text.split('\n').collect();
    let mut result: Vec<&str> = Vec::with_capacity(lines.len() * 2);
    let mut in_code_fence = false;

    for (i, &line) in lines.iter().enumerate() {
        if line.trim_start().starts_with("```") {
            in_code_fence = !in_code_fence;
        }

        // The first line has no predecessor, and everything from an opening
        // fence up to (but excluding) its closing fence is copied verbatim.
        if i == 0 || in_code_fence {
            result.push(line);
            continue;
        }

        let prev_line = lines[i - 1];

        // Insert a blank separator only between two adjacent prose lines:
        // neither may be blank (a separator already exists) nor structured
        // Markdown (where a single newline is significant).
        let should_double = !is_blank(line)
            && !is_blank(prev_line)
            && !is_structured_line(line)
            && !is_structured_line(prev_line);

        if should_double {
            result.push("");
        }
        result.push(line);
    }

    result.join("\n")
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    // Unit tests for the helpers in this file, grouped by the function under
    // test. Expected values pin the current behaviour of each helper.
    use super::*;

    // -- strip_prefix_ci ----------------------------------------------------

    #[test]
    fn strip_prefix_ci_basic() {
        // The remainder keeps the whitespace that followed the prefix.
        assert_eq!(strip_prefix_ci("Hello world", "hello"), Some(" world"));
    }

    #[test]
    fn strip_prefix_ci_no_match() {
        assert_eq!(strip_prefix_ci("goodbye", "hello"), None);
    }

    #[test]
    fn strip_prefix_ci_word_boundary_required() {
        assert_eq!(strip_prefix_ci("helloworld", "hello"), None);
    }

    #[test]
    fn strip_prefix_ci_exact_match() {
        assert_eq!(strip_prefix_ci("hello", "hello"), Some(""));
    }

    #[test]
    fn strip_prefix_ci_multibyte_no_panic_smart_quote() {
        // "abcde\u{2019}xyz" — U+2019 is 3 bytes starting at byte 5.
        // A prefix of length 6 (e.g. "abcdef") lands inside the 3-byte char.
        // Previously this caused: "byte index 6 is not a char boundary".
        let text = "abcde\u{2019}xyz";
        assert_eq!(strip_prefix_ci(text, "abcdef"), None);
    }

    #[test]
    fn strip_prefix_ci_multibyte_no_panic_emoji() {
        // U+1F600 is 4 bytes starting at byte 3. Prefix length 4 lands inside it.
        let text = "abc\u{1F600}def";
        assert_eq!(strip_prefix_ci(text, "abcd"), None);
    }

    // -- strip_bot_mention --------------------------------------------------

    #[test]
    fn strip_mention_full_user_id() {
        let rest = strip_bot_mention(
            "@timmy:homeserver.local help",
            "Timmy",
            "@timmy:homeserver.local",
        );
        // The remainder still starts with the space after the mention, hence
        // the trim before comparing.
        assert_eq!(rest.trim(), "help");
    }

    #[test]
    fn strip_mention_localpart() {
        let rest = strip_bot_mention("@timmy help me", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest.trim(), "help me");
    }

    #[test]
    fn strip_mention_display_name() {
        let rest = strip_bot_mention("Timmy help", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest.trim(), "help");
    }

    #[test]
    fn strip_mention_display_name_case_insensitive() {
        let rest = strip_bot_mention("timmy help", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest.trim(), "help");
    }

    #[test]
    fn strip_mention_no_match_returns_original() {
        let rest = strip_bot_mention("hello world", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest, "hello world");
    }

    #[test]
    fn strip_mention_does_not_match_longer_name() {
        // "@timmybot" should NOT match "@timmy"
        let rest = strip_bot_mention("@timmybot help", "Timmy", "@timmy:homeserver.local");
        assert_eq!(rest, "@timmybot help");
    }

    #[test]
    fn strip_mention_comma_after_name() {
        let rest = strip_bot_mention("@timmy, help", "Timmy", "@timmy:homeserver.local");
        // The comma after the mention is part of the remainder, so it is
        // stripped here before comparing.
        assert_eq!(rest.trim().trim_start_matches(',').trim(), "help");
    }

    // -- drain_complete_paragraphs ------------------------------------------

    #[test]
    fn drain_complete_paragraphs_no_boundary_returns_empty() {
        let mut buf = "Hello World".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert!(paras.is_empty());
        assert_eq!(buf, "Hello World");
    }

    #[test]
    fn drain_complete_paragraphs_single_boundary() {
        let mut buf = "Paragraph one.\n\nParagraph two.".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras, vec!["Paragraph one."]);
        assert_eq!(buf, "Paragraph two.");
    }

    #[test]
    fn drain_complete_paragraphs_multiple_boundaries() {
        let mut buf = "A\n\nB\n\nC".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras, vec!["A", "B"]);
        assert_eq!(buf, "C");
    }

    #[test]
    fn drain_complete_paragraphs_trailing_boundary() {
        let mut buf = "A\n\nB\n\n".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras, vec!["A", "B"]);
        assert_eq!(buf, "");
    }

    #[test]
    fn drain_complete_paragraphs_empty_input() {
        let mut buf = String::new();
        let paras = drain_complete_paragraphs(&mut buf);
        assert!(paras.is_empty());
        assert_eq!(buf, "");
    }

    #[test]
    fn drain_complete_paragraphs_skips_empty_chunks() {
        // Consecutive double-newlines produce no empty paragraphs.
        let mut buf = "\n\n\n\nHello".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert!(paras.is_empty());
        assert_eq!(buf, "Hello");
    }

    #[test]
    fn drain_complete_paragraphs_trims_whitespace() {
        // Only drained paragraphs are trimmed; the text left in the buffer
        // keeps its surrounding whitespace.
        let mut buf = "  Hello  \n\n  World  ".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras, vec!["Hello"]);
        assert_eq!(buf, "  World  ");
    }

    // -- drain_complete_paragraphs: code-fence awareness -------------------

    #[test]
    fn drain_complete_paragraphs_code_fence_blank_line_not_split() {
        // A blank line inside a fenced code block must NOT trigger a split.
        let mut buf =
            "```rust\nfn foo() {\n    let x = 1;\n\n    let y = 2;\n}\n```\n\nNext paragraph."
                .to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(
            paras.len(),
            1,
            "code fence with blank line should not be split into multiple messages: {paras:?}"
        );
        assert!(
            paras[0].starts_with("```rust"),
            "first paragraph should be the code fence: {:?}",
            paras[0]
        );
        assert!(
            paras[0].contains("let y = 2;"),
            "code fence should contain content from both sides of the blank line: {:?}",
            paras[0]
        );
        assert_eq!(buf, "Next paragraph.");
    }

    #[test]
    fn drain_complete_paragraphs_text_before_and_after_fenced_block() {
        // Text paragraph, then a code block with an internal blank line, then more text.
        let mut buf = "Before\n\n```\ncode\n\nmore code\n```\n\nAfter".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras.len(), 2, "expected two paragraphs: {paras:?}");
        assert_eq!(paras[0], "Before");
        assert!(
            paras[1].starts_with("```"),
            "second paragraph should be the code fence: {:?}",
            paras[1]
        );
        assert!(
            paras[1].contains("more code"),
            "code fence content must include the part after the blank line: {:?}",
            paras[1]
        );
        assert_eq!(buf, "After");
    }

    #[test]
    fn drain_complete_paragraphs_incremental_simulation() {
        // Simulate tokens arriving one character at a time.
        let mut buf = String::new();
        let mut all_paragraphs = Vec::new();

        for ch in "First para.\n\nSecond para.\n\nThird.".chars() {
            buf.push(ch);
            all_paragraphs.extend(drain_complete_paragraphs(&mut buf));
        }

        assert_eq!(all_paragraphs, vec!["First para.", "Second para."]);
        assert_eq!(buf, "Third.");
    }

    // -- normalize_line_breaks -----------------------------------------------

    #[test]
    fn normalize_prose_single_newline_becomes_double() {
        let input = "Sentence one.\nSentence two.";
        let output = normalize_line_breaks(input);
        assert_eq!(output, "Sentence one.\n\nSentence two.");
    }

    #[test]
    fn normalize_existing_double_newline_unchanged() {
        let input = "Paragraph one.\n\nParagraph two.";
        let output = normalize_line_breaks(input);
        assert_eq!(output, "Paragraph one.\n\nParagraph two.");
    }

    #[test]
    fn normalize_bullet_list_single_newlines_preserved() {
        let input = "- item one\n- item two\n- item three";
        let output = normalize_line_breaks(input);
        assert_eq!(output, "- item one\n- item two\n- item three");
    }

    #[test]
    fn normalize_heading_single_newline_preserved() {
        let input = "# My Heading\nSome text below.";
        let output = normalize_line_breaks(input);
        assert_eq!(output, "# My Heading\nSome text below.");
    }

    #[test]
    fn normalize_table_rows_single_newlines_preserved() {
        let input = "| Col A | Col B |\n| --- | --- |\n| val1 | val2 |";
        let output = normalize_line_breaks(input);
        assert_eq!(output, "| Col A | Col B |\n| --- | --- |\n| val1 | val2 |");
    }

    #[test]
    fn normalize_code_block_content_preserved_verbatim() {
        let input = "```rust\nlet x = 1;\nlet y = 2;\n```";
        let output = normalize_line_breaks(input);
        assert_eq!(output, input);
    }

    #[test]
    fn normalize_code_block_with_blank_line_inside_preserved() {
        let input = "```\nfn foo() {\n    let x = 1;\n\n    let y = 2;\n}\n```";
        let output = normalize_line_breaks(input);
        assert_eq!(output, input);
    }

    #[test]
    fn normalize_mixed_prose_and_code_block() {
        let input = "First sentence.\nSecond sentence.\n\n```rust\nlet x = 1;\nlet y = 2;\n```\n\nThird sentence.\nFourth sentence.";
        let output = normalize_line_breaks(input);
        // Prose sentences before and after the code block get doubled.
        // The code block itself is preserved.
        assert!(output.contains("First sentence.\n\nSecond sentence."), "prose before code: {output}");
        assert!(output.contains("```rust\nlet x = 1;\nlet y = 2;\n```"), "code block preserved: {output}");
        assert!(output.contains("Third sentence.\n\nFourth sentence."), "prose after code: {output}");
    }

    #[test]
    fn normalize_ordered_list_single_newlines_preserved() {
        let input = "1. First item\n2. Second item\n3. Third item";
        let output = normalize_line_breaks(input);
        assert_eq!(output, "1. First item\n2. Second item\n3. Third item");
    }

    #[test]
    fn normalize_empty_string_unchanged() {
        assert_eq!(normalize_line_breaks(""), "");
    }

    #[test]
    fn normalize_single_line_unchanged() {
        assert_eq!(normalize_line_breaks("Hello."), "Hello.");
    }

    #[test]
    fn normalize_prose_then_bullet_no_extra_blank() {
        // When prose is followed by a bullet item, no extra blank is inserted
        // because the bullet line is structured.
        let input = "Some prose.\n- bullet item";
        let output = normalize_line_breaks(input);
        assert_eq!(output, "Some prose.\n- bullet item");
    }
}
storkit: merge 392_refactor_extract_shared_transport_utilities_from_matrix_module_into_chat_submodule 2026-03-25 14:43:28 +00:00			`//! Shared text utilities used by all chat transports.`
			`//!`
			`//! These functions are transport-agnostic helpers for processing chat messages:`
			`//! prefix stripping, bot-mention handling, and paragraph buffering.`

			`/// Case-insensitive prefix strip that also requires the match to end at a`
			`/// word boundary (whitespace, punctuation, or end-of-string).`
			`pub fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {`
			`let candidate = text.get(..prefix.len())?;`
			`if !candidate.eq_ignore_ascii_case(prefix) {`
			`return None;`
			`}`
			`let rest = &text[prefix.len()..];`
			`// Must be at end or followed by non-alphanumeric`
			`match rest.chars().next() {`
			`None => Some(rest), // exact match, empty remainder`
			`Some(c) if c.is_alphanumeric() \|\| c == '-' \|\| c == '_' => None, // not a word boundary`
			`_ => Some(rest),`
			`}`
			`}`

			`/// Strip the bot mention prefix from a raw message body.`
			`///`
			`/// Handles these forms (case-insensitive where applicable):`
			/// - `@bot_localpart:server.com rest` → `rest`
			/// - `@bot_localpart rest` → `rest`
			/// - `DisplayName rest` → `rest`
			`pub fn strip_bot_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {`
			`let trimmed = message.trim();`

			`// Try full Matrix user ID (e.g. "@timmy:homeserver.local")`
			`if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {`
			`return rest;`
			`}`

			`// Try @localpart (e.g. "@timmy")`
			`if let Some(localpart) = bot_user_id.split(':').next()`
			`&& let Some(rest) = strip_prefix_ci(trimmed, localpart)`
			`{`
			`return rest;`
			`}`

			`// Try display name (e.g. "Timmy")`
			`if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {`
			`return rest;`
			`}`

			`trimmed`
			`}`

			/// Returns `true` when `text` ends while inside an open fenced code block.
			`///`
			/// A fenced code block opens and closes on lines that start with ` ``` `
			/// (three or more backticks). We count the fence markers and return `true`
			`/// when the count is odd (a fence was opened but not yet closed).`
			`fn is_inside_code_fence(text: &str) -> bool {`
			`let mut in_fence = false;`
			`for line in text.lines() {`
			if line.trim_start().starts_with("```") {
			`in_fence = !in_fence;`
			`}`
			`}`
			`in_fence`
			`}`

			/// Drain all complete paragraphs from `buffer` and return them.
			`///`
			/// A paragraph boundary is a double newline (`\n\n`). Each drained paragraph
			`/// is trimmed of surrounding whitespace; empty paragraphs are discarded.`
			`/// The buffer is left with only the remaining incomplete text.`
			`///`
			/// Code-fence awareness: a `\n\n` that occurs inside a fenced code
			/// block (delimited by ` ``` ` lines) is not treated as a paragraph
			`/// boundary. This prevents a blank line inside a code block from splitting`
			`/// the fence across multiple messages, which would corrupt the rendering.`
			`pub fn drain_complete_paragraphs(buffer: &mut String) -> Vec<String> {`
			`let mut paragraphs = Vec::new();`
			`let mut search_from = 0;`
			`loop {`
			`let Some(pos) = buffer[search_from..].find("\n\n") else {`
			`break;`
			`};`
			`let abs_pos = search_from + pos;`
			`// Only split at this boundary when we are NOT inside a code fence.`
			`if is_inside_code_fence(&buffer[..abs_pos]) {`
			`// Skip past this \n\n and keep looking for the next boundary.`
			`search_from = abs_pos + 2;`
			`} else {`
			`let chunk = buffer[..abs_pos].trim().to_string();`
			`*buffer = buffer[abs_pos + 2..].to_string();`
			`search_from = 0;`
			`if !chunk.is_empty() {`
			`paragraphs.push(chunk);`
			`}`
			`}`
			`}`
			`paragraphs`
			`}`

storkit: merge 427_story_server_side_text_normalization_for_chat_message_line_breaks 2026-03-28 10:39:13 +00:00			`/// Normalize single newlines between prose lines to double newlines.`
			`///`
			`/// LLMs sometimes output text with single newlines between sentences, e.g.:`
			/// ```text
			`/// Sentence one.`
			`/// Sentence two.`
			/// ```
			`///`
			`/// In Markdown a single newline is a soft break and may render as a space`
			`/// (or nothing), causing sentences to appear joined ("sentence one.Sentence`
			`/// two"). This function converts single newlines between non-empty prose`
			`/// lines into double newlines (paragraph breaks) so they render correctly.`
			`///`
			`/// Single newlines are preserved (not doubled) when either the preceding`
			`/// or following line is a structured Markdown element:`
			/// - Bullet list items (`- `, `* `, `+ `)
			/// - Ordered list items (`1. `, `2. `, …)
			/// - ATX headings (`#`, `##`, …)
			/// - Table rows (`\|`)
			/// - Code fence delimiters (`` ``` ``)
			`///`
			`/// Content inside fenced code blocks is also preserved verbatim.`
			`pub fn normalize_line_breaks(text: &str) -> String {`
			`fn is_structured_line(line: &str) -> bool {`
			`let trimmed = line.trim_start();`
			`if trimmed.is_empty() {`
			`return false;`
			`}`
			`if trimmed.starts_with('#')`
			`\|\| trimmed.starts_with("- ")`
			`\|\| trimmed.starts_with("* ")`
			`\|\| trimmed.starts_with("+ ")`
			`\|\| trimmed.starts_with('\|')`
			\|\| trimmed.starts_with("```")
			`{`
			`return true;`
			`}`
			`// Ordered list: one or more digits followed by ". "`
			`let after_digits = trimmed.trim_start_matches(\|c: char\| c.is_ascii_digit());`
			`if !after_digits.is_empty()`
			`&& after_digits.starts_with(". ")`
			`&& after_digits.len() < trimmed.len()`
			`{`
			`return true;`
			`}`
			`// Horizontal rules: lines made entirely of -, *, or _ (at least 3 chars).`
			`let all_hr_chars = trimmed`
			`.chars()`
			`.all(\|c\| matches!(c, '-' \| '*' \| '_' \| ' '));`
			`let hr_char_count = trimmed.chars().filter(\|c\| !c.is_whitespace()).count();`
			`all_hr_chars && hr_char_count >= 3`
			`}`

			`let lines: Vec<&str> = text.split('\n').collect();`
			`let mut result: Vec<&str> = Vec::with_capacity(lines.len() * 2);`
			`let mut in_code_fence = false;`

			`for (i, &line) in lines.iter().enumerate() {`
			if line.trim_start().starts_with("```") {
			`in_code_fence = !in_code_fence;`
			`}`

			`if i == 0 \|\| in_code_fence {`
			`result.push(line);`
			`continue;`
			`}`

			`let prev_line = lines[i - 1];`

			`// Insert a blank separator when both the current and previous lines`
			`// are non-empty prose (not inside a code fence, not structured Markdown).`
			`let should_double = !line.is_empty()`
			`&& !prev_line.is_empty()`
			`&& !is_structured_line(line)`
			`&& !is_structured_line(prev_line);`

			`if should_double {`
			`result.push("");`
			`}`
			`result.push(line);`
			`}`

			`result.join("\n")`
			`}`

storkit: merge 392_refactor_extract_shared_transport_utilities_from_matrix_module_into_chat_submodule 2026-03-25 14:43:28 +00:00			`// ---------------------------------------------------------------------------`
			`// Tests`
			`// ---------------------------------------------------------------------------`

			`#[cfg(test)]`
			`mod tests {`
			`use super::*;`

			`// -- strip_prefix_ci ----------------------------------------------------`

			`#[test]`
			`fn strip_prefix_ci_basic() {`
			`assert_eq!(strip_prefix_ci("Hello world", "hello"), Some(" world"));`
			`}`

			`#[test]`
			`fn strip_prefix_ci_no_match() {`
			`assert_eq!(strip_prefix_ci("goodbye", "hello"), None);`
			`}`

			`#[test]`
			`fn strip_prefix_ci_word_boundary_required() {`
			`assert_eq!(strip_prefix_ci("helloworld", "hello"), None);`
			`}`

			`#[test]`
			`fn strip_prefix_ci_exact_match() {`
			`assert_eq!(strip_prefix_ci("hello", "hello"), Some(""));`
			`}`

			`#[test]`
			`fn strip_prefix_ci_multibyte_no_panic_smart_quote() {`
			`// "abcde\u{2019}xyz" — U+2019 is 3 bytes starting at byte 5.`
			`// A prefix of length 6 (e.g. "abcdef") lands inside the 3-byte char.`
			`// Previously this caused: "byte index 6 is not a char boundary".`
			`let text = "abcde\u{2019}xyz";`
			`assert_eq!(strip_prefix_ci(text, "abcdef"), None);`
			`}`

			`#[test]`
			`fn strip_prefix_ci_multibyte_no_panic_emoji() {`
			`// U+1F600 is 4 bytes starting at byte 3. Prefix length 4 lands inside it.`
			`let text = "abc\u{1F600}def";`
			`assert_eq!(strip_prefix_ci(text, "abcd"), None);`
			`}`

			`// -- strip_bot_mention --------------------------------------------------`

			`#[test]`
			`fn strip_mention_full_user_id() {`
			`let rest = strip_bot_mention(`
			`"@timmy:homeserver.local help",`
			`"Timmy",`
			`"@timmy:homeserver.local",`
			`);`
			`assert_eq!(rest.trim(), "help");`
			`}`

			`#[test]`
			`fn strip_mention_localpart() {`
			`let rest = strip_bot_mention("@timmy help me", "Timmy", "@timmy:homeserver.local");`
			`assert_eq!(rest.trim(), "help me");`
			`}`

			`#[test]`
			`fn strip_mention_display_name() {`
			`let rest = strip_bot_mention("Timmy help", "Timmy", "@timmy:homeserver.local");`
			`assert_eq!(rest.trim(), "help");`
			`}`

			`#[test]`
			`fn strip_mention_display_name_case_insensitive() {`
			`let rest = strip_bot_mention("timmy help", "Timmy", "@timmy:homeserver.local");`
			`assert_eq!(rest.trim(), "help");`
			`}`

			`#[test]`
			`fn strip_mention_no_match_returns_original() {`
			`let rest = strip_bot_mention("hello world", "Timmy", "@timmy:homeserver.local");`
			`assert_eq!(rest, "hello world");`
			`}`

			`#[test]`
			`fn strip_mention_does_not_match_longer_name() {`
			`// "@timmybot" should NOT match "@timmy"`
			`let rest = strip_bot_mention("@timmybot help", "Timmy", "@timmy:homeserver.local");`
			`assert_eq!(rest, "@timmybot help");`
			`}`

			`#[test]`
			`fn strip_mention_comma_after_name() {`
			`let rest = strip_bot_mention("@timmy, help", "Timmy", "@timmy:homeserver.local");`
			`assert_eq!(rest.trim().trim_start_matches(',').trim(), "help");`
			`}`

			`// -- drain_complete_paragraphs ------------------------------------------`

			`#[test]`
			`fn drain_complete_paragraphs_no_boundary_returns_empty() {`
			`let mut buf = "Hello World".to_string();`
			`let paras = drain_complete_paragraphs(&mut buf);`
			`assert!(paras.is_empty());`
			`assert_eq!(buf, "Hello World");`
			`}`

			`#[test]`
			`fn drain_complete_paragraphs_single_boundary() {`
			`let mut buf = "Paragraph one.\n\nParagraph two.".to_string();`
			`let paras = drain_complete_paragraphs(&mut buf);`
			`assert_eq!(paras, vec!["Paragraph one."]);`
			`assert_eq!(buf, "Paragraph two.");`
			`}`

			`#[test]`
			`fn drain_complete_paragraphs_multiple_boundaries() {`
			`let mut buf = "A\n\nB\n\nC".to_string();`
			`let paras = drain_complete_paragraphs(&mut buf);`
			`assert_eq!(paras, vec!["A", "B"]);`
			`assert_eq!(buf, "C");`
			`}`

			`#[test]`
			`fn drain_complete_paragraphs_trailing_boundary() {`
			`let mut buf = "A\n\nB\n\n".to_string();`
			`let paras = drain_complete_paragraphs(&mut buf);`
			`assert_eq!(paras, vec!["A", "B"]);`
			`assert_eq!(buf, "");`
			`}`

			`#[test]`
			`fn drain_complete_paragraphs_empty_input() {`
			`let mut buf = String::new();`
			`let paras = drain_complete_paragraphs(&mut buf);`
			`assert!(paras.is_empty());`
			`assert_eq!(buf, "");`
			`}`

			`#[test]`
			`fn drain_complete_paragraphs_skips_empty_chunks() {`
			`// Consecutive double-newlines produce no empty paragraphs.`
			`let mut buf = "\n\n\n\nHello".to_string();`
			`let paras = drain_complete_paragraphs(&mut buf);`
			`assert!(paras.is_empty());`
			`assert_eq!(buf, "Hello");`
			`}`

			`#[test]`
			`fn drain_complete_paragraphs_trims_whitespace() {`
			`let mut buf = " Hello \n\n World ".to_string();`
			`let paras = drain_complete_paragraphs(&mut buf);`
			`assert_eq!(paras, vec!["Hello"]);`
			`assert_eq!(buf, " World ");`
			`}`

			`// -- drain_complete_paragraphs: code-fence awareness -------------------`

			`#[test]`
			`fn drain_complete_paragraphs_code_fence_blank_line_not_split() {`
			`// A blank line inside a fenced code block must NOT trigger a split.`
			`let mut buf =`
			"```rust\nfn foo() {\n let x = 1;\n\n let y = 2;\n}\n```\n\nNext paragraph."
			`.to_string();`
			`let paras = drain_complete_paragraphs(&mut buf);`
			`assert_eq!(`
			`paras.len(),`
			`1,`
			`"code fence with blank line should not be split into multiple messages: {paras:?}"`
			`);`
			`assert!(`
			paras[0].starts_with("```rust"),
			`"first paragraph should be the code fence: {:?}",`
			`paras[0]`
			`);`
			`assert!(`
			`paras[0].contains("let y = 2;"),`
			`"code fence should contain content from both sides of the blank line: {:?}",`
			`paras[0]`
			`);`
			`assert_eq!(buf, "Next paragraph.");`
			`}`

			`#[test]`
			`fn drain_complete_paragraphs_text_before_and_after_fenced_block() {`
			`// Text paragraph, then a code block with an internal blank line, then more text.`
			let mut buf = "Before\n\n```\ncode\n\nmore code\n```\n\nAfter".to_string();
			`let paras = drain_complete_paragraphs(&mut buf);`
			`assert_eq!(paras.len(), 2, "expected two paragraphs: {paras:?}");`
			`assert_eq!(paras[0], "Before");`
			`assert!(`
			paras[1].starts_with("```"),
			`"second paragraph should be the code fence: {:?}",`
			`paras[1]`
			`);`
			`assert!(`
			`paras[1].contains("more code"),`
			`"code fence content must include the part after the blank line: {:?}",`
			`paras[1]`
			`);`
			`assert_eq!(buf, "After");`
			`}`

			`#[test]`
			`fn drain_complete_paragraphs_incremental_simulation() {`
			`// Simulate tokens arriving one character at a time.`
			`let mut buf = String::new();`
			`let mut all_paragraphs = Vec::new();`

			`for ch in "First para.\n\nSecond para.\n\nThird.".chars() {`
			`buf.push(ch);`
			`all_paragraphs.extend(drain_complete_paragraphs(&mut buf));`
			`}`

			`assert_eq!(all_paragraphs, vec!["First para.", "Second para."]);`
			`assert_eq!(buf, "Third.");`
			`}`
storkit: merge 427_story_server_side_text_normalization_for_chat_message_line_breaks 2026-03-28 10:39:13 +00:00
			`// -- normalize_line_breaks -----------------------------------------------`

			`#[test]`
			`fn normalize_prose_single_newline_becomes_double() {`
			`let input = "Sentence one.\nSentence two.";`
			`let output = normalize_line_breaks(input);`
			`assert_eq!(output, "Sentence one.\n\nSentence two.");`
			`}`

			`#[test]`
			`fn normalize_existing_double_newline_unchanged() {`
			`let input = "Paragraph one.\n\nParagraph two.";`
			`let output = normalize_line_breaks(input);`
			`assert_eq!(output, "Paragraph one.\n\nParagraph two.");`
			`}`

			`#[test]`
			`fn normalize_bullet_list_single_newlines_preserved() {`
			`let input = "- item one\n- item two\n- item three";`
			`let output = normalize_line_breaks(input);`
			`assert_eq!(output, "- item one\n- item two\n- item three");`
			`}`

			`#[test]`
			`fn normalize_heading_single_newline_preserved() {`
			`let input = "# My Heading\nSome text below.";`
			`let output = normalize_line_breaks(input);`
			`assert_eq!(output, "# My Heading\nSome text below.");`
			`}`

			`#[test]`
			`fn normalize_table_rows_single_newlines_preserved() {`
			`let input = "\| Col A \| Col B \|\n\| --- \| --- \|\n\| val1 \| val2 \|";`
			`let output = normalize_line_breaks(input);`
			`assert_eq!(output, "\| Col A \| Col B \|\n\| --- \| --- \|\n\| val1 \| val2 \|");`
			`}`

			`#[test]`
			`fn normalize_code_block_content_preserved_verbatim() {`
			let input = "```rust\nlet x = 1;\nlet y = 2;\n```";
			`let output = normalize_line_breaks(input);`
			`assert_eq!(output, input);`
			`}`

			`#[test]`
			`fn normalize_code_block_with_blank_line_inside_preserved() {`
			let input = "```\nfn foo() {\n let x = 1;\n\n let y = 2;\n}\n```";
			`let output = normalize_line_breaks(input);`
			`assert_eq!(output, input);`
			`}`

			`#[test]`
			`fn normalize_mixed_prose_and_code_block() {`
			let input = "First sentence.\nSecond sentence.\n\n```rust\nlet x = 1;\nlet y = 2;\n```\n\nThird sentence.\nFourth sentence.";
			`let output = normalize_line_breaks(input);`
			`// Prose sentences before and after the code block get doubled.`
			`// The code block itself is preserved.`
			`assert!(output.contains("First sentence.\n\nSecond sentence."), "prose before code: {output}");`
			assert!(output.contains("```rust\nlet x = 1;\nlet y = 2;\n```"), "code block preserved: {output}");
			`assert!(output.contains("Third sentence.\n\nFourth sentence."), "prose after code: {output}");`
			`}`

			`#[test]`
			`fn normalize_ordered_list_single_newlines_preserved() {`
			`let input = "1. First item\n2. Second item\n3. Third item";`
			`let output = normalize_line_breaks(input);`
			`assert_eq!(output, "1. First item\n2. Second item\n3. Third item");`
			`}`

			`#[test]`
			`fn normalize_empty_string_unchanged() {`
			`assert_eq!(normalize_line_breaks(""), "");`
			`}`

			`#[test]`
			`fn normalize_single_line_unchanged() {`
			`assert_eq!(normalize_line_breaks("Hello."), "Hello.");`
			`}`

			`#[test]`
			`fn normalize_prose_then_bullet_no_extra_blank() {`
			`// When prose is followed by a bullet item, no extra blank is inserted`
			`// because the bullet line is structured.`
			`let input = "Some prose.\n- bullet item";`
			`let output = normalize_line_breaks(input);`
			`assert_eq!(output, "Some prose.\n- bullet item");`
			`}`
storkit: merge 392_refactor_extract_shared_transport_utilities_from_matrix_module_into_chat_submodule 2026-03-25 14:43:28 +00:00			`}`