// ---------------------------------------------------------------------------
// Paragraph buffering helper
// ---------------------------------------------------------------------------

/// Advance the code-fence tracking state by one complete line of Markdown.
///
/// `open_len` is `None` while outside a fenced code block and
/// `Some(n)` while inside one that was opened by a run of `n` backticks.
/// The returned value is the state after `line` has been taken into account.
///
/// The rules applied are:
///
/// * A fence line starts (after leading whitespace) with three or more
///   backticks.
/// * An opening fence may carry an info string (e.g. `rust`), but that info
///   string must not itself contain a backtick; such a line is an inline
///   code span, not a fence.
/// * A closing fence must be at least as long as the opening fence and may
///   only be followed by whitespace. Shorter fences, or fences followed by
///   text, are ordinary content of the open block. This is what allows a
///   four-backtick block to contain a three-backtick example.
fn advance_fence_state(open_len: Option<usize>, line: &str) -> Option<usize> {
    let trimmed = line.trim_start();
    // Backticks are single-byte, so the run length is also a valid byte offset.
    let run_len = trimmed.bytes().take_while(|&b| b == b'`').count();
    if run_len < 3 {
        return open_len;
    }
    let rest = &trimmed[run_len..];
    match open_len {
        // Not a real opening fence: the would-be info string contains a backtick.
        None if rest.contains('`') => None,
        None => Some(run_len),
        // Long enough and nothing but whitespace after it: the block is closed.
        Some(opened) if run_len >= opened && rest.trim().is_empty() => None,
        // Fence-looking line that cannot close the current block: plain content.
        Some(_) => open_len,
    }
}

/// Returns `true` when `text` ends while inside an open fenced code block.
///
/// Each line of `text` is fed through the fence state machine (see
/// `advance_fence_state`); the result is `true` when a fence has been opened
/// and no matching closing fence has been seen by the end of `text`.
fn is_inside_code_fence(text: &str) -> bool {
    text.lines().fold(None, advance_fence_state).is_some()
}

/// Drain all complete paragraphs from `buffer` and return them.
///
/// A paragraph boundary is a double newline (`\n\n`). Each drained paragraph
/// is trimmed of surrounding whitespace; empty paragraphs are discarded.
/// The buffer is left with only the remaining incomplete text.
///
/// **Code-fence awareness:** a `\n\n` that occurs *inside* a fenced code
/// block is **not** treated as a paragraph boundary. This prevents a blank
/// line inside a code block from splitting the fence across multiple
/// messages, which would corrupt the rendering of the second half. While a
/// fence remains unclosed, nothing after its opening line is drained.
///
/// The buffer is scanned once, line by line, carrying the fence state along,
/// and the consumed prefix is removed in place at the end.
pub fn drain_complete_paragraphs(buffer: &mut String) -> Vec<String> {
    let mut paragraphs = Vec::new();
    let mut open_fence = None;
    // Byte offset where the paragraph currently being accumulated starts.
    let mut para_start = 0;
    // Byte offset where the line currently being examined starts.
    let mut line_start = 0;

    // Only newline-terminated lines are examined: a trailing partial line can
    // never be followed by a paragraph boundary yet.
    while let Some(offset) = buffer[line_start..].find('\n') {
        let line_end = line_start + offset;
        open_fence = advance_fence_state(open_fence, &buffer[line_start..line_end]);

        let blank_line_follows = buffer.as_bytes().get(line_end + 1) == Some(&b'\n');
        if open_fence.is_none() && blank_line_follows {
            let chunk = buffer[para_start..line_end].trim();
            if !chunk.is_empty() {
                paragraphs.push(chunk.to_string());
            }
            // Resume right after the `\n\n` separator.
            para_start = line_end + 2;
            line_start = para_start;
        } else {
            line_start = line_end + 1;
        }
    }

    // Remove everything that was handed out (or discarded as empty) in one go.
    buffer.drain(..para_start);
    paragraphs
}

#[cfg(test)]
mod tests {
    use super::*;

    // -- drain_complete_paragraphs: code-fence awareness -------------------

    #[test]
    fn drain_complete_paragraphs_code_fence_blank_line_not_split() {
        // A blank line inside a fenced code block must NOT trigger a split.
        // Otherwise the second half would be sent without the opening fence,
        // breaking rendering.
        let mut buf = "```rust\nfn foo() {\n let x = 1;\n\n let y = 2;\n}\n```\n\nNext paragraph."
            .to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(
            paras.len(),
            1,
            "code fence with blank line should not be split into multiple messages: {paras:?}"
        );
        assert!(
            paras[0].starts_with("```rust"),
            "first paragraph should be the code fence: {:?}",
            paras[0]
        );
        assert!(
            paras[0].contains("let y = 2;"),
            "code fence should contain content from both sides of the blank line: {:?}",
            paras[0]
        );
        assert_eq!(buf, "Next paragraph.");
    }

    #[test]
    fn drain_complete_paragraphs_text_before_and_after_fenced_block() {
        // Text paragraph, then a code block with an internal blank line, then more text.
        let mut buf = "Before\n\n```\ncode\n\nmore code\n```\n\nAfter".to_string();
        let paras = drain_complete_paragraphs(&mut buf);
        assert_eq!(paras.len(), 2, "expected two paragraphs: {paras:?}");
        assert_eq!(paras[0], "Before");
        assert!(
            paras[1].starts_with("```"),
            "second paragraph should be the code fence: {:?}",
            paras[1]
        );
        assert!(
            paras[1].contains("more code"),
            "code fence content must include the part after the blank line: {:?}",
            paras[1]
        );
        assert_eq!(buf, "After");
    }
}