diff --git a/server/src/chat/transport/matrix/bot/format.rs b/server/src/chat/transport/matrix/bot/format.rs
index fd8f0b60..fa2c87e5 100644
--- a/server/src/chat/transport/matrix/bot/format.rs
+++ b/server/src/chat/transport/matrix/bot/format.rs
@@ -14,11 +14,12 @@ pub fn format_startup_announcement(bot_name: &str) -> String {
/// tasklists) so that common Markdown constructs render correctly in Matrix
/// clients such as Element.
pub fn markdown_to_html(markdown: &str) -> String {
+ let normalized = crate::chat::util::normalize_line_breaks(markdown);
let options = Options::ENABLE_TABLES
| Options::ENABLE_FOOTNOTES
| Options::ENABLE_STRIKETHROUGH
| Options::ENABLE_TASKLISTS;
- let parser = Parser::new_ext(markdown, options);
+ let parser = Parser::new_ext(&normalized, options);
let mut html_output = String::new();
html::push_html(&mut html_output, parser);
html_output
@@ -80,6 +81,20 @@ mod tests {
);
}
+    #[test]
+    fn markdown_to_html_single_newline_prose_becomes_paragraphs() {
+        // Single newlines between prose sentences should produce separate paragraphs.
+        let html = markdown_to_html("Sentence one.\nSentence two.");
+        assert!(
+            html.contains("<p>Sentence one.</p>"),
+            "expected separate paragraph for first sentence: {html}"
+        );
+        assert!(
+            html.contains("<p>Sentence two.</p>"),
+            "expected separate paragraph for second sentence: {html}"
+        );
+    }
+
#[test]
fn startup_announcement_uses_bot_name() {
assert_eq!(format_startup_announcement("Timmy"), "Timmy is online.");
diff --git a/server/src/chat/transport/slack/format.rs b/server/src/chat/transport/slack/format.rs
index 6153112d..3ef8ff84 100644
--- a/server/src/chat/transport/slack/format.rs
+++ b/server/src/chat/transport/slack/format.rs
@@ -6,9 +6,13 @@
/// This function converts common Markdown constructs so messages render
/// nicely in Slack instead of showing raw Markdown syntax.
pub fn markdown_to_slack(text: &str) -> String {
+ use crate::chat::util::normalize_line_breaks;
use regex::Regex;
use std::sync::LazyLock;
+ let normalized = normalize_line_breaks(text);
+ let text = normalized.as_str();
+
// Regexes are compiled once and reused across calls.
static RE_FENCED_BLOCK: LazyLock =
LazyLock::new(|| Regex::new(r"(?ms)^```.*?\n(.*?)^```").unwrap());
diff --git a/server/src/chat/transport/whatsapp/format.rs b/server/src/chat/transport/whatsapp/format.rs
index bf9fa9f4..5989ded4 100644
--- a/server/src/chat/transport/whatsapp/format.rs
+++ b/server/src/chat/transport/whatsapp/format.rs
@@ -55,6 +55,9 @@ pub fn chunk_for_whatsapp(text: &str) -> Vec {
/// This function converts common Markdown constructs so messages render
/// nicely in WhatsApp instead of showing raw Markdown syntax.
pub fn markdown_to_whatsapp(text: &str) -> String {
+ let normalized = crate::chat::util::normalize_line_breaks(text);
+ let text = normalized.as_str();
+
// Regexes are compiled once and reused across calls.
static RE_FENCED_BLOCK: LazyLock =
LazyLock::new(|| Regex::new(r"(?ms)^```.*?\n(.*?)^```").unwrap());
diff --git a/server/src/chat/util.rs b/server/src/chat/util.rs
index 0b9e20c6..2462ffd6 100644
--- a/server/src/chat/util.rs
+++ b/server/src/chat/util.rs
@@ -97,6 +97,91 @@ pub fn drain_complete_paragraphs(buffer: &mut String) -> Vec {
paragraphs
}
+/// Normalize single newlines between prose lines to double newlines.
+///
+/// LLMs sometimes output text with single newlines between sentences, e.g.:
+/// ```text
+/// Sentence one.
+/// Sentence two.
+/// ```
+///
+/// In Markdown a single newline is a *soft break* and may render as a space
+/// (or nothing), causing sentences to appear joined ("sentence one.Sentence
+/// two"). This function converts single newlines between non-blank prose
+/// lines into double newlines (paragraph breaks) so they render correctly.
+///
+/// Single newlines are **preserved** (not doubled) when either the preceding
+/// or following line is blank (empty or whitespace-only, e.g. the lone `\r`
+/// left by a CRLF blank line) or is a structured Markdown element:
+/// - Bullet list items (`- `, `* `, `+ `)
+/// - Ordered list items (`1. `, `2. `, …)
+/// - ATX headings (`#`, `##`, …)
+/// - Table rows (`|`)
+/// - Code fence delimiters (`` ``` ``)
+/// - Horizontal rules (`---`, `***`, `___`)
+///
+/// Content inside fenced code blocks is also preserved verbatim.
+pub fn normalize_line_breaks(text: &str) -> String {
+    // Lines for which this returns `true` keep their neighbouring single newlines.
+    fn is_structured_line(line: &str) -> bool {
+        let trimmed = line.trim_start();
+        if trimmed.is_empty() {
+            return false;
+        }
+        if trimmed.starts_with('#')
+            || trimmed.starts_with("- ")
+            || trimmed.starts_with("* ")
+            || trimmed.starts_with("+ ")
+            || trimmed.starts_with('|')
+            || trimmed.starts_with("```")
+        {
+            return true;
+        }
+        // Ordered list: one or more digits followed by ". "
+        let after_digits = trimmed.trim_start_matches(|c: char| c.is_ascii_digit());
+        if after_digits.len() < trimmed.len() && after_digits.starts_with(". ") {
+            return true;
+        }
+        // Horizontal rules: only -, *, _ and spaces, with at least 3 rule chars.
+        let all_hr_chars = trimmed
+            .chars()
+            .all(|c| matches!(c, '-' | '*' | '_' | ' '));
+        let hr_char_count = trimmed.chars().filter(|c| !c.is_whitespace()).count();
+        all_hr_chars && hr_char_count >= 3
+    }
+
+    let lines: Vec<&str> = text.split('\n').collect();
+    let mut result: Vec<&str> = Vec::with_capacity(lines.len() * 2);
+    let mut in_code_fence = false;
+
+    for (i, &line) in lines.iter().enumerate() {
+        if line.trim_start().starts_with("```") {
+            in_code_fence = !in_code_fence;
+        }
+
+        if i == 0 || in_code_fence {
+            result.push(line);
+            continue;
+        }
+
+        let prev_line = lines[i - 1];
+
+        // Insert a blank separator only when both lines are non-blank prose. Whitespace-only
+        // lines already act as paragraph breaks, so they must not gain extra blanks.
+        let should_double = !line.trim().is_empty()
+            && !prev_line.trim().is_empty()
+            && !is_structured_line(line)
+            && !is_structured_line(prev_line);
+
+        if should_double {
+            result.push("");
+        }
+        result.push(line);
+    }
+
+    result.join("\n")
+}
+
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
@@ -312,4 +397,92 @@ mod tests {
assert_eq!(all_paragraphs, vec!["First para.", "Second para."]);
assert_eq!(buf, "Third.");
}
+
+ // -- normalize_line_breaks -----------------------------------------------
+
+    #[test]
+    fn normalize_prose_single_newline_becomes_double() {
+        // Two prose lines joined by a lone newline end up separated by a blank line.
+        let normalized = normalize_line_breaks("Sentence one.\nSentence two.");
+        assert_eq!(normalized, "Sentence one.\n\nSentence two.");
+    }
+
+    #[test]
+    fn normalize_existing_double_newline_unchanged() {
+        // Text that already has a paragraph break must round-trip untouched.
+        let paragraphs = "Paragraph one.\n\nParagraph two.";
+        assert_eq!(normalize_line_breaks(paragraphs), paragraphs);
+    }
+
+    #[test]
+    fn normalize_bullet_list_single_newlines_preserved() {
+        // Consecutive bullet items are structured lines and must stay adjacent.
+        let bullets = "- item one\n- item two\n- item three";
+        assert_eq!(normalize_line_breaks(bullets), bullets);
+    }
+
+    #[test]
+    fn normalize_heading_single_newline_preserved() {
+        // A heading directly above prose keeps its single newline.
+        let normalized = normalize_line_breaks("# My Heading\nSome text below.");
+        assert_eq!(normalized, "# My Heading\nSome text below.");
+    }
+
+    #[test]
+    fn normalize_table_rows_single_newlines_preserved() {
+        // Header, delimiter and data rows all start with `|` and must stay contiguous.
+        let table = "| Col A | Col B |\n| --- | --- |\n| val1 | val2 |";
+        assert_eq!(normalize_line_breaks(table), table);
+    }
+
+    #[test]
+    fn normalize_code_block_content_preserved_verbatim() {
+        // Lines inside a fenced block must pass through byte-for-byte.
+        let fenced = "```rust\nlet x = 1;\nlet y = 2;\n```";
+        assert_eq!(normalize_line_breaks(fenced), fenced);
+    }
+
+    #[test]
+    fn normalize_code_block_with_blank_line_inside_preserved() {
+        // A blank line that is already inside the fence must not be touched either.
+        let fenced = "```\nfn foo() {\n let x = 1;\n\n let y = 2;\n}\n```";
+        assert_eq!(normalize_line_breaks(fenced), fenced);
+    }
+
+    #[test]
+    fn normalize_mixed_prose_and_code_block() {
+        // Prose on either side of the fence gains paragraph breaks,
+        // while the fenced block itself comes through unchanged.
+        let output = normalize_line_breaks("First sentence.\nSecond sentence.\n\n```rust\nlet x = 1;\nlet y = 2;\n```\n\nThird sentence.\nFourth sentence.");
+        let has = |needle: &str| output.contains(needle);
+        assert!(has("First sentence.\n\nSecond sentence."), "prose before code: {output}");
+        assert!(has("```rust\nlet x = 1;\nlet y = 2;\n```"), "code block preserved: {output}");
+        assert!(has("Third sentence.\n\nFourth sentence."), "prose after code: {output}");
+    }
+
+    #[test]
+    fn normalize_ordered_list_single_newlines_preserved() {
+        // Numbered items are structured lines, so no blank lines are inserted between them.
+        let numbered = "1. First item\n2. Second item\n3. Third item";
+        assert_eq!(normalize_line_breaks(numbered), numbered);
+    }
+
+    #[test]
+    fn normalize_empty_string_unchanged() {
+        assert!(normalize_line_breaks("").is_empty()); // empty in, empty out
+    }
+
+    #[test]
+    fn normalize_single_line_unchanged() {
+        assert_eq!("Hello.", normalize_line_breaks("Hello.")); // a lone line has nothing to split
+    }
+
+    #[test]
+    fn normalize_prose_then_bullet_no_extra_blank() {
+        // A bullet line counts as structured Markdown, so the newline that
+        // separates it from the preceding prose must not be doubled.
+        let source = "Some prose.\n- bullet item";
+        let normalized = normalize_line_breaks(source);
+        assert_eq!(normalized, source);
+    }
}