//! WhatsApp message formatting — Markdown-to-WhatsApp conversion and message chunking. use regex::Regex; use std::sync::LazyLock; /// WhatsApp Business API maximum message body size in characters. pub(super) const WHATSAPP_MAX_MESSAGE_LEN: usize = 4096; /// Split a text into chunks that fit within WhatsApp's message size limit. /// /// Tries to split on paragraph boundaries (`\n\n`), falling back to line /// boundaries (`\n`), and finally hard-splitting at the character limit. pub fn chunk_for_whatsapp(text: &str) -> Vec { if text.len() <= WHATSAPP_MAX_MESSAGE_LEN { return vec![text.to_string()]; } let mut chunks = Vec::new(); let mut remaining = text; while !remaining.is_empty() { if remaining.len() <= WHATSAPP_MAX_MESSAGE_LEN { chunks.push(remaining.to_string()); break; } // Find the best split point within the limit. let window = &remaining[..WHATSAPP_MAX_MESSAGE_LEN]; // Prefer paragraph boundary. let split_pos = window .rfind("\n\n") .or_else(|| window.rfind('\n')) .unwrap_or(WHATSAPP_MAX_MESSAGE_LEN); let (chunk, rest) = remaining.split_at(split_pos); let chunk = chunk.trim(); if !chunk.is_empty() { chunks.push(chunk.to_string()); } // Skip the delimiter. remaining = rest.trim_start_matches('\n'); } chunks } /// Convert standard Markdown formatting to WhatsApp-native formatting. /// /// WhatsApp supports a limited subset of formatting: /// - Bold: `*text*` /// - Italic: `_text_` /// - Strikethrough: `~text~` /// - Monospace / code: backtick-delimited (same as Markdown) /// /// This function converts common Markdown constructs so messages render /// nicely in WhatsApp instead of showing raw Markdown syntax. pub fn markdown_to_whatsapp(text: &str) -> String { let normalized = crate::chat::util::normalize_line_breaks(text); let text = normalized.as_str(); // Regexes are compiled once and reused across calls. static RE_FENCED_BLOCK: LazyLock = LazyLock::new(|| Regex::new(r"(?ms)^```.*?\n(.*?)^```").unwrap()); static RE_HEADER: LazyLock = LazyLock::new(|| Regex::new(r"(?m)^#{1,6}\s+(.+)$").unwrap()); static RE_BOLD_ITALIC: LazyLock = LazyLock::new(|| Regex::new(r"\*\*\*(.+?)\*\*\*").unwrap()); static RE_BOLD: LazyLock = LazyLock::new(|| Regex::new(r"\*\*(.+?)\*\*").unwrap()); static RE_STRIKETHROUGH: LazyLock = LazyLock::new(|| Regex::new(r"~~(.+?)~~").unwrap()); static RE_LINK: LazyLock = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap()); static RE_HR: LazyLock = LazyLock::new(|| Regex::new(r"(?m)^---+$").unwrap()); // 1. Protect fenced code blocks by replacing them with placeholders. let mut code_blocks: Vec = Vec::new(); let protected = RE_FENCED_BLOCK.replace_all(text, |caps: ®ex::Captures| { let idx = code_blocks.len(); code_blocks.push(caps[0].to_string()); format!("\x00CODEBLOCK{idx}\x00") }); let mut out = protected.into_owned(); // 2. Headers → bold text. out = RE_HEADER.replace_all(&out, "*$1*").into_owned(); // 3. Bold+italic (***text***) → bold italic (*_text_*). out = RE_BOLD_ITALIC.replace_all(&out, "*_${1}_*").into_owned(); // 4. Bold (**text**) → WhatsApp bold (*text*). out = RE_BOLD.replace_all(&out, "*$1*").into_owned(); // 5. Strikethrough (~~text~~) → WhatsApp strikethrough (~text~). out = RE_STRIKETHROUGH.replace_all(&out, "~$1~").into_owned(); // 6. Links [text](url) → text (url). out = RE_LINK.replace_all(&out, "$1 ($2)").into_owned(); // 7. Horizontal rules → empty line (just remove them). out = RE_HR.replace_all(&out, "").into_owned(); // 8. Restore code blocks. for (idx, block) in code_blocks.iter().enumerate() { out = out.replace(&format!("\x00CODEBLOCK{idx}\x00"), block); } out } // ── Tests ─────────────────────────────────────────────────────────────── #[cfg(test)] mod tests { use super::*; // ── chunk_for_whatsapp tests ──────────────────────────────────────── #[test] fn chunk_short_message_returns_single_chunk() { let chunks = chunk_for_whatsapp("Hello world"); assert_eq!(chunks, vec!["Hello world"]); } #[test] fn chunk_exactly_at_limit_returns_single_chunk() { let text = "a".repeat(WHATSAPP_MAX_MESSAGE_LEN); let chunks = chunk_for_whatsapp(&text); assert_eq!(chunks.len(), 1); assert_eq!(chunks[0].len(), WHATSAPP_MAX_MESSAGE_LEN); } #[test] fn chunk_splits_on_paragraph_boundary() { // Create text with a paragraph boundary near the split point. let first_para = "a".repeat(4000); let second_para = "b".repeat(200); let text = format!("{first_para}\n\n{second_para}"); let chunks = chunk_for_whatsapp(&text); assert_eq!(chunks.len(), 2); assert_eq!(chunks[0], first_para); assert_eq!(chunks[1], second_para); } #[test] fn chunk_splits_on_line_boundary_when_no_paragraph_break() { let first_line = "a".repeat(4000); let second_line = "b".repeat(200); let text = format!("{first_line}\n{second_line}"); let chunks = chunk_for_whatsapp(&text); assert_eq!(chunks.len(), 2); assert_eq!(chunks[0], first_line); assert_eq!(chunks[1], second_line); } #[test] fn chunk_hard_splits_continuous_text() { let text = "x".repeat(WHATSAPP_MAX_MESSAGE_LEN * 2 + 100); let chunks = chunk_for_whatsapp(&text); assert!(chunks.len() >= 2); for chunk in &chunks { assert!(chunk.len() <= WHATSAPP_MAX_MESSAGE_LEN); } // Verify all content is preserved. let reassembled: String = chunks.join(""); assert_eq!(reassembled.len(), text.len()); } #[test] fn chunk_empty_string_returns_single_empty() { let chunks = chunk_for_whatsapp(""); assert_eq!(chunks, vec![""]); } // ── markdown_to_whatsapp tests ──────────────────────────────────────── #[test] fn md_to_wa_converts_headers_to_bold() { assert_eq!(markdown_to_whatsapp("# Title"), "*Title*"); assert_eq!(markdown_to_whatsapp("## Subtitle"), "*Subtitle*"); assert_eq!(markdown_to_whatsapp("### Section"), "*Section*"); assert_eq!(markdown_to_whatsapp("###### Deep"), "*Deep*"); } #[test] fn md_to_wa_converts_bold() { assert_eq!(markdown_to_whatsapp("**bold text**"), "*bold text*"); } #[test] fn md_to_wa_converts_bold_italic() { assert_eq!(markdown_to_whatsapp("***emphasis***"), "*_emphasis_*"); } #[test] fn md_to_wa_converts_strikethrough() { assert_eq!(markdown_to_whatsapp("~~removed~~"), "~removed~"); } #[test] fn md_to_wa_converts_links() { assert_eq!( markdown_to_whatsapp("[click here](https://example.com)"), "click here (https://example.com)" ); } #[test] fn md_to_wa_removes_horizontal_rules() { assert_eq!(markdown_to_whatsapp("above\n---\nbelow"), "above\n\nbelow"); } #[test] fn md_to_wa_preserves_inline_code() { assert_eq!(markdown_to_whatsapp("use `foo()` here"), "use `foo()` here"); } #[test] fn md_to_wa_preserves_code_blocks() { let input = "before\n```rust\nfn main() {\n println!(\"**not bold**\");\n}\n```\nafter"; let output = markdown_to_whatsapp(input); // Code block content must NOT be converted. assert!(output.contains("\"**not bold**\"")); // But surrounding text is still converted. assert!(output.contains("before")); assert!(output.contains("after")); } #[test] fn md_to_wa_mixed_message() { let input = "### Philosophy\n- **Stories** define the change\n- ~~old~~ is gone\n- See [docs](https://example.com)"; let output = markdown_to_whatsapp(input); assert!(output.starts_with("*Philosophy*")); assert!(output.contains("*Stories*")); assert!(output.contains("~old~")); assert!(output.contains("docs (https://example.com)")); } #[test] fn md_to_wa_passthrough_plain_text() { let plain = "Hello, how are you?"; assert_eq!(markdown_to_whatsapp(plain), plain); } #[test] fn md_to_wa_empty_string() { assert_eq!(markdown_to_whatsapp(""), ""); } }