From 821345d26680fe7d4ed6114dd512fc0507404f85 Mon Sep 17 00:00:00 2001 From: dave Date: Tue, 24 Mar 2026 21:41:42 +0000 Subject: [PATCH] storkit: accept 383_refactor_reorganize_chat_system_into_chat_module_with_transport_submodules --- ...kdown_to_whatsapp_formatting_conversion.md | 23 --- ...o_chat_module_with_transport_submodules.md | 0 Cargo.lock | 1 + Cargo.toml | 1 + server/Cargo.toml | 1 + server/src/chat/transport/whatsapp.rs | 150 +++++++++++++++++- 6 files changed, 150 insertions(+), 26 deletions(-) delete mode 100644 .storkit/work/1_backlog/384_story_whatsapp_markdown_to_whatsapp_formatting_conversion.md rename .storkit/work/{5_done => 6_archived}/383_refactor_reorganize_chat_system_into_chat_module_with_transport_submodules.md (100%) diff --git a/.storkit/work/1_backlog/384_story_whatsapp_markdown_to_whatsapp_formatting_conversion.md b/.storkit/work/1_backlog/384_story_whatsapp_markdown_to_whatsapp_formatting_conversion.md deleted file mode 100644 index 444ef76..0000000 --- a/.storkit/work/1_backlog/384_story_whatsapp_markdown_to_whatsapp_formatting_conversion.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -name: "WhatsApp markdown-to-WhatsApp formatting conversion" ---- - -# Story 384: WhatsApp markdown-to-WhatsApp formatting conversion - -## User Story - -As a WhatsApp user, I want bot messages to use WhatsApp-native formatting instead of raw markdown, so that headers, bold text, and links render properly. - -## Acceptance Criteria - -- [ ] Headers (# ## ### etc.) are converted to bold text (*Header*) in WhatsApp messages -- [ ] Markdown bold (**text**) is converted to WhatsApp bold (*text*) -- [ ] Markdown strikethrough (~~text~~) is converted to WhatsApp strikethrough (~text~) -- [ ] Markdown links [text](url) are converted to readable format: text (url) -- [ ] Code blocks and inline code are preserved as-is (already compatible) -- [ ] Matrix bot formatting is completely unaffected (conversion only applied in WhatsApp send paths) -- [ ] Existing WhatsApp chunking (4096 char limit) still works correctly after conversion - -## Out of Scope - -- TBD diff --git a/.storkit/work/5_done/383_refactor_reorganize_chat_system_into_chat_module_with_transport_submodules.md b/.storkit/work/6_archived/383_refactor_reorganize_chat_system_into_chat_module_with_transport_submodules.md similarity index 100% rename from .storkit/work/5_done/383_refactor_reorganize_chat_system_into_chat_module_with_transport_submodules.md rename to .storkit/work/6_archived/383_refactor_reorganize_chat_system_into_chat_module_with_transport_submodules.md diff --git a/Cargo.lock b/Cargo.lock index afa592d..bd4fe88 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4037,6 +4037,7 @@ dependencies = [ "poem-openapi", "portable-pty", "pulldown-cmark", + "regex", "reqwest 0.13.2", "rust-embed", "serde", diff --git a/Cargo.toml b/Cargo.toml index 5e10581..1bf78bb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,3 +38,4 @@ matrix-sdk = { version = "0.16.0", default-features = false, features = [ pulldown-cmark = { version = "0.13.3", default-features = false, features = [ "html", ] } +regex = "1" diff --git a/server/Cargo.toml b/server/Cargo.toml index 5e27358..cb0d733 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -31,6 +31,7 @@ uuid = { workspace = true, features = ["v4", "serde"] } walkdir = { workspace = true } matrix-sdk = { workspace = true } pulldown-cmark = { workspace = true } +regex = { workspace = true } # Force bundled SQLite so static musl builds don't need a system libsqlite3 libsqlite3-sys = { version = "0.35.0", features = ["bundled"] } diff --git a/server/src/chat/transport/whatsapp.rs b/server/src/chat/transport/whatsapp.rs index 860ef9f..8d8e148 100644 --- a/server/src/chat/transport/whatsapp.rs +++ b/server/src/chat/transport/whatsapp.rs @@ -728,6 +728,73 @@ pub fn chunk_for_whatsapp(text: &str) -> Vec { chunks } +// ── Markdown → WhatsApp formatting ─────────────────────────────────── + +/// Convert standard Markdown formatting to WhatsApp-native formatting. +/// +/// WhatsApp supports a limited subset of formatting: +/// - Bold: `*text*` +/// - Italic: `_text_` +/// - Strikethrough: `~text~` +/// - Monospace / code: backtick-delimited (same as Markdown) +/// +/// This function converts common Markdown constructs so messages render +/// nicely in WhatsApp instead of showing raw Markdown syntax. +pub fn markdown_to_whatsapp(text: &str) -> String { + use regex::Regex; + use std::sync::LazyLock; + + // Regexes are compiled once and reused across calls. + static RE_FENCED_BLOCK: LazyLock = + LazyLock::new(|| Regex::new(r"(?ms)^```.*?\n(.*?)^```").unwrap()); + static RE_HEADER: LazyLock = + LazyLock::new(|| Regex::new(r"(?m)^#{1,6}\s+(.+)$").unwrap()); + static RE_BOLD_ITALIC: LazyLock = + LazyLock::new(|| Regex::new(r"\*\*\*(.+?)\*\*\*").unwrap()); + static RE_BOLD: LazyLock = + LazyLock::new(|| Regex::new(r"\*\*(.+?)\*\*").unwrap()); + static RE_STRIKETHROUGH: LazyLock = + LazyLock::new(|| Regex::new(r"~~(.+?)~~").unwrap()); + static RE_LINK: LazyLock = + LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap()); + static RE_HR: LazyLock = + LazyLock::new(|| Regex::new(r"(?m)^---+$").unwrap()); + + // 1. Protect fenced code blocks by replacing them with placeholders. + let mut code_blocks: Vec = Vec::new(); + let protected = RE_FENCED_BLOCK.replace_all(text, |caps: ®ex::Captures| { + let idx = code_blocks.len(); + code_blocks.push(caps[0].to_string()); + format!("\x00CODEBLOCK{idx}\x00") + }); + let mut out = protected.into_owned(); + + // 2. Headers → bold text. + out = RE_HEADER.replace_all(&out, "*$1*").into_owned(); + + // 3. Bold+italic (***text***) → bold italic (*_text_*). + out = RE_BOLD_ITALIC.replace_all(&out, "*_${1}_*").into_owned(); + + // 4. Bold (**text**) → WhatsApp bold (*text*). + out = RE_BOLD.replace_all(&out, "*$1*").into_owned(); + + // 5. Strikethrough (~~text~~) → WhatsApp strikethrough (~text~). + out = RE_STRIKETHROUGH.replace_all(&out, "~$1~").into_owned(); + + // 6. Links [text](url) → text (url). + out = RE_LINK.replace_all(&out, "$1 ($2)").into_owned(); + + // 7. Horizontal rules → empty line (just remove them). + out = RE_HR.replace_all(&out, "").into_owned(); + + // 8. Restore code blocks. + for (idx, block) in code_blocks.iter().enumerate() { + out = out.replace(&format!("\x00CODEBLOCK{idx}\x00"), block); + } + + out +} + // ── Conversation history persistence ───────────────────────────────── /// Per-sender conversation history, keyed by phone number. @@ -924,7 +991,8 @@ async fn handle_incoming_message(ctx: &WhatsAppWebhookContext, sender: &str, mes if let Some(response) = try_handle_command(&dispatch, message) { slog!("[whatsapp] Sending command response to {sender}"); - if let Err(e) = ctx.transport.send_message(sender, &response, "").await { + let formatted = markdown_to_whatsapp(&response); + if let Err(e) = ctx.transport.send_message(sender, &formatted, "").await { slog!("[whatsapp] Failed to send reply to {sender}: {e}"); } return; @@ -1020,8 +1088,9 @@ async fn handle_llm_message(ctx: &WhatsAppWebhookContext, sender: &str, user_mes let post_sender = sender.to_string(); let post_task = tokio::spawn(async move { while let Some(chunk) = msg_rx.recv().await { - // Split into WhatsApp-sized chunks. - for part in chunk_for_whatsapp(&chunk) { + // Convert Markdown to WhatsApp formatting, then split into sized chunks. + let formatted = markdown_to_whatsapp(&chunk); + for part in chunk_for_whatsapp(&formatted) { let _ = post_transport.send_message(&post_sender, &part, "").await; } } @@ -1510,6 +1579,81 @@ mod tests { assert_eq!(chunks, vec![""]); } + // ── markdown_to_whatsapp tests ──────────────────────────────────────── + + #[test] + fn md_to_wa_converts_headers_to_bold() { + assert_eq!(markdown_to_whatsapp("# Title"), "*Title*"); + assert_eq!(markdown_to_whatsapp("## Subtitle"), "*Subtitle*"); + assert_eq!(markdown_to_whatsapp("### Section"), "*Section*"); + assert_eq!(markdown_to_whatsapp("###### Deep"), "*Deep*"); + } + + #[test] + fn md_to_wa_converts_bold() { + assert_eq!(markdown_to_whatsapp("**bold text**"), "*bold text*"); + } + + #[test] + fn md_to_wa_converts_bold_italic() { + assert_eq!(markdown_to_whatsapp("***emphasis***"), "*_emphasis_*"); + } + + #[test] + fn md_to_wa_converts_strikethrough() { + assert_eq!(markdown_to_whatsapp("~~removed~~"), "~removed~"); + } + + #[test] + fn md_to_wa_converts_links() { + assert_eq!( + markdown_to_whatsapp("[click here](https://example.com)"), + "click here (https://example.com)" + ); + } + + #[test] + fn md_to_wa_removes_horizontal_rules() { + assert_eq!(markdown_to_whatsapp("above\n---\nbelow"), "above\n\nbelow"); + } + + #[test] + fn md_to_wa_preserves_inline_code() { + assert_eq!(markdown_to_whatsapp("use `foo()` here"), "use `foo()` here"); + } + + #[test] + fn md_to_wa_preserves_code_blocks() { + let input = "before\n```rust\nfn main() {\n println!(\"**not bold**\");\n}\n```\nafter"; + let output = markdown_to_whatsapp(input); + // Code block content must NOT be converted. + assert!(output.contains("\"**not bold**\"")); + // But surrounding text is still converted. + assert!(output.contains("before")); + assert!(output.contains("after")); + } + + #[test] + fn md_to_wa_mixed_message() { + let input = "### Philosophy\n- **Stories** define the change\n- ~~old~~ is gone\n- See [docs](https://example.com)"; + let output = markdown_to_whatsapp(input); + assert!(output.starts_with("*Philosophy*")); + assert!(output.contains("*Stories*")); + assert!(output.contains("~old~")); + assert!(output.contains("docs (https://example.com)")); + } + + #[test] + fn md_to_wa_passthrough_plain_text() { + let plain = "Hello, how are you?"; + assert_eq!(markdown_to_whatsapp(plain), plain); + } + + #[test] + fn md_to_wa_empty_string() { + assert_eq!(markdown_to_whatsapp(""), ""); + } + // ── WhatsApp history persistence tests ────────────────────────────── #[test]