3 Commits

6 changed files with 150 additions and 3 deletions

1
Cargo.lock generated
View File

@@ -4037,6 +4037,7 @@ dependencies = [
"poem-openapi", "poem-openapi",
"portable-pty", "portable-pty",
"pulldown-cmark", "pulldown-cmark",
"regex",
"reqwest 0.13.2", "reqwest 0.13.2",
"rust-embed", "rust-embed",
"serde", "serde",

View File

@@ -38,3 +38,4 @@ matrix-sdk = { version = "0.16.0", default-features = false, features = [
pulldown-cmark = { version = "0.13.3", default-features = false, features = [ pulldown-cmark = { version = "0.13.3", default-features = false, features = [
"html", "html",
] } ] }
regex = "1"

View File

@@ -31,6 +31,7 @@ uuid = { workspace = true, features = ["v4", "serde"] }
walkdir = { workspace = true } walkdir = { workspace = true }
matrix-sdk = { workspace = true } matrix-sdk = { workspace = true }
pulldown-cmark = { workspace = true } pulldown-cmark = { workspace = true }
regex = { workspace = true }
# Force bundled SQLite so static musl builds don't need a system libsqlite3 # Force bundled SQLite so static musl builds don't need a system libsqlite3
libsqlite3-sys = { version = "0.35.0", features = ["bundled"] } libsqlite3-sys = { version = "0.35.0", features = ["bundled"] }

View File

@@ -728,6 +728,73 @@ pub fn chunk_for_whatsapp(text: &str) -> Vec<String> {
chunks chunks
} }
// ── Markdown → WhatsApp formatting ───────────────────────────────────
/// Convert standard Markdown formatting to WhatsApp-native formatting.
///
/// WhatsApp supports a limited subset of formatting:
/// - Bold: `*text*`
/// - Italic: `_text_`
/// - Strikethrough: `~text~`
/// - Monospace / code: backtick-delimited (same as Markdown)
///
/// This function converts common Markdown constructs so messages render
/// nicely in WhatsApp instead of showing raw Markdown syntax.
pub fn markdown_to_whatsapp(text: &str) -> String {
use regex::Regex;
use std::sync::LazyLock;
// Regexes are compiled once and reused across calls.
static RE_FENCED_BLOCK: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?ms)^```.*?\n(.*?)^```").unwrap());
static RE_HEADER: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?m)^#{1,6}\s+(.+)$").unwrap());
static RE_BOLD_ITALIC: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\*\*\*(.+?)\*\*\*").unwrap());
static RE_BOLD: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\*\*(.+?)\*\*").unwrap());
static RE_STRIKETHROUGH: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"~~(.+?)~~").unwrap());
static RE_LINK: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap());
static RE_HR: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?m)^---+$").unwrap());
// 1. Protect fenced code blocks by replacing them with placeholders.
let mut code_blocks: Vec<String> = Vec::new();
let protected = RE_FENCED_BLOCK.replace_all(text, |caps: &regex::Captures| {
let idx = code_blocks.len();
code_blocks.push(caps[0].to_string());
format!("\x00CODEBLOCK{idx}\x00")
});
let mut out = protected.into_owned();
// 2. Headers → bold text.
out = RE_HEADER.replace_all(&out, "*$1*").into_owned();
// 3. Bold+italic (***text***) → bold italic (*_text_*).
out = RE_BOLD_ITALIC.replace_all(&out, "*_${1}_*").into_owned();
// 4. Bold (**text**) → WhatsApp bold (*text*).
out = RE_BOLD.replace_all(&out, "*$1*").into_owned();
// 5. Strikethrough (~~text~~) → WhatsApp strikethrough (~text~).
out = RE_STRIKETHROUGH.replace_all(&out, "~$1~").into_owned();
// 6. Links [text](url) → text (url).
out = RE_LINK.replace_all(&out, "$1 ($2)").into_owned();
// 7. Horizontal rules → empty line (just remove them).
out = RE_HR.replace_all(&out, "").into_owned();
// 8. Restore code blocks.
for (idx, block) in code_blocks.iter().enumerate() {
out = out.replace(&format!("\x00CODEBLOCK{idx}\x00"), block);
}
out
}
// ── Conversation history persistence ───────────────────────────────── // ── Conversation history persistence ─────────────────────────────────
/// Per-sender conversation history, keyed by phone number. /// Per-sender conversation history, keyed by phone number.
@@ -924,7 +991,8 @@ async fn handle_incoming_message(ctx: &WhatsAppWebhookContext, sender: &str, mes
if let Some(response) = try_handle_command(&dispatch, message) { if let Some(response) = try_handle_command(&dispatch, message) {
slog!("[whatsapp] Sending command response to {sender}"); slog!("[whatsapp] Sending command response to {sender}");
if let Err(e) = ctx.transport.send_message(sender, &response, "").await { let formatted = markdown_to_whatsapp(&response);
if let Err(e) = ctx.transport.send_message(sender, &formatted, "").await {
slog!("[whatsapp] Failed to send reply to {sender}: {e}"); slog!("[whatsapp] Failed to send reply to {sender}: {e}");
} }
return; return;
@@ -1020,8 +1088,9 @@ async fn handle_llm_message(ctx: &WhatsAppWebhookContext, sender: &str, user_mes
let post_sender = sender.to_string(); let post_sender = sender.to_string();
let post_task = tokio::spawn(async move { let post_task = tokio::spawn(async move {
while let Some(chunk) = msg_rx.recv().await { while let Some(chunk) = msg_rx.recv().await {
// Split into WhatsApp-sized chunks. // Convert Markdown to WhatsApp formatting, then split into sized chunks.
for part in chunk_for_whatsapp(&chunk) { let formatted = markdown_to_whatsapp(&chunk);
for part in chunk_for_whatsapp(&formatted) {
let _ = post_transport.send_message(&post_sender, &part, "").await; let _ = post_transport.send_message(&post_sender, &part, "").await;
} }
} }
@@ -1510,6 +1579,81 @@ mod tests {
assert_eq!(chunks, vec![""]); assert_eq!(chunks, vec![""]);
} }
// ── markdown_to_whatsapp tests ────────────────────────────────────────
#[test]
fn md_to_wa_converts_headers_to_bold() {
    // Each heading depth exercised here must come out as a plain bold line.
    let cases = [
        ("# Title", "*Title*"),
        ("## Subtitle", "*Subtitle*"),
        ("### Section", "*Section*"),
        ("###### Deep", "*Deep*"),
    ];
    for (markdown, expected) in cases {
        assert_eq!(markdown_to_whatsapp(markdown), expected);
    }
}
#[test]
fn md_to_wa_converts_bold() {
    // Double-asterisk bold collapses to WhatsApp's single-asterisk bold.
    let converted = markdown_to_whatsapp("**bold text**");
    assert_eq!(converted, "*bold text*");
}
#[test]
fn md_to_wa_converts_bold_italic() {
    // Triple-asterisk emphasis becomes bold wrapping italic.
    let converted = markdown_to_whatsapp("***emphasis***");
    assert_eq!(converted, "*_emphasis_*");
}
#[test]
fn md_to_wa_converts_strikethrough() {
    // Double-tilde strikethrough shrinks to WhatsApp's single-tilde form.
    let converted = markdown_to_whatsapp("~~removed~~");
    assert_eq!(converted, "~removed~");
}
#[test]
fn md_to_wa_converts_links() {
    // A Markdown link is flattened to its label followed by the URL in parentheses.
    let markdown = "[click here](https://example.com)";
    let expected = "click here (https://example.com)";
    assert_eq!(markdown_to_whatsapp(markdown), expected);
}
#[test]
fn md_to_wa_removes_horizontal_rules() {
    // The rule itself disappears; the line breaks around it remain.
    let converted = markdown_to_whatsapp("above\n---\nbelow");
    assert_eq!(converted, "above\n\nbelow");
}
#[test]
fn md_to_wa_preserves_inline_code() {
    // Backtick-delimited inline code is already valid WhatsApp syntax.
    let message = "use `foo()` here";
    assert_eq!(markdown_to_whatsapp(message), message);
}
#[test]
fn md_to_wa_preserves_code_blocks() {
    // The text around the fence carries real Markdown so the test can tell
    // "converted outside the block" apart from "nothing was touched at all".
    let input = "**before**\n```rust\nfn main() {\n println!(\"**not bold**\");\n}\n```\n~~after~~";
    let output = markdown_to_whatsapp(input);
    // Code block content must NOT be converted.
    assert!(output.contains("\"**not bold**\""));
    // Surrounding text is converted and the fenced block comes back verbatim.
    assert_eq!(
        output,
        "*before*\n```rust\nfn main() {\n println!(\"**not bold**\");\n}\n```\n~after~"
    );
}
#[test]
fn md_to_wa_mixed_message() {
    // One message combining a header, bold, strikethrough and a link.
    let converted = markdown_to_whatsapp(
        "### Philosophy\n- **Stories** define the change\n- ~~old~~ is gone\n- See [docs](https://example.com)",
    );
    assert!(converted.starts_with("*Philosophy*"));
    for fragment in ["*Stories*", "~old~", "docs (https://example.com)"] {
        assert!(converted.contains(fragment));
    }
}
#[test]
fn md_to_wa_passthrough_plain_text() {
    // Text without any Markdown constructs must come back untouched.
    let converted = markdown_to_whatsapp("Hello, how are you?");
    assert_eq!(converted, "Hello, how are you?");
}
#[test]
fn md_to_wa_empty_string() {
    // Empty input yields empty output.
    let converted = markdown_to_whatsapp("");
    assert_eq!(converted, "");
}
// ── WhatsApp history persistence tests ────────────────────────────── // ── WhatsApp history persistence tests ──────────────────────────────
#[test] #[test]