Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
821345d266 | ||
|
|
0fa63e2de3 | ||
|
|
d8cbec8268 |
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -4037,6 +4037,7 @@ dependencies = [
|
|||||||
"poem-openapi",
|
"poem-openapi",
|
||||||
"portable-pty",
|
"portable-pty",
|
||||||
"pulldown-cmark",
|
"pulldown-cmark",
|
||||||
|
"regex",
|
||||||
"reqwest 0.13.2",
|
"reqwest 0.13.2",
|
||||||
"rust-embed",
|
"rust-embed",
|
||||||
"serde",
|
"serde",
|
||||||
|
|||||||
@@ -38,3 +38,4 @@ matrix-sdk = { version = "0.16.0", default-features = false, features = [
|
|||||||
pulldown-cmark = { version = "0.13.3", default-features = false, features = [
|
pulldown-cmark = { version = "0.13.3", default-features = false, features = [
|
||||||
"html",
|
"html",
|
||||||
] }
|
] }
|
||||||
|
regex = "1"
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ uuid = { workspace = true, features = ["v4", "serde"] }
|
|||||||
walkdir = { workspace = true }
|
walkdir = { workspace = true }
|
||||||
matrix-sdk = { workspace = true }
|
matrix-sdk = { workspace = true }
|
||||||
pulldown-cmark = { workspace = true }
|
pulldown-cmark = { workspace = true }
|
||||||
|
regex = { workspace = true }
|
||||||
|
|
||||||
# Force bundled SQLite so static musl builds don't need a system libsqlite3
|
# Force bundled SQLite so static musl builds don't need a system libsqlite3
|
||||||
libsqlite3-sys = { version = "0.35.0", features = ["bundled"] }
|
libsqlite3-sys = { version = "0.35.0", features = ["bundled"] }
|
||||||
|
|||||||
@@ -728,6 +728,73 @@ pub fn chunk_for_whatsapp(text: &str) -> Vec<String> {
|
|||||||
chunks
|
chunks
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Markdown → WhatsApp formatting ───────────────────────────────────
|
||||||
|
|
||||||
|
/// Convert standard Markdown formatting to WhatsApp-native formatting.
|
||||||
|
///
|
||||||
|
/// WhatsApp supports a limited subset of formatting:
|
||||||
|
/// - Bold: `*text*`
|
||||||
|
/// - Italic: `_text_`
|
||||||
|
/// - Strikethrough: `~text~`
|
||||||
|
/// - Monospace / code: backtick-delimited (same as Markdown)
|
||||||
|
///
|
||||||
|
/// This function converts common Markdown constructs so messages render
|
||||||
|
/// nicely in WhatsApp instead of showing raw Markdown syntax.
|
||||||
|
pub fn markdown_to_whatsapp(text: &str) -> String {
|
||||||
|
use regex::Regex;
|
||||||
|
use std::sync::LazyLock;
|
||||||
|
|
||||||
|
// Regexes are compiled once and reused across calls.
|
||||||
|
static RE_FENCED_BLOCK: LazyLock<Regex> =
|
||||||
|
LazyLock::new(|| Regex::new(r"(?ms)^```.*?\n(.*?)^```").unwrap());
|
||||||
|
static RE_HEADER: LazyLock<Regex> =
|
||||||
|
LazyLock::new(|| Regex::new(r"(?m)^#{1,6}\s+(.+)$").unwrap());
|
||||||
|
static RE_BOLD_ITALIC: LazyLock<Regex> =
|
||||||
|
LazyLock::new(|| Regex::new(r"\*\*\*(.+?)\*\*\*").unwrap());
|
||||||
|
static RE_BOLD: LazyLock<Regex> =
|
||||||
|
LazyLock::new(|| Regex::new(r"\*\*(.+?)\*\*").unwrap());
|
||||||
|
static RE_STRIKETHROUGH: LazyLock<Regex> =
|
||||||
|
LazyLock::new(|| Regex::new(r"~~(.+?)~~").unwrap());
|
||||||
|
static RE_LINK: LazyLock<Regex> =
|
||||||
|
LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap());
|
||||||
|
static RE_HR: LazyLock<Regex> =
|
||||||
|
LazyLock::new(|| Regex::new(r"(?m)^---+$").unwrap());
|
||||||
|
|
||||||
|
// 1. Protect fenced code blocks by replacing them with placeholders.
|
||||||
|
let mut code_blocks: Vec<String> = Vec::new();
|
||||||
|
let protected = RE_FENCED_BLOCK.replace_all(text, |caps: &regex::Captures| {
|
||||||
|
let idx = code_blocks.len();
|
||||||
|
code_blocks.push(caps[0].to_string());
|
||||||
|
format!("\x00CODEBLOCK{idx}\x00")
|
||||||
|
});
|
||||||
|
let mut out = protected.into_owned();
|
||||||
|
|
||||||
|
// 2. Headers → bold text.
|
||||||
|
out = RE_HEADER.replace_all(&out, "*$1*").into_owned();
|
||||||
|
|
||||||
|
// 3. Bold+italic (***text***) → bold italic (*_text_*).
|
||||||
|
out = RE_BOLD_ITALIC.replace_all(&out, "*_${1}_*").into_owned();
|
||||||
|
|
||||||
|
// 4. Bold (**text**) → WhatsApp bold (*text*).
|
||||||
|
out = RE_BOLD.replace_all(&out, "*$1*").into_owned();
|
||||||
|
|
||||||
|
// 5. Strikethrough (~~text~~) → WhatsApp strikethrough (~text~).
|
||||||
|
out = RE_STRIKETHROUGH.replace_all(&out, "~$1~").into_owned();
|
||||||
|
|
||||||
|
// 6. Links [text](url) → text (url).
|
||||||
|
out = RE_LINK.replace_all(&out, "$1 ($2)").into_owned();
|
||||||
|
|
||||||
|
// 7. Horizontal rules → empty line (just remove them).
|
||||||
|
out = RE_HR.replace_all(&out, "").into_owned();
|
||||||
|
|
||||||
|
// 8. Restore code blocks.
|
||||||
|
for (idx, block) in code_blocks.iter().enumerate() {
|
||||||
|
out = out.replace(&format!("\x00CODEBLOCK{idx}\x00"), block);
|
||||||
|
}
|
||||||
|
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
// ── Conversation history persistence ─────────────────────────────────
|
// ── Conversation history persistence ─────────────────────────────────
|
||||||
|
|
||||||
/// Per-sender conversation history, keyed by phone number.
|
/// Per-sender conversation history, keyed by phone number.
|
||||||
@@ -924,7 +991,8 @@ async fn handle_incoming_message(ctx: &WhatsAppWebhookContext, sender: &str, mes
|
|||||||
|
|
||||||
if let Some(response) = try_handle_command(&dispatch, message) {
|
if let Some(response) = try_handle_command(&dispatch, message) {
|
||||||
slog!("[whatsapp] Sending command response to {sender}");
|
slog!("[whatsapp] Sending command response to {sender}");
|
||||||
if let Err(e) = ctx.transport.send_message(sender, &response, "").await {
|
let formatted = markdown_to_whatsapp(&response);
|
||||||
|
if let Err(e) = ctx.transport.send_message(sender, &formatted, "").await {
|
||||||
slog!("[whatsapp] Failed to send reply to {sender}: {e}");
|
slog!("[whatsapp] Failed to send reply to {sender}: {e}");
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@@ -1020,8 +1088,9 @@ async fn handle_llm_message(ctx: &WhatsAppWebhookContext, sender: &str, user_mes
|
|||||||
let post_sender = sender.to_string();
|
let post_sender = sender.to_string();
|
||||||
let post_task = tokio::spawn(async move {
|
let post_task = tokio::spawn(async move {
|
||||||
while let Some(chunk) = msg_rx.recv().await {
|
while let Some(chunk) = msg_rx.recv().await {
|
||||||
// Split into WhatsApp-sized chunks.
|
// Convert Markdown to WhatsApp formatting, then split into sized chunks.
|
||||||
for part in chunk_for_whatsapp(&chunk) {
|
let formatted = markdown_to_whatsapp(&chunk);
|
||||||
|
for part in chunk_for_whatsapp(&formatted) {
|
||||||
let _ = post_transport.send_message(&post_sender, &part, "").await;
|
let _ = post_transport.send_message(&post_sender, &part, "").await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1510,6 +1579,81 @@ mod tests {
|
|||||||
assert_eq!(chunks, vec![""]);
|
assert_eq!(chunks, vec![""]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── markdown_to_whatsapp tests ────────────────────────────────────────
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn md_to_wa_converts_headers_to_bold() {
|
||||||
|
assert_eq!(markdown_to_whatsapp("# Title"), "*Title*");
|
||||||
|
assert_eq!(markdown_to_whatsapp("## Subtitle"), "*Subtitle*");
|
||||||
|
assert_eq!(markdown_to_whatsapp("### Section"), "*Section*");
|
||||||
|
assert_eq!(markdown_to_whatsapp("###### Deep"), "*Deep*");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn md_to_wa_converts_bold() {
|
||||||
|
assert_eq!(markdown_to_whatsapp("**bold text**"), "*bold text*");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn md_to_wa_converts_bold_italic() {
|
||||||
|
assert_eq!(markdown_to_whatsapp("***emphasis***"), "*_emphasis_*");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn md_to_wa_converts_strikethrough() {
|
||||||
|
assert_eq!(markdown_to_whatsapp("~~removed~~"), "~removed~");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn md_to_wa_converts_links() {
|
||||||
|
assert_eq!(
|
||||||
|
markdown_to_whatsapp("[click here](https://example.com)"),
|
||||||
|
"click here (https://example.com)"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn md_to_wa_removes_horizontal_rules() {
|
||||||
|
assert_eq!(markdown_to_whatsapp("above\n---\nbelow"), "above\n\nbelow");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn md_to_wa_preserves_inline_code() {
|
||||||
|
assert_eq!(markdown_to_whatsapp("use `foo()` here"), "use `foo()` here");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn md_to_wa_preserves_code_blocks() {
|
||||||
|
let input = "before\n```rust\nfn main() {\n println!(\"**not bold**\");\n}\n```\nafter";
|
||||||
|
let output = markdown_to_whatsapp(input);
|
||||||
|
// Code block content must NOT be converted.
|
||||||
|
assert!(output.contains("\"**not bold**\""));
|
||||||
|
// But surrounding text is still converted.
|
||||||
|
assert!(output.contains("before"));
|
||||||
|
assert!(output.contains("after"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn md_to_wa_mixed_message() {
|
||||||
|
let input = "### Philosophy\n- **Stories** define the change\n- ~~old~~ is gone\n- See [docs](https://example.com)";
|
||||||
|
let output = markdown_to_whatsapp(input);
|
||||||
|
assert!(output.starts_with("*Philosophy*"));
|
||||||
|
assert!(output.contains("*Stories*"));
|
||||||
|
assert!(output.contains("~old~"));
|
||||||
|
assert!(output.contains("docs (https://example.com)"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn md_to_wa_passthrough_plain_text() {
|
||||||
|
let plain = "Hello, how are you?";
|
||||||
|
assert_eq!(markdown_to_whatsapp(plain), plain);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn md_to_wa_empty_string() {
|
||||||
|
assert_eq!(markdown_to_whatsapp(""), "");
|
||||||
|
}
|
||||||
|
|
||||||
// ── WhatsApp history persistence tests ──────────────────────────────
|
// ── WhatsApp history persistence tests ──────────────────────────────
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
Reference in New Issue
Block a user