storkit: accept 383_refactor_reorganize_chat_system_into_chat_module_with_transport_submodules
This commit is contained in:
@@ -31,6 +31,7 @@ uuid = { workspace = true, features = ["v4", "serde"] }
|
||||
walkdir = { workspace = true }
|
||||
matrix-sdk = { workspace = true }
|
||||
pulldown-cmark = { workspace = true }
|
||||
regex = { workspace = true }
|
||||
|
||||
# Force bundled SQLite so static musl builds don't need a system libsqlite3
|
||||
libsqlite3-sys = { version = "0.35.0", features = ["bundled"] }
|
||||
|
||||
@@ -728,6 +728,73 @@ pub fn chunk_for_whatsapp(text: &str) -> Vec<String> {
|
||||
chunks
|
||||
}
|
||||
|
||||
// ── Markdown → WhatsApp formatting ───────────────────────────────────
|
||||
|
||||
/// Convert standard Markdown formatting to WhatsApp-native formatting.
|
||||
///
|
||||
/// WhatsApp supports a limited subset of formatting:
|
||||
/// - Bold: `*text*`
|
||||
/// - Italic: `_text_`
|
||||
/// - Strikethrough: `~text~`
|
||||
/// - Monospace / code: backtick-delimited (same as Markdown)
|
||||
///
|
||||
/// This function converts common Markdown constructs so messages render
|
||||
/// nicely in WhatsApp instead of showing raw Markdown syntax.
|
||||
pub fn markdown_to_whatsapp(text: &str) -> String {
|
||||
use regex::Regex;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
// Regexes are compiled once and reused across calls.
|
||||
static RE_FENCED_BLOCK: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"(?ms)^```.*?\n(.*?)^```").unwrap());
|
||||
static RE_HEADER: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"(?m)^#{1,6}\s+(.+)$").unwrap());
|
||||
static RE_BOLD_ITALIC: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"\*\*\*(.+?)\*\*\*").unwrap());
|
||||
static RE_BOLD: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"\*\*(.+?)\*\*").unwrap());
|
||||
static RE_STRIKETHROUGH: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"~~(.+?)~~").unwrap());
|
||||
static RE_LINK: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap());
|
||||
static RE_HR: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"(?m)^---+$").unwrap());
|
||||
|
||||
// 1. Protect fenced code blocks by replacing them with placeholders.
|
||||
let mut code_blocks: Vec<String> = Vec::new();
|
||||
let protected = RE_FENCED_BLOCK.replace_all(text, |caps: &regex::Captures| {
|
||||
let idx = code_blocks.len();
|
||||
code_blocks.push(caps[0].to_string());
|
||||
format!("\x00CODEBLOCK{idx}\x00")
|
||||
});
|
||||
let mut out = protected.into_owned();
|
||||
|
||||
// 2. Headers → bold text.
|
||||
out = RE_HEADER.replace_all(&out, "*$1*").into_owned();
|
||||
|
||||
// 3. Bold+italic (***text***) → bold italic (*_text_*).
|
||||
out = RE_BOLD_ITALIC.replace_all(&out, "*_${1}_*").into_owned();
|
||||
|
||||
// 4. Bold (**text**) → WhatsApp bold (*text*).
|
||||
out = RE_BOLD.replace_all(&out, "*$1*").into_owned();
|
||||
|
||||
// 5. Strikethrough (~~text~~) → WhatsApp strikethrough (~text~).
|
||||
out = RE_STRIKETHROUGH.replace_all(&out, "~$1~").into_owned();
|
||||
|
||||
// 6. Links [text](url) → text (url).
|
||||
out = RE_LINK.replace_all(&out, "$1 ($2)").into_owned();
|
||||
|
||||
// 7. Horizontal rules → empty line (just remove them).
|
||||
out = RE_HR.replace_all(&out, "").into_owned();
|
||||
|
||||
// 8. Restore code blocks.
|
||||
for (idx, block) in code_blocks.iter().enumerate() {
|
||||
out = out.replace(&format!("\x00CODEBLOCK{idx}\x00"), block);
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
// ── Conversation history persistence ─────────────────────────────────
|
||||
|
||||
/// Per-sender conversation history, keyed by phone number.
|
||||
@@ -924,7 +991,8 @@ async fn handle_incoming_message(ctx: &WhatsAppWebhookContext, sender: &str, mes
|
||||
|
||||
if let Some(response) = try_handle_command(&dispatch, message) {
|
||||
slog!("[whatsapp] Sending command response to {sender}");
|
||||
if let Err(e) = ctx.transport.send_message(sender, &response, "").await {
|
||||
let formatted = markdown_to_whatsapp(&response);
|
||||
if let Err(e) = ctx.transport.send_message(sender, &formatted, "").await {
|
||||
slog!("[whatsapp] Failed to send reply to {sender}: {e}");
|
||||
}
|
||||
return;
|
||||
@@ -1020,8 +1088,9 @@ async fn handle_llm_message(ctx: &WhatsAppWebhookContext, sender: &str, user_mes
|
||||
let post_sender = sender.to_string();
|
||||
let post_task = tokio::spawn(async move {
|
||||
while let Some(chunk) = msg_rx.recv().await {
|
||||
// Split into WhatsApp-sized chunks.
|
||||
for part in chunk_for_whatsapp(&chunk) {
|
||||
// Convert Markdown to WhatsApp formatting, then split into sized chunks.
|
||||
let formatted = markdown_to_whatsapp(&chunk);
|
||||
for part in chunk_for_whatsapp(&formatted) {
|
||||
let _ = post_transport.send_message(&post_sender, &part, "").await;
|
||||
}
|
||||
}
|
||||
@@ -1510,6 +1579,81 @@ mod tests {
|
||||
assert_eq!(chunks, vec![""]);
|
||||
}
|
||||
|
||||
// ── markdown_to_whatsapp tests ────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn md_to_wa_converts_headers_to_bold() {
|
||||
assert_eq!(markdown_to_whatsapp("# Title"), "*Title*");
|
||||
assert_eq!(markdown_to_whatsapp("## Subtitle"), "*Subtitle*");
|
||||
assert_eq!(markdown_to_whatsapp("### Section"), "*Section*");
|
||||
assert_eq!(markdown_to_whatsapp("###### Deep"), "*Deep*");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn md_to_wa_converts_bold() {
|
||||
assert_eq!(markdown_to_whatsapp("**bold text**"), "*bold text*");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn md_to_wa_converts_bold_italic() {
|
||||
assert_eq!(markdown_to_whatsapp("***emphasis***"), "*_emphasis_*");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn md_to_wa_converts_strikethrough() {
|
||||
assert_eq!(markdown_to_whatsapp("~~removed~~"), "~removed~");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn md_to_wa_converts_links() {
|
||||
assert_eq!(
|
||||
markdown_to_whatsapp("[click here](https://example.com)"),
|
||||
"click here (https://example.com)"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn md_to_wa_removes_horizontal_rules() {
|
||||
assert_eq!(markdown_to_whatsapp("above\n---\nbelow"), "above\n\nbelow");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn md_to_wa_preserves_inline_code() {
|
||||
assert_eq!(markdown_to_whatsapp("use `foo()` here"), "use `foo()` here");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn md_to_wa_preserves_code_blocks() {
|
||||
let input = "before\n```rust\nfn main() {\n println!(\"**not bold**\");\n}\n```\nafter";
|
||||
let output = markdown_to_whatsapp(input);
|
||||
// Code block content must NOT be converted.
|
||||
assert!(output.contains("\"**not bold**\""));
|
||||
// But surrounding text is still converted.
|
||||
assert!(output.contains("before"));
|
||||
assert!(output.contains("after"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn md_to_wa_mixed_message() {
|
||||
let input = "### Philosophy\n- **Stories** define the change\n- ~~old~~ is gone\n- See [docs](https://example.com)";
|
||||
let output = markdown_to_whatsapp(input);
|
||||
assert!(output.starts_with("*Philosophy*"));
|
||||
assert!(output.contains("*Stories*"));
|
||||
assert!(output.contains("~old~"));
|
||||
assert!(output.contains("docs (https://example.com)"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn md_to_wa_passthrough_plain_text() {
|
||||
let plain = "Hello, how are you?";
|
||||
assert_eq!(markdown_to_whatsapp(plain), plain);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn md_to_wa_empty_string() {
|
||||
assert_eq!(markdown_to_whatsapp(""), "");
|
||||
}
|
||||
|
||||
// ── WhatsApp history persistence tests ──────────────────────────────
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user