354 lines
13 KiB
Rust
354 lines
13 KiB
Rust
//! Matrix mention detection — checks whether a message mentions the bot,
//! replies to one of the bot's own messages, or is explicitly addressed to
//! someone else.
|
|
use matrix_sdk::ruma::events::room::message::{Relation, RoomMessageEventContentWithoutRelation};
|
|
use matrix_sdk::ruma::{OwnedEventId, OwnedUserId};
|
|
use std::collections::HashSet;
|
|
use tokio::sync::Mutex as TokioMutex;
|
|
|
|
/// Returns `true` if the message mentions the bot.
|
|
///
|
|
/// Checks both the plain-text `body` and an optional `formatted_body` (HTML).
|
|
/// Recognised forms:
|
|
/// - The bot's full Matrix user ID (e.g. `@timmy:homeserver.local`) in either body
|
|
/// - The localpart with `@` prefix (e.g. `@timmy`) with word-boundary check
|
|
/// - A `matrix.to` link containing the user ID (in `formatted_body`)
|
|
///
|
|
/// Short mentions are only counted when not immediately followed by an
|
|
/// alphanumeric character, hyphen, or underscore to avoid false positives.
|
|
pub fn mentions_bot(body: &str, formatted_body: Option<&str>, bot_user_id: &OwnedUserId) -> bool {
|
|
let full_id = bot_user_id.as_str();
|
|
let localpart = bot_user_id.localpart();
|
|
|
|
// Check formatted_body for a matrix.to link containing the bot's user ID.
|
|
if formatted_body.is_some_and(|html| html.contains(full_id)) {
|
|
return true;
|
|
}
|
|
|
|
// Check plain body for the full ID.
|
|
if body.contains(full_id) {
|
|
return true;
|
|
}
|
|
|
|
// Check plain body for @localpart (e.g. "@timmy") with word boundaries.
|
|
if contains_word(body, &format!("@{localpart}")) {
|
|
return true;
|
|
}
|
|
|
|
false
|
|
}
|
|
|
|
/// Returns `true` if `haystack` contains `needle` at a word boundary.
|
|
#[allow(clippy::string_slice)] // all indices from find() or abs+needle.len() → always char boundaries
|
|
pub(super) fn contains_word(haystack: &str, needle: &str) -> bool {
|
|
let mut start = 0;
|
|
while let Some(rel) = haystack[start..].find(needle) {
|
|
let abs = start + rel;
|
|
let after = abs + needle.len();
|
|
let next = haystack[after..].chars().next();
|
|
let is_word_end = next.is_none_or(|c| !c.is_alphanumeric() && c != '-' && c != '_');
|
|
if is_word_end {
|
|
return true;
|
|
}
|
|
start = abs + 1;
|
|
}
|
|
false
|
|
}
|
|
|
|
/// Returns `true` if the message's `relates_to` field references an event that
|
|
/// the bot previously sent (i.e. the message is a reply or thread-reply to a
|
|
/// bot message).
|
|
pub(super) async fn is_reply_to_bot(
|
|
relates_to: Option<&Relation<RoomMessageEventContentWithoutRelation>>,
|
|
bot_sent_event_ids: &TokioMutex<HashSet<OwnedEventId>>,
|
|
) -> bool {
|
|
let candidate_ids: Vec<&OwnedEventId> = match relates_to {
|
|
Some(Relation::Reply { in_reply_to }) => vec![&in_reply_to.event_id],
|
|
Some(Relation::Thread(thread)) => {
|
|
let mut ids = vec![&thread.event_id];
|
|
if let Some(irti) = &thread.in_reply_to {
|
|
ids.push(&irti.event_id);
|
|
}
|
|
ids
|
|
}
|
|
_ => return false,
|
|
};
|
|
let guard = bot_sent_event_ids.lock().await;
|
|
candidate_ids.iter().any(|id| guard.contains(*id))
|
|
}
|
|
|
|
/// Returns `true` when the message body appears to be explicitly addressed to
|
|
/// someone **other** than this bot.
|
|
///
|
|
/// Recognised address patterns at the start of the body:
|
|
/// - `"name: rest"` — display-name style (e.g. `"sally: do X"`)
|
|
/// - `"@name rest"` — @ mention style (e.g. `"@sally do X"`)
|
|
///
|
|
/// A message is only considered addressed to another party when the name does
|
|
/// **not** match either the bot's `bot_name` (case-insensitive) or the
|
|
/// localpart of its `bot_user_id`.
|
|
///
|
|
/// Used in ambient mode to suppress responses when a message is clearly
|
|
/// directed at a different participant (e.g. another bot in the same room).
|
|
#[allow(clippy::string_slice)] // word_end and colon_pos from find() → always char boundaries
|
|
pub fn is_addressed_to_other(body: &str, bot_user_id: &OwnedUserId, bot_name: &str) -> bool {
|
|
let trimmed = body.trim_start();
|
|
let lower = trimmed.to_lowercase();
|
|
let bot_name_lower = bot_name.to_lowercase();
|
|
let bot_localpart = bot_user_id.localpart().to_lowercase();
|
|
|
|
// Pattern A: "@name …" at the start of the message.
|
|
// Handles both "@localpart" and "@localpart:homeserver" forms.
|
|
if let Some(rest) = lower.strip_prefix('@') {
|
|
// Extract everything up to the first whitespace character.
|
|
let word_end = rest.find(|c: char| c.is_whitespace()).unwrap_or(rest.len());
|
|
let mention = &rest[..word_end]; // e.g. "sally" or "sally:example.com"
|
|
|
|
// Strip the homeserver part to get just the localpart.
|
|
let localpart = mention.split(':').next().unwrap_or(mention);
|
|
|
|
if localpart.is_empty() {
|
|
return false; // bare "@" — not an address
|
|
}
|
|
if localpart == bot_localpart {
|
|
return false; // addressed to us
|
|
}
|
|
return true; // addressed to someone else
|
|
}
|
|
|
|
// Pattern B: "name: rest" — display-name style.
|
|
// Only the text before the *first* colon is inspected. We require that
|
|
// the prefix contains no spaces so that ordinary sentences such as
|
|
// "Here is a question: …" are not misread as bot addresses.
|
|
if let Some(colon_pos) = lower.find(':') {
|
|
let prefix = &lower[..colon_pos];
|
|
|
|
// Single-word prefix (no spaces).
|
|
if !prefix.contains(' ') && !prefix.is_empty() {
|
|
if prefix == bot_name_lower || prefix == bot_localpart {
|
|
return false; // addressed to us
|
|
}
|
|
return true; // addressed to someone else
|
|
}
|
|
|
|
// Multi-word prefix: only treat as an address if it is an exact
|
|
// case-insensitive match for our display name.
|
|
if prefix == bot_name_lower {
|
|
return false; // addressed to us
|
|
}
|
|
// Otherwise the colon is part of a regular sentence — not an address.
|
|
}
|
|
|
|
false
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Tests
|
|
// ---------------------------------------------------------------------------
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use matrix_sdk::ruma::events::relation::{InReplyTo, Thread};
    use std::sync::Arc;

    /// Shared set of event IDs the bot has sent, shaped like the production value.
    type SentEvents = Arc<TokioMutex<HashSet<OwnedEventId>>>;
    /// The relation type carried by room messages.
    type MessageRelation = Relation<RoomMessageEventContentWithoutRelation>;

    // -- fixtures -----------------------------------------------------------

    /// Parses a Matrix user ID, panicking on malformed test input.
    fn make_user_id(s: &str) -> OwnedUserId {
        s.parse().unwrap()
    }

    /// Parses a Matrix event ID, panicking on malformed test input.
    fn make_event_id(s: &str) -> OwnedEventId {
        s.parse().unwrap()
    }

    /// Builds the bot's "sent events" set pre-populated with `ids`.
    fn sent_events(ids: &[OwnedEventId]) -> SentEvents {
        Arc::new(TokioMutex::new(ids.iter().cloned().collect()))
    }

    /// Runs `mentions_bot` on a plain-text body for the bot `@timmy:homeserver.local`.
    fn timmy_mentioned_in(body: &str) -> bool {
        mentions_bot(body, None, &make_user_id("@timmy:homeserver.local"))
    }

    /// Runs `is_addressed_to_other` for the bot `@stu:homeserver.local` named "stu".
    fn addressed_away_from_stu(body: &str) -> bool {
        is_addressed_to_other(body, &make_user_id("@stu:homeserver.local"), "stu")
    }

    // -- mentions_bot -------------------------------------------------------

    #[test]
    fn mentions_bot_by_full_id() {
        assert!(timmy_mentioned_in(
            "hello @timmy:homeserver.local can you help?"
        ));
    }

    #[test]
    fn mentions_bot_by_localpart_at_start() {
        assert!(timmy_mentioned_in("@timmy please list open stories"));
    }

    #[test]
    fn mentions_bot_by_localpart_mid_sentence() {
        assert!(timmy_mentioned_in("hey @timmy what's the status?"));
    }

    #[test]
    fn mentions_bot_not_mentioned() {
        assert!(!timmy_mentioned_in("can someone help me with this PR?"));
    }

    #[test]
    fn mentions_bot_no_false_positive_longer_username() {
        // "@timmybot" must NOT match "@timmy"
        assert!(!timmy_mentioned_in("hey @timmybot can you help?"));
    }

    #[test]
    fn mentions_bot_at_end_of_string() {
        assert!(timmy_mentioned_in("shoutout to @timmy"));
    }

    #[test]
    fn mentions_bot_followed_by_comma() {
        assert!(timmy_mentioned_in("@timmy, can you help?"));
    }

    // -- is_reply_to_bot ----------------------------------------------------

    #[tokio::test]
    async fn is_reply_to_bot_direct_reply_match() {
        let bot_event = make_event_id("$abc123:example.com");
        let sent = sent_events(&[bot_event.clone()]);

        let relates_to: Option<MessageRelation> = Some(Relation::Reply {
            in_reply_to: InReplyTo::new(bot_event),
        });

        assert!(is_reply_to_bot(relates_to.as_ref(), &sent).await);
    }

    #[tokio::test]
    async fn is_reply_to_bot_direct_reply_no_match() {
        // Nothing recorded as sent — the replied-to event is not the bot's.
        let sent = sent_events(&[]);

        let relates_to: Option<MessageRelation> = Some(Relation::Reply {
            in_reply_to: InReplyTo::new(make_event_id("$other:example.com")),
        });

        assert!(!is_reply_to_bot(relates_to.as_ref(), &sent).await);
    }

    #[tokio::test]
    async fn is_reply_to_bot_no_relation() {
        let sent = sent_events(&[]);
        let relates_to: Option<MessageRelation> = None;

        assert!(!is_reply_to_bot(relates_to.as_ref(), &sent).await);
    }

    #[tokio::test]
    async fn is_reply_to_bot_thread_root_match() {
        let root = make_event_id("$root123:example.com");
        let sent = sent_events(&[root.clone()]);

        // Thread reply where the thread root is the bot's message
        let relates_to: Option<MessageRelation> = Some(Relation::Thread(Thread::plain(
            root,
            make_event_id("$latest:example.com"),
        )));

        assert!(is_reply_to_bot(relates_to.as_ref(), &sent).await);
    }

    // -- is_addressed_to_other ----------------------------------------------

    #[test]
    fn addressed_to_other_display_name_colon() {
        // Addressed to sally, not our bot (stu)
        assert!(addressed_away_from_stu("sally: do X"));
    }

    #[test]
    fn addressed_to_other_at_mention() {
        // Addressed to sally, not our bot (stu)
        assert!(addressed_away_from_stu("@sally do X"));
    }

    #[test]
    fn addressed_to_other_at_mention_full_id() {
        // Full user ID form — the localpart is still "sally"
        assert!(addressed_away_from_stu("@sally:homeserver.local do X"));
    }

    #[test]
    fn not_addressed_to_other_self_display_name() {
        // Addressed to us by display name
        assert!(!addressed_away_from_stu("stu: do X"));
    }

    #[test]
    fn not_addressed_to_other_self_at_mention() {
        // Addressed to us by @ mention
        assert!(!addressed_away_from_stu("@stu do X"));
    }

    #[test]
    fn not_addressed_to_other_self_at_mention_full_id() {
        // Addressed to us by full user ID
        assert!(!addressed_away_from_stu("@stu:homeserver.local do X"));
    }

    #[test]
    fn not_addressed_to_other_no_addressee() {
        // No explicit addressee — ambient message for everyone
        assert!(!addressed_away_from_stu(
            "what's the status of the pipeline?"
        ));
    }

    #[test]
    fn not_addressed_to_other_sentence_with_colon() {
        // Regular sentence with colon — not an address
        assert!(!addressed_away_from_stu("here is the answer: it depends"));
    }

    #[test]
    fn not_addressed_to_other_display_name_case_insensitive() {
        // Upper-case form of our own name "stu" still counts as us
        assert!(!addressed_away_from_stu("STU: do X"));
    }

    #[test]
    fn addressed_to_other_case_insensitive_other_name() {
        // Upper-case name of someone else — addressed to sally, not us
        assert!(addressed_away_from_stu("SALLY: do X"));
    }
}
|