Files
huskies/server/src/chat/transport/matrix/bot/mentions.rs
T

352 lines
12 KiB
Rust
Raw Normal View History

//! Matrix mention detection — checks whether a message mentions the bot.
use matrix_sdk::ruma::events::room::message::{Relation, RoomMessageEventContentWithoutRelation};
use matrix_sdk::ruma::{OwnedEventId, OwnedUserId};
use std::collections::HashSet;
use tokio::sync::Mutex as TokioMutex;
/// Reports whether a message mentions the bot.
///
/// Both the plain-text `body` and, when present, the HTML `formatted_body`
/// are inspected. A mention is any of:
/// - the bot's full Matrix user ID (e.g. `@timmy:homeserver.local`) appearing
///   anywhere in `formatted_body` — this covers `matrix.to` links that embed
///   the ID literally;
/// - the full user ID appearing anywhere in `body`;
/// - the short form `@localpart` (e.g. `@timmy`) in `body`, provided it is not
///   immediately followed by an alphanumeric character, hyphen, or underscore
///   (so `@timmybot` does not count as a mention of `@timmy`).
///
/// The two full-ID checks are plain substring searches; only the short form
/// goes through the boundary check in `contains_word`.
pub fn mentions_bot(body: &str, formatted_body: Option<&str>, bot_user_id: &OwnedUserId) -> bool {
    let user_id = bot_user_id.as_str();

    // Full ID in the HTML body first, then in the plain body.
    let in_html = matches!(formatted_body, Some(html) if html.contains(user_id));
    if in_html || body.contains(user_id) {
        return true;
    }

    // Fall back to the short "@localpart" form, which needs a trailing boundary.
    let short_mention = format!("@{}", bot_user_id.localpart());
    contains_word(body, &short_mention)
}
/// Returns `true` if `haystack` contains an occurrence of `needle` that ends
/// at a word boundary.
///
/// Only the *trailing* edge is checked: an occurrence counts when it is
/// followed by the end of the string or by a character that is not
/// alphanumeric, `-`, or `_`. Whatever precedes the occurrence is not
/// inspected. Overlapping occurrences are all considered.
///
/// The search never panics on multi-byte text: after a rejected occurrence it
/// resumes one whole character (not one byte) past the occurrence's start.
pub(super) fn contains_word(haystack: &str, needle: &str) -> bool {
    let is_word_char = |c: char| c.is_alphanumeric() || c == '-' || c == '_';

    let mut start = 0;
    while let Some(rel) = haystack[start..].find(needle) {
        let abs = start + rel;
        let next = haystack[abs + needle.len()..].chars().next();
        if !next.is_some_and(is_word_char) {
            return true;
        }
        // Step over the full character at `abs`. Advancing by a single byte
        // would land inside a multi-byte code point and make the next slice
        // panic. A character always exists here because `next` was `Some`.
        let step = haystack[abs..].chars().next().map_or(1, char::len_utf8);
        start = abs + step;
    }
    false
}
/// Reports whether a message's `relates_to` field points at an event the bot
/// itself sent, i.e. the message is a reply or thread-reply to the bot.
///
/// Candidate event IDs, by relation kind:
/// - `Relation::Reply`: the replied-to event;
/// - `Relation::Thread`: the thread's `event_id`, plus the thread's
///   `in_reply_to` event when one is present.
///
/// Any other relation, or no relation at all, yields `false` without taking
/// the lock on `bot_sent_event_ids`.
pub(super) async fn is_reply_to_bot(
    relates_to: Option<&Relation<RoomMessageEventContentWithoutRelation>>,
    bot_sent_event_ids: &TokioMutex<HashSet<OwnedEventId>>,
) -> bool {
    // At most two IDs are ever relevant, so carry them as a pair instead of
    // building a temporary collection.
    let (primary, secondary) = match relates_to {
        Some(Relation::Reply { in_reply_to }) => (&in_reply_to.event_id, None),
        Some(Relation::Thread(thread)) => (
            &thread.event_id,
            thread.in_reply_to.as_ref().map(|reply| &reply.event_id),
        ),
        _ => return false,
    };

    let sent = bot_sent_event_ids.lock().await;
    sent.contains(primary) || secondary.is_some_and(|id| sent.contains(id))
}
/// Returns `true` when the message body appears to be explicitly addressed to
/// someone **other** than this bot.
///
/// Leading whitespace is ignored and all comparisons are case-insensitive.
/// Recognised address patterns at the start of the body:
/// - `"@name rest"` — @ mention style (e.g. `"@sally do X"`). A homeserver
///   suffix (`"@sally:example.com"`) and trailing punctuation
///   (`"@sally, do X"`) are stripped before the name is compared.
/// - `"name: rest"` — display-name style (e.g. `"sally: do X"`). The text
///   before the first colon must be a single word, and the colon must be
///   followed by whitespace or the end of the message, so that URLs
///   (`"http://…"`), times (`"12:30"`), and ordinary sentences
///   (`"Here is a question: …"`) are not misread as addresses.
///
/// In both patterns a message is only considered addressed to another party
/// when the name matches neither `bot_name` nor the localpart of
/// `bot_user_id`.
///
/// Used in ambient mode to suppress responses when a message is clearly
/// directed at a different participant (e.g. another bot in the same room).
pub fn is_addressed_to_other(body: &str, bot_user_id: &OwnedUserId, bot_name: &str) -> bool {
    let lower = body.trim_start().to_lowercase();
    let bot_name_lower = bot_name.to_lowercase();
    let bot_localpart = bot_user_id.localpart().to_lowercase();
    let is_us = |name: &str| name == bot_name_lower || name == bot_localpart;

    // Pattern A: "@name …" at the start of the message.
    if let Some(rest) = lower.strip_prefix('@') {
        // First whitespace-delimited token, e.g. "sally", "sally:example.com" or "sally,".
        let mention = rest.split(char::is_whitespace).next().unwrap_or(rest);
        // Drop the homeserver part to keep just the localpart.
        let raw = mention.split(':').next().unwrap_or(mention);
        // Drop trailing punctuation ("@stu," / "@stu!"), using the same notion
        // of a word character as the short-mention check: alphanumeric, '-', '_'.
        let name = raw.trim_end_matches(|c: char| !c.is_alphanumeric() && c != '-' && c != '_');
        if name.is_empty() {
            return false; // bare "@" or "@" followed only by punctuation — not an address
        }
        // The untrimmed token is also compared in case our own name ends in punctuation.
        return !(is_us(name) || is_us(raw));
    }

    // Pattern B: "name: rest" — display-name style. Only the first colon is inspected.
    if let Some((prefix, after)) = lower.split_once(':') {
        let colon_ends_word = after.chars().next().is_none_or(char::is_whitespace);
        let single_word = !prefix.is_empty() && !prefix.contains(char::is_whitespace);
        if colon_ends_word && single_word {
            return !is_us(prefix);
        }
        // A multi-word prefix is either our own display name (addressed to us)
        // or part of a regular sentence; neither is an address to someone else.
    }
    false
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;
    use matrix_sdk::ruma::events::relation::{InReplyTo, Thread};
    use std::sync::Arc;

    /// Shared set of event IDs the bot has sent, as the production code holds it.
    type SentIds = Arc<TokioMutex<HashSet<OwnedEventId>>>;
    /// The relation type carried by room messages.
    type MessageRelation = Relation<RoomMessageEventContentWithoutRelation>;

    // -- fixtures -----------------------------------------------------------

    fn make_user_id(s: &str) -> OwnedUserId {
        s.parse().unwrap()
    }

    fn make_event_id(s: &str) -> OwnedEventId {
        s.parse().unwrap()
    }

    /// Builds the bot's sent-event set pre-populated with `ids`.
    fn sent_ids(ids: &[OwnedEventId]) -> SentIds {
        let set: HashSet<OwnedEventId> = ids.iter().cloned().collect();
        Arc::new(TokioMutex::new(set))
    }

    /// A plain reply relation pointing at `target`.
    fn reply_to(target: OwnedEventId) -> MessageRelation {
        Relation::Reply {
            in_reply_to: InReplyTo::new(target),
        }
    }

    /// Runs `mentions_bot` for a plain-text message against `@timmy:homeserver.local`.
    fn timmy_mentioned_in(body: &str) -> bool {
        mentions_bot(body, None, &make_user_id("@timmy:homeserver.local"))
    }

    /// Runs `is_addressed_to_other` for the bot `@stu:homeserver.local` named "stu".
    fn addressed_away_from_stu(body: &str) -> bool {
        is_addressed_to_other(body, &make_user_id("@stu:homeserver.local"), "stu")
    }

    // -- mentions_bot -------------------------------------------------------

    #[test]
    fn mentions_bot_by_full_id() {
        assert!(timmy_mentioned_in(
            "hello @timmy:homeserver.local can you help?"
        ));
    }

    #[test]
    fn mentions_bot_by_localpart_at_start() {
        assert!(timmy_mentioned_in("@timmy please list open stories"));
    }

    #[test]
    fn mentions_bot_by_localpart_mid_sentence() {
        assert!(timmy_mentioned_in("hey @timmy what's the status?"));
    }

    #[test]
    fn mentions_bot_not_mentioned() {
        assert!(!timmy_mentioned_in("can someone help me with this PR?"));
    }

    #[test]
    fn mentions_bot_no_false_positive_longer_username() {
        // "@timmybot" must NOT match "@timmy"
        assert!(!timmy_mentioned_in("hey @timmybot can you help?"));
    }

    #[test]
    fn mentions_bot_at_end_of_string() {
        assert!(timmy_mentioned_in("shoutout to @timmy"));
    }

    #[test]
    fn mentions_bot_followed_by_comma() {
        assert!(timmy_mentioned_in("@timmy, can you help?"));
    }

    // -- is_reply_to_bot ----------------------------------------------------

    #[tokio::test]
    async fn is_reply_to_bot_direct_reply_match() {
        let bot_event = make_event_id("$abc123:example.com");
        let sent = sent_ids(&[bot_event.clone()]);
        let relation = reply_to(bot_event);
        assert!(is_reply_to_bot(Some(&relation), &sent).await);
    }

    #[tokio::test]
    async fn is_reply_to_bot_direct_reply_no_match() {
        // The sent set is empty — the replied-to event was not sent by the bot.
        let sent = sent_ids(&[]);
        let relation = reply_to(make_event_id("$other:example.com"));
        assert!(!is_reply_to_bot(Some(&relation), &sent).await);
    }

    #[tokio::test]
    async fn is_reply_to_bot_no_relation() {
        let sent = sent_ids(&[]);
        assert!(!is_reply_to_bot(None, &sent).await);
    }

    #[tokio::test]
    async fn is_reply_to_bot_thread_root_match() {
        let root = make_event_id("$root123:example.com");
        let sent = sent_ids(&[root.clone()]);
        // Thread reply where the thread root is the bot's message.
        let relation: MessageRelation = Relation::Thread(Thread::plain(
            root,
            make_event_id("$latest:example.com"),
        ));
        assert!(is_reply_to_bot(Some(&relation), &sent).await);
    }

    // -- is_addressed_to_other ----------------------------------------------

    #[test]
    fn addressed_to_other_display_name_colon() {
        // "sally: do X" — addressed to sally, not our bot (stu)
        assert!(addressed_away_from_stu("sally: do X"));
    }

    #[test]
    fn addressed_to_other_at_mention() {
        // "@sally do X" — addressed to sally, not our bot (stu)
        assert!(addressed_away_from_stu("@sally do X"));
    }

    #[test]
    fn addressed_to_other_at_mention_full_id() {
        // "@sally:homeserver.local do X" — localpart is still "sally"
        assert!(addressed_away_from_stu("@sally:homeserver.local do X"));
    }

    #[test]
    fn not_addressed_to_other_self_display_name() {
        // "stu: do X" — addressed to us
        assert!(!addressed_away_from_stu("stu: do X"));
    }

    #[test]
    fn not_addressed_to_other_self_at_mention() {
        // "@stu do X" — addressed to us
        assert!(!addressed_away_from_stu("@stu do X"));
    }

    #[test]
    fn not_addressed_to_other_self_at_mention_full_id() {
        // "@stu:homeserver.local do X" — addressed to us
        assert!(!addressed_away_from_stu("@stu:homeserver.local do X"));
    }

    #[test]
    fn not_addressed_to_other_no_addressee() {
        // No explicit addressee — ambient message for everyone
        assert!(!addressed_away_from_stu(
            "what's the status of the pipeline?"
        ));
    }

    #[test]
    fn not_addressed_to_other_sentence_with_colon() {
        // Regular sentence with colon — not an address
        assert!(!addressed_away_from_stu("here is the answer: it depends"));
    }

    #[test]
    fn not_addressed_to_other_display_name_case_insensitive() {
        // "STU: do X" — case-insensitive match against our name "stu"
        assert!(!addressed_away_from_stu("STU: do X"));
    }

    #[test]
    fn addressed_to_other_case_insensitive_other_name() {
        // "SALLY: do X" — addressed to sally, not us
        assert!(addressed_away_from_stu("SALLY: do X"));
    }
}