feat(884): persistent perm_rx lock-holder for Matrix bot
Before: handle_message.rs acquired services.perm_rx only while processing one chat message and dropped it on chat_fut completion. The moment the bot wasn't actively responding, prompt_permission auto-denied any spawned coder bash call as "no interactive session" — making unattended coder work impossible. Now: a permission_listener task is spawned at bot startup and holds perm_rx for the bot's lifetime. Permission requests are forwarded to the first configured Matrix room, replies resolved by the existing on_room_message handler via pending_perm_replies. Per-message acquire is gone from handle_message.rs (chat_fut just awaits cleanly). - New module: chat/transport/matrix/bot/permission_listener.rs. - Wired into run_bot before BotContext construction; bot_sent_event_ids is hoisted out so the listener and the rest of the bot share it. - handle_message.rs no longer touches perm_rx. - diagnostics/permission.rs comment updated to reflect the new reality. - Regression test asserts the listener forwards a PermissionForward to the target room and records the pending reply key — exactly the path that was broken when no chat_fut was in flight. Discord/Slack/WhatsApp transports still acquire perm_rx per message (commands.rs:368 / commands/llm.rs:83 / commands/llm.rs:82). They are not the active transport in this deployment so their per-message acquire remains dormant; the same listener pattern should be applied to them as follow-up work in 884 phase 2.
This commit is contained in:
@@ -2,13 +2,11 @@
|
|||||||
//! streams the assistant reply back to the room.
|
//! streams the assistant reply back to the room.
|
||||||
|
|
||||||
use crate::chat::util::drain_complete_paragraphs;
|
use crate::chat::util::drain_complete_paragraphs;
|
||||||
use crate::http::context::PermissionDecision;
|
|
||||||
use crate::llm::providers::claude_code::{ClaudeCodeProvider, ClaudeCodeResult};
|
use crate::llm::providers::claude_code::{ClaudeCodeProvider, ClaudeCodeResult};
|
||||||
use crate::slog;
|
use crate::slog;
|
||||||
use matrix_sdk::ruma::OwnedRoomId;
|
use matrix_sdk::ruma::OwnedRoomId;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::sync::atomic::{AtomicBool, Ordering};
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
use std::time::Duration;
|
|
||||||
use tokio::sync::watch;
|
use tokio::sync::watch;
|
||||||
|
|
||||||
use super::super::context::BotContext;
|
use super::super::context::BotContext;
|
||||||
@@ -113,64 +111,10 @@ pub(in crate::chat::transport::matrix::bot) async fn handle_message(
|
|||||||
);
|
);
|
||||||
tokio::pin!(chat_fut);
|
tokio::pin!(chat_fut);
|
||||||
|
|
||||||
// Lock the permission receiver for the duration of this chat session.
|
// Permission requests are handled by the persistent permission_listener
|
||||||
// Permission requests from the MCP `prompt_permission` tool arrive here.
|
// task spawned at bot startup (story 884) — they no longer route through
|
||||||
let mut perm_rx_guard = ctx.services.perm_rx.lock().await;
|
// per-message handlers. Just await chat_fut.
|
||||||
|
let result = (&mut chat_fut).await;
|
||||||
let result = loop {
|
|
||||||
tokio::select! {
|
|
||||||
r = &mut chat_fut => break r,
|
|
||||||
|
|
||||||
Some(perm_fwd) = perm_rx_guard.recv() => {
|
|
||||||
// Post the permission prompt to the room via the transport.
|
|
||||||
let prompt_msg = format!(
|
|
||||||
"**Permission Request**\n\n\
|
|
||||||
Tool: `{}`\n```json\n{}\n```\n\n\
|
|
||||||
Reply **yes** to approve or **no** to deny.",
|
|
||||||
perm_fwd.tool_name,
|
|
||||||
serde_json::to_string_pretty(&perm_fwd.tool_input)
|
|
||||||
.unwrap_or_else(|_| perm_fwd.tool_input.to_string()),
|
|
||||||
);
|
|
||||||
let html = markdown_to_html(&prompt_msg);
|
|
||||||
if let Ok(msg_id) = ctx.transport.send_message(&room_id_str, &prompt_msg, &html).await
|
|
||||||
&& let Ok(event_id) = msg_id.parse()
|
|
||||||
{
|
|
||||||
sent_ids.lock().await.insert(event_id);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Store the MCP oneshot sender so the event handler can
|
|
||||||
// resolve it when the user replies yes/no.
|
|
||||||
ctx.services.pending_perm_replies
|
|
||||||
.lock()
|
|
||||||
.await
|
|
||||||
.insert(room_id.to_string(), perm_fwd.response_tx);
|
|
||||||
|
|
||||||
// Spawn a timeout task: auto-deny if the user does not respond.
|
|
||||||
let pending = Arc::clone(&ctx.services.pending_perm_replies);
|
|
||||||
let timeout_room_id = room_id.to_string();
|
|
||||||
let timeout_transport = Arc::clone(&ctx.transport);
|
|
||||||
let timeout_room_id_str = room_id_str.clone();
|
|
||||||
let timeout_sent_ids = Arc::clone(&ctx.bot_sent_event_ids);
|
|
||||||
let timeout_secs = ctx.services.permission_timeout_secs;
|
|
||||||
tokio::spawn(async move {
|
|
||||||
tokio::time::sleep(Duration::from_secs(timeout_secs)).await;
|
|
||||||
if let Some(tx) = pending.lock().await.remove(&timeout_room_id) {
|
|
||||||
let _ = tx.send(PermissionDecision::Deny);
|
|
||||||
let msg = "Permission request timed out — denied (fail-closed).";
|
|
||||||
let html = markdown_to_html(msg);
|
|
||||||
if let Ok(msg_id) = timeout_transport
|
|
||||||
.send_message(&timeout_room_id_str, msg, &html)
|
|
||||||
.await
|
|
||||||
&& let Ok(event_id) = msg_id.parse()
|
|
||||||
{
|
|
||||||
timeout_sent_ids.lock().await.insert(event_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
drop(perm_rx_guard);
|
|
||||||
|
|
||||||
// Flush any remaining text that didn't end with a paragraph boundary.
|
// Flush any remaining text that didn't end with a paragraph boundary.
|
||||||
let remaining = buffer.lock().unwrap().trim().to_string();
|
let remaining = buffer.lock().unwrap().trim().to_string();
|
||||||
|
|||||||
@@ -9,6 +9,9 @@ pub mod history;
|
|||||||
pub mod mentions;
|
pub mod mentions;
|
||||||
/// Message handlers — processes incoming Matrix room messages.
|
/// Message handlers — processes incoming Matrix room messages.
|
||||||
pub mod messages;
|
pub mod messages;
|
||||||
|
/// Permission listener — holds perm_rx for the bot's lifetime and forwards
|
||||||
|
/// permission requests to the configured Matrix room.
|
||||||
|
pub mod permission_listener;
|
||||||
/// Bot run loop — the main async task that drives the Matrix sync loop.
|
/// Bot run loop — the main async task that drives the Matrix sync loop.
|
||||||
pub mod run;
|
pub mod run;
|
||||||
/// Device verification — handles Matrix cross-signing and emoji verification flows.
|
/// Device verification — handles Matrix cross-signing and emoji verification flows.
|
||||||
|
|||||||
@@ -0,0 +1,233 @@
|
|||||||
|
//! Background task that holds `perm_rx` for the bot's lifetime and forwards
|
||||||
|
//! permission requests to the configured Matrix room.
|
||||||
|
//!
|
||||||
|
//! Before story 884, each chat message handler acquired `perm_rx` for the
|
||||||
|
//! duration of one chat_fut and dropped it afterwards. That meant whenever
|
||||||
|
//! the bot wasn't actively responding, `prompt_permission` auto-denied any
|
||||||
|
//! spawned coder bash call as "no interactive session" — making unattended
|
||||||
|
//! coder work impossible. This task holds the lock continuously while the
|
||||||
|
//! bot is connected, so requests can flow at any time.
|
||||||
|
|
||||||
|
use crate::chat::ChatTransport;
|
||||||
|
use crate::http::context::PermissionDecision;
|
||||||
|
use crate::services::Services;
|
||||||
|
use crate::slog;
|
||||||
|
use matrix_sdk::ruma::{OwnedEventId, OwnedRoomId};
|
||||||
|
use std::collections::HashSet;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::time::Duration;
|
||||||
|
use tokio::sync::Mutex as TokioMutex;
|
||||||
|
|
||||||
|
use super::format::markdown_to_html;
|
||||||
|
|
||||||
|
/// Spawn a background task that holds `services.perm_rx` for the bot's
|
||||||
|
/// lifetime and forwards each incoming permission request to `target_room`
|
||||||
|
/// as a chat message. Replies (yes/no) are resolved by the existing
|
||||||
|
/// `on_room_message` handler via `pending_perm_replies`.
|
||||||
|
///
|
||||||
|
/// Returns the JoinHandle so the caller can keep ownership; the task exits
|
||||||
|
/// only when the `perm_rx` channel is closed (bot shutdown).
|
||||||
|
pub fn spawn_permission_listener(
|
||||||
|
services: Arc<Services>,
|
||||||
|
transport: Arc<dyn ChatTransport>,
|
||||||
|
target_room: OwnedRoomId,
|
||||||
|
bot_sent_event_ids: Arc<TokioMutex<HashSet<OwnedEventId>>>,
|
||||||
|
) -> tokio::task::JoinHandle<()> {
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let mut perm_rx = services.perm_rx.lock().await;
|
||||||
|
let target_room_str = target_room.as_str().to_string();
|
||||||
|
slog!(
|
||||||
|
"[matrix-bot] permission listener started; forwarding requests to {target_room_str}"
|
||||||
|
);
|
||||||
|
|
||||||
|
while let Some(perm_fwd) = perm_rx.recv().await {
|
||||||
|
let prompt_msg = format!(
|
||||||
|
"**Permission Request**\n\n\
|
||||||
|
Tool: `{}`\n```json\n{}\n```\n\n\
|
||||||
|
Reply **yes** to approve or **no** to deny.",
|
||||||
|
perm_fwd.tool_name,
|
||||||
|
serde_json::to_string_pretty(&perm_fwd.tool_input)
|
||||||
|
.unwrap_or_else(|_| perm_fwd.tool_input.to_string()),
|
||||||
|
);
|
||||||
|
let html = markdown_to_html(&prompt_msg);
|
||||||
|
if let Ok(msg_id) = transport
|
||||||
|
.send_message(&target_room_str, &prompt_msg, &html)
|
||||||
|
.await
|
||||||
|
&& let Ok(event_id) = msg_id.parse::<OwnedEventId>()
|
||||||
|
{
|
||||||
|
bot_sent_event_ids.lock().await.insert(event_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store the MCP oneshot sender so on_room_message can resolve it
|
||||||
|
// when the user replies yes/no in the target room.
|
||||||
|
services
|
||||||
|
.pending_perm_replies
|
||||||
|
.lock()
|
||||||
|
.await
|
||||||
|
.insert(target_room.to_string(), perm_fwd.response_tx);
|
||||||
|
|
||||||
|
// Spawn a per-request timeout: auto-deny if the user does not
|
||||||
|
// respond within `permission_timeout_secs`.
|
||||||
|
let pending = Arc::clone(&services.pending_perm_replies);
|
||||||
|
let timeout_room_key = target_room.to_string();
|
||||||
|
let timeout_transport = Arc::clone(&transport);
|
||||||
|
let timeout_room_str = target_room_str.clone();
|
||||||
|
let timeout_sent_ids = Arc::clone(&bot_sent_event_ids);
|
||||||
|
let timeout_secs = services.permission_timeout_secs;
|
||||||
|
tokio::spawn(async move {
|
||||||
|
tokio::time::sleep(Duration::from_secs(timeout_secs)).await;
|
||||||
|
if let Some(tx) = pending.lock().await.remove(&timeout_room_key) {
|
||||||
|
let _ = tx.send(PermissionDecision::Deny);
|
||||||
|
let msg = "Permission request timed out — denied (fail-closed).";
|
||||||
|
let html = markdown_to_html(msg);
|
||||||
|
if let Ok(msg_id) = timeout_transport
|
||||||
|
.send_message(&timeout_room_str, msg, &html)
|
||||||
|
.await
|
||||||
|
&& let Ok(event_id) = msg_id.parse::<OwnedEventId>()
|
||||||
|
{
|
||||||
|
timeout_sent_ids.lock().await.insert(event_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
slog!("[matrix-bot] permission listener exiting (perm_rx channel closed)");
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
//! Story 884 regression: spawn the listener with no in-flight chat
|
||||||
|
//! message, send a PermissionForward through the channel, and verify
|
||||||
|
//! the listener forwards it to the configured Matrix room (instead of
|
||||||
|
//! being auto-denied for "no interactive session").
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
use crate::http::context::PermissionForward;
|
||||||
|
use crate::services::Services;
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use serde_json::json;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use tokio::sync::{mpsc, oneshot};
|
||||||
|
|
||||||
|
struct RecordingTransport {
|
||||||
|
sent: Arc<std::sync::Mutex<Vec<(String, String)>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl crate::chat::ChatTransport for RecordingTransport {
|
||||||
|
async fn send_message(
|
||||||
|
&self,
|
||||||
|
room_id: &str,
|
||||||
|
plain: &str,
|
||||||
|
_html: &str,
|
||||||
|
) -> Result<crate::chat::MessageId, String> {
|
||||||
|
self.sent
|
||||||
|
.lock()
|
||||||
|
.unwrap()
|
||||||
|
.push((room_id.to_string(), plain.to_string()));
|
||||||
|
// Matrix event IDs start with `$`; OwnedEventId::parse expects
|
||||||
|
// a valid form so the listener can insert it into bot_sent_event_ids.
|
||||||
|
Ok("$test_event_id:example.com".to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn edit_message(
|
||||||
|
&self,
|
||||||
|
_: &str,
|
||||||
|
_: &str,
|
||||||
|
_: &str,
|
||||||
|
_: &str,
|
||||||
|
) -> Result<(), String> {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn send_typing(&self, _: &str, _: bool) -> Result<(), String> {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn test_services_with_tx() -> (
|
||||||
|
Arc<Services>,
|
||||||
|
mpsc::UnboundedSender<PermissionForward>,
|
||||||
|
) {
|
||||||
|
let (perm_tx, perm_rx) = mpsc::unbounded_channel();
|
||||||
|
let services = Arc::new(Services {
|
||||||
|
project_root: std::path::PathBuf::from("/tmp/test"),
|
||||||
|
agents: Arc::new(crate::agents::AgentPool::new_test(3000)),
|
||||||
|
bot_name: "Assistant".to_string(),
|
||||||
|
bot_user_id: "@bot:example.com".to_string(),
|
||||||
|
ambient_rooms: Arc::new(std::sync::Mutex::new(
|
||||||
|
std::collections::HashSet::new(),
|
||||||
|
)),
|
||||||
|
perm_rx: Arc::new(TokioMutex::new(perm_rx)),
|
||||||
|
pending_perm_replies: Arc::new(TokioMutex::new(HashMap::new())),
|
||||||
|
permission_timeout_secs: 120,
|
||||||
|
status: Arc::new(crate::service::status::StatusBroadcaster::new()),
|
||||||
|
});
|
||||||
|
(services, perm_tx)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn listener_forwards_request_to_target_room_when_no_chat_in_flight() {
|
||||||
|
let (services, perm_tx) = test_services_with_tx();
|
||||||
|
let sent: Arc<std::sync::Mutex<Vec<(String, String)>>> =
|
||||||
|
Arc::new(std::sync::Mutex::new(Vec::new()));
|
||||||
|
let transport: Arc<dyn crate::chat::ChatTransport> = Arc::new(RecordingTransport {
|
||||||
|
sent: Arc::clone(&sent),
|
||||||
|
});
|
||||||
|
let target_room: OwnedRoomId = "!perm:example.com".parse().unwrap();
|
||||||
|
let bot_sent_event_ids = Arc::new(TokioMutex::new(HashSet::new()));
|
||||||
|
|
||||||
|
spawn_permission_listener(
|
||||||
|
Arc::clone(&services),
|
||||||
|
Arc::clone(&transport),
|
||||||
|
target_room.clone(),
|
||||||
|
Arc::clone(&bot_sent_event_ids),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Yield so the listener task acquires perm_rx and starts recv'ing.
|
||||||
|
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
|
||||||
|
|
||||||
|
let (response_tx, _response_rx) = oneshot::channel();
|
||||||
|
perm_tx
|
||||||
|
.send(PermissionForward {
|
||||||
|
request_id: "req-1".to_string(),
|
||||||
|
tool_name: "Bash".to_string(),
|
||||||
|
tool_input: json!({"command": "cargo test"}),
|
||||||
|
response_tx,
|
||||||
|
})
|
||||||
|
.expect("send PermissionForward");
|
||||||
|
|
||||||
|
// Give the listener a moment to process.
|
||||||
|
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
|
||||||
|
|
||||||
|
// The transport must have received exactly one send_message to the
|
||||||
|
// target room with the prompt content.
|
||||||
|
let recorded = sent.lock().unwrap().clone();
|
||||||
|
assert_eq!(recorded.len(), 1, "expected exactly one send_message");
|
||||||
|
assert_eq!(recorded[0].0, target_room.as_str());
|
||||||
|
assert!(
|
||||||
|
recorded[0].1.contains("Permission Request"),
|
||||||
|
"prompt body missing 'Permission Request' header: {}",
|
||||||
|
recorded[0].1
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
recorded[0].1.contains("Bash"),
|
||||||
|
"prompt body missing tool name: {}",
|
||||||
|
recorded[0].1
|
||||||
|
);
|
||||||
|
|
||||||
|
// pending_perm_replies must contain an entry keyed by the target room
|
||||||
|
// (so the user-reply handler can resolve the request when they reply).
|
||||||
|
let pending = services.pending_perm_replies.lock().await;
|
||||||
|
assert!(
|
||||||
|
pending.contains_key(target_room.as_str()),
|
||||||
|
"pending_perm_replies missing entry for target room"
|
||||||
|
);
|
||||||
|
|
||||||
|
// bot_sent_event_ids must have recorded the prompt's event ID so the
|
||||||
|
// bot does not echo its own prompt back as user input.
|
||||||
|
let sent_ids = bot_sent_event_ids.lock().await;
|
||||||
|
assert_eq!(sent_ids.len(), 1, "expected one sent event ID recorded");
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -275,6 +275,26 @@ pub async fn run_bot(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Hoist bot_sent_event_ids out of BotContext so the permission listener
|
||||||
|
// can share it (the listener tracks which permission-prompt messages it
|
||||||
|
// posted so the bot doesn't echo them back as user input).
|
||||||
|
let bot_sent_event_ids: Arc<TokioMutex<HashSet<matrix_sdk::ruma::OwnedEventId>>> =
|
||||||
|
Arc::new(TokioMutex::new(HashSet::new()));
|
||||||
|
|
||||||
|
// Spawn the permission listener: holds `perm_rx` for the bot's lifetime
|
||||||
|
// and forwards permission requests to the first configured room. Story
|
||||||
|
// 884 — replaces the per-message lock acquire previously done in
|
||||||
|
// handle_message.rs, so spawned coders' bash calls reach chat even when
|
||||||
|
// the bot isn't actively responding.
|
||||||
|
if let Some(target_room) = target_room_ids.first() {
|
||||||
|
super::permission_listener::spawn_permission_listener(
|
||||||
|
Arc::clone(&services),
|
||||||
|
Arc::clone(&transport),
|
||||||
|
target_room.clone(),
|
||||||
|
Arc::clone(&bot_sent_event_ids),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
let ctx = BotContext {
|
let ctx = BotContext {
|
||||||
services,
|
services,
|
||||||
matrix_user_id: bot_user_id,
|
matrix_user_id: bot_user_id,
|
||||||
@@ -282,7 +302,7 @@ pub async fn run_bot(
|
|||||||
allowed_users: config.allowed_users,
|
allowed_users: config.allowed_users,
|
||||||
history: Arc::new(TokioMutex::new(persisted)),
|
history: Arc::new(TokioMutex::new(persisted)),
|
||||||
history_size: config.history_size,
|
history_size: config.history_size,
|
||||||
bot_sent_event_ids: Arc::new(TokioMutex::new(HashSet::new())),
|
bot_sent_event_ids,
|
||||||
htop_sessions: Arc::new(TokioMutex::new(HashMap::new())),
|
htop_sessions: Arc::new(TokioMutex::new(HashMap::new())),
|
||||||
transport: Arc::clone(&transport),
|
transport: Arc::clone(&transport),
|
||||||
timer_store,
|
timer_store,
|
||||||
|
|||||||
@@ -30,13 +30,12 @@ pub(crate) async fn tool_prompt_permission(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Auto-deny immediately if no interactive session is currently listening on
|
// Auto-deny immediately if no interactive session is currently listening on
|
||||||
// perm_rx. Interactive sessions (WebSocket, Matrix bot chat) hold the
|
// perm_rx. Story 884 made the Matrix bot hold this lock for its lifetime
|
||||||
// perm_rx lock for the duration of a chat. If try_lock succeeds, nobody is
|
// via the permission_listener task spawned at startup, so requests reach
|
||||||
// listening — this is a background agent call that should never reach chat.
|
// chat asynchronously regardless of whether a chat message is in flight.
|
||||||
//
|
// Other transports (Discord/Slack/WhatsApp) still acquire per message; if
|
||||||
// Without this check, agent permission requests queue in the channel and
|
// none is active, try_lock succeeds — auto-deny so background agent calls
|
||||||
// get forwarded to Matrix/Slack/etc. at the start of the next user session,
|
// don't queue and flood chat at the next user session.
|
||||||
// flooding chat with stale agent prompts.
|
|
||||||
if ctx.services.perm_rx.try_lock().is_ok() {
|
if ctx.services.perm_rx.try_lock().is_ok() {
|
||||||
crate::slog!(
|
crate::slog!(
|
||||||
"[permission] Auto-denied '{tool_name}' (no interactive session — agent mode)"
|
"[permission] Auto-denied '{tool_name}' (no interactive session — agent mode)"
|
||||||
|
|||||||
Reference in New Issue
Block a user