storkit: merge 366_story_bot_sends_shutdown_message_on_server_stop_or_rebuild

This commit is contained in:
dave
2026-03-22 19:08:41 +00:00
parent f610ef6046
commit 47173e0d3a
7 changed files with 390 additions and 20 deletions
+248 -4
View File
@@ -2,7 +2,72 @@
use crate::agents::AgentPool;
use crate::slog;
use crate::transport::ChatTransport;
use std::path::Path;
use std::sync::Arc;
// ── Shutdown notification ────────────────────────────────────────────────
/// The reason the server is shutting down.
///
/// Used to select the appropriate shutdown message sent to active bot channels.
///
/// This is a fieldless tag, so it is `Copy` (pass it by value, no `.clone()`
/// needed) and implements full `Eq` in addition to `PartialEq`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ShutdownReason {
    /// The operator stopped the server manually (SIGINT / SIGTERM / ctrl-c).
    Manual,
    /// A rebuild-and-restart was requested (via MCP tool or bot command).
    Rebuild,
}
/// Sends a shutdown announcement to all configured bot channels.
///
/// Wraps a [`ChatTransport`] together with the list of channel/room IDs the
/// bot is active in. Calling [`notify`](Self::notify) is best-effort — a failed
/// send is logged but never propagates to the caller, so a send error cannot
/// abort shutdown.
pub struct BotShutdownNotifier {
/// Transport every announcement is sent through.
transport: Arc<dyn ChatTransport>,
/// Channel/room IDs that each receive the announcement.
channels: Vec<String>,
/// Name interpolated at the start of the announcement text.
bot_name: String,
}
impl BotShutdownNotifier {
    /// Create a notifier that announces shutdowns through `transport` to each
    /// ID in `channels`, using `bot_name` in the announcement text.
    pub fn new(
        transport: Arc<dyn ChatTransport>,
        channels: Vec<String>,
        bot_name: String,
    ) -> Self {
        Self {
            transport,
            channels,
            bot_name,
        }
    }

    /// Build the announcement text that corresponds to `reason`.
    fn announcement(&self, reason: ShutdownReason) -> String {
        match reason {
            ShutdownReason::Manual => {
                format!("{} is going offline (server stopped).", self.bot_name)
            }
            ShutdownReason::Rebuild => {
                format!(
                    "{} is going offline to pick up a new build.",
                    self.bot_name
                )
            }
        }
    }

    /// Send a shutdown message to all configured channels.
    ///
    /// Channels are visited in order, one send at a time. The same text is
    /// passed as both the plain and the HTML body. A send that returns an
    /// error is logged and skipped, so one failing channel does not stop the
    /// remaining channels from being attempted.
    pub async fn notify(&self, reason: ShutdownReason) {
        let msg = self.announcement(reason);

        for channel in self.channels.iter() {
            match self.transport.send_message(channel, &msg, &msg).await {
                Ok(_) => {}
                Err(e) => {
                    slog!("[shutdown] Failed to send shutdown message to {channel}: {e}");
                }
            }
        }
    }
}
// ── Rebuild ──────────────────────────────────────────────────────────────
/// Rebuild the server binary and re-exec.
///
@@ -10,9 +75,14 @@ use std::path::Path;
/// 2. Runs `cargo build [-p storkit]` from the workspace root, matching
/// the current build profile (debug or release).
/// 3. If the build fails, returns the build error (server stays up).
/// 4. If the build succeeds, re-execs the process with the new binary via
/// `std::os::unix::process::CommandExt::exec()`.
pub async fn rebuild_and_restart(agents: &AgentPool, project_root: &Path) -> Result<String, String> {
/// 4. If the build succeeds, sends a best-effort shutdown notification (if a
/// [`BotShutdownNotifier`] is provided), then re-execs the process with
/// the new binary via `std::os::unix::process::CommandExt::exec()`.
pub async fn rebuild_and_restart(
agents: &AgentPool,
project_root: &Path,
notifier: Option<&BotShutdownNotifier>,
) -> Result<String, String> {
slog!("[rebuild] Rebuild and restart requested");
// 1. Gracefully stop all running agents.
@@ -69,7 +139,14 @@ pub async fn rebuild_and_restart(agents: &AgentPool, project_root: &Path) -> Res
slog!("[rebuild] Build succeeded, re-execing with new binary");
// 4. Re-exec with the new binary.
// 4. Send shutdown notification before replacing the process so that chat
// participants know the bot is going offline. Best-effort only — we
// do not abort the rebuild if the send fails.
if let Some(n) = notifier {
n.notify(ShutdownReason::Rebuild).await;
}
// 5. Re-exec with the new binary.
// Use the cargo output path rather than current_exe() so that rebuilds
// inside Docker work correctly — the running binary may be installed at
// /usr/local/bin/storkit (read-only) while cargo writes the new binary
@@ -102,3 +179,170 @@ pub async fn rebuild_and_restart(agents: &AgentPool, project_root: &Path) -> Res
// If we get here, exec() failed.
Err(format!("Failed to exec new binary: {err}"))
}
// ── Tests ────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
use async_trait::async_trait;
use crate::transport::MessageId;
use std::sync::Mutex;
/// Test double for a chat transport: keeps `(room, plain text)` pairs in
/// memory, or is configured to fail (see the `fail` flag).
struct CapturingTransport {
    /// Recorded `(room_id, plain)` pairs, in the order they were pushed.
    sent: Mutex<Vec<(String, String)>>,
    /// Whether this instance was built as the failing variant.
    fail: bool,
}

impl CapturingTransport {
    /// Shared constructor: starts with an empty log and the given `fail` flag.
    fn with_outcome(fail: bool) -> Self {
        Self {
            sent: Mutex::new(Vec::new()),
            fail,
        }
    }

    /// A transport with an empty log and `fail` unset.
    fn new() -> Self {
        Self::with_outcome(false)
    }

    /// A transport with an empty log and `fail` set.
    fn failing() -> Self {
        Self::with_outcome(true)
    }

    /// Snapshot of everything recorded so far.
    fn messages(&self) -> Vec<(String, String)> {
        let recorded = self.sent.lock().unwrap();
        recorded.to_vec()
    }
}
#[async_trait]
impl ChatTransport for CapturingTransport {
    /// Record `(room_id, plain)` and report success, or return an error
    /// without recording anything when the transport is in failing mode.
    /// The HTML body is ignored.
    async fn send_message(
        &self,
        room_id: &str,
        plain: &str,
        _html: &str,
    ) -> Result<MessageId, String> {
        if self.fail {
            Err("send failed".to_string())
        } else {
            let entry = (room_id.to_owned(), plain.to_owned());
            self.sent.lock().unwrap().push(entry);
            Ok("msg-id".to_string())
        }
    }

    /// Edits are accepted and discarded.
    async fn edit_message(
        &self,
        _room_id: &str,
        _original_message_id: &str,
        _plain: &str,
        _html: &str,
    ) -> Result<(), String> {
        Ok(())
    }

    /// Typing indicators are accepted and discarded.
    async fn send_typing(&self, _room_id: &str, _typing: bool) -> Result<(), String> {
        Ok(())
    }
}
/// A manual shutdown reaches every configured channel, in order, and the
/// text mentions both going offline and the manual-stop reason.
#[tokio::test]
async fn notify_manual_sends_to_all_channels() {
    let transport = Arc::new(CapturingTransport::new());
    let shared: Arc<dyn ChatTransport> = transport.clone();
    let rooms = vec!["#channel1".to_string(), "#channel2".to_string()];
    let notifier = BotShutdownNotifier::new(shared, rooms, "Timmy".to_string());

    notifier.notify(ShutdownReason::Manual).await;

    let msgs = transport.messages();
    assert_eq!(msgs.len(), 2);
    assert_eq!(msgs[0].0, "#channel1");
    assert_eq!(msgs[1].0, "#channel2");

    // Message must indicate manual stop.
    let first_text = msgs[0].1.as_str();
    assert!(
        first_text.contains("offline"),
        "expected 'offline' in manual message: {}",
        msgs[0].1
    );
    let names_reason = ["stopped", "manual"]
        .iter()
        .any(|needle| first_text.contains(*needle));
    assert!(
        names_reason,
        "expected reason in manual message: {}",
        msgs[0].1
    );
}
/// A rebuild shutdown sends exactly one message to the single configured
/// channel, and that message names the rebuild as the reason.
#[tokio::test]
async fn notify_rebuild_sends_rebuild_reason() {
    let transport = Arc::new(CapturingTransport::new());
    let shared: Arc<dyn ChatTransport> = transport.clone();
    let notifier = BotShutdownNotifier::new(
        shared,
        vec!["#general".to_string()],
        "Timmy".to_string(),
    );

    notifier.notify(ShutdownReason::Rebuild).await;

    let msgs = transport.messages();
    assert_eq!(msgs.len(), 1);

    // Message must indicate rebuild, not manual stop.
    let mentions_build = ["build", "rebuild"]
        .iter()
        .any(|needle| msgs[0].1.contains(*needle));
    assert!(
        mentions_build,
        "expected rebuild reason in message: {}",
        msgs[0].1
    );
}
#[tokio::test]
async fn notify_manual_and_rebuild_messages_are_distinct() {
let transport_a = Arc::new(CapturingTransport::new());
let notifier_a = BotShutdownNotifier::new(
Arc::clone(&transport_a) as Arc<dyn ChatTransport>,
vec!["C1".to_string()],
"Bot".to_string(),
);
notifier_a.notify(ShutdownReason::Manual).await;
let transport_b = Arc::new(CapturingTransport::new());
let notifier_b = BotShutdownNotifier::new(
Arc::clone(&transport_b) as Arc<dyn ChatTransport>,
vec!["C1".to_string()],
"Bot".to_string(),
);
notifier_b.notify(ShutdownReason::Rebuild).await;
let manual_msg = &transport_a.messages()[0].1;
let rebuild_msg = &transport_b.messages()[0].1;
assert_ne!(manual_msg, rebuild_msg, "manual and rebuild messages must differ");
}
/// `notify` is best-effort: a transport that rejects every send must not
/// make it panic, and nothing may end up recorded as sent.
#[tokio::test]
async fn notify_is_best_effort_failing_send_does_not_panic() {
    // A transport that always fails should not cause notify() to panic or
    // return an error — each failure is logged and the loop moves on. Two
    // channels are configured so the loop has to continue past the first
    // failed send instead of stopping there.
    let transport = Arc::new(CapturingTransport::failing());
    let notifier = BotShutdownNotifier::new(
        Arc::clone(&transport) as Arc<dyn ChatTransport>,
        vec!["#channel".to_string(), "#channel2".to_string()],
        "Timmy".to_string(),
    );

    // Should complete without panicking.
    notifier.notify(ShutdownReason::Manual).await;

    // The failing transport rejects before recording, so the log stays empty.
    assert!(transport.messages().is_empty());
}
/// With no channels configured, `notify` sends nothing at all.
#[tokio::test]
async fn notify_with_no_channels_is_noop() {
    let transport = Arc::new(CapturingTransport::new());
    let shared: Arc<dyn ChatTransport> = transport.clone();
    let no_channels: Vec<String> = Vec::new();
    let notifier = BotShutdownNotifier::new(shared, no_channels, "Timmy".to_string());

    notifier.notify(ShutdownReason::Manual).await;

    assert!(transport.messages().is_empty());
}
}