storkit: merge 366_story_bot_sends_shutdown_message_on_server_stop_or_rebuild
This commit is contained in:
+248
-4
@@ -2,7 +2,72 @@
|
||||
|
||||
use crate::agents::AgentPool;
|
||||
use crate::slog;
|
||||
use crate::transport::ChatTransport;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
// ── Shutdown notification ────────────────────────────────────────────────
|
||||
|
||||
/// The reason the server is shutting down.
///
/// Used to select the appropriate shutdown message sent to active bot channels.
///
/// The enum carries no data, so it is `Copy`: a reason can be passed by value
/// to several consumers without cloning.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ShutdownReason {
    /// The operator stopped the server manually (SIGINT / SIGTERM / ctrl-c).
    Manual,
    /// A rebuild-and-restart was requested (via MCP tool or bot command).
    Rebuild,
}
|
||||
|
||||
/// Sends a shutdown announcement to all configured bot channels.
|
||||
///
|
||||
/// Wraps a [`ChatTransport`] together with the list of channel/room IDs the
|
||||
/// bot is active in. Calling [`notify`] is best-effort — failures are logged
|
||||
/// but never propagate, so shutdown is never blocked by a failed send.
|
||||
pub struct BotShutdownNotifier {
|
||||
transport: Arc<dyn ChatTransport>,
|
||||
channels: Vec<String>,
|
||||
bot_name: String,
|
||||
}
|
||||
|
||||
impl BotShutdownNotifier {
|
||||
pub fn new(
|
||||
transport: Arc<dyn ChatTransport>,
|
||||
channels: Vec<String>,
|
||||
bot_name: String,
|
||||
) -> Self {
|
||||
Self {
|
||||
transport,
|
||||
channels,
|
||||
bot_name,
|
||||
}
|
||||
}
|
||||
|
||||
/// Send a shutdown message to all configured channels.
|
||||
///
|
||||
/// Errors from individual sends are logged and ignored so that a single
|
||||
/// failing channel does not prevent messages from reaching the rest.
|
||||
pub async fn notify(&self, reason: ShutdownReason) {
|
||||
let msg = match reason {
|
||||
ShutdownReason::Manual => {
|
||||
format!("{} is going offline (server stopped).", self.bot_name)
|
||||
}
|
||||
ShutdownReason::Rebuild => {
|
||||
format!(
|
||||
"{} is going offline to pick up a new build.",
|
||||
self.bot_name
|
||||
)
|
||||
}
|
||||
};
|
||||
for channel in &self.channels {
|
||||
if let Err(e) = self.transport.send_message(channel, &msg, &msg).await {
|
||||
slog!("[shutdown] Failed to send shutdown message to {channel}: {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Rebuild ──────────────────────────────────────────────────────────────
|
||||
|
||||
/// Rebuild the server binary and re-exec.
|
||||
///
|
||||
@@ -10,9 +75,14 @@ use std::path::Path;
|
||||
/// 2. Runs `cargo build [-p storkit]` from the workspace root, matching
|
||||
/// the current build profile (debug or release).
|
||||
/// 3. If the build fails, returns the build error (server stays up).
|
||||
/// 4. If the build succeeds, re-execs the process with the new binary via
|
||||
/// `std::os::unix::process::CommandExt::exec()`.
|
||||
pub async fn rebuild_and_restart(agents: &AgentPool, project_root: &Path) -> Result<String, String> {
|
||||
/// 4. If the build succeeds, sends a best-effort shutdown notification (if a
|
||||
/// [`BotShutdownNotifier`] is provided), then re-execs the process with
|
||||
/// the new binary via `std::os::unix::process::CommandExt::exec()`.
|
||||
pub async fn rebuild_and_restart(
|
||||
agents: &AgentPool,
|
||||
project_root: &Path,
|
||||
notifier: Option<&BotShutdownNotifier>,
|
||||
) -> Result<String, String> {
|
||||
slog!("[rebuild] Rebuild and restart requested");
|
||||
|
||||
// 1. Gracefully stop all running agents.
|
||||
@@ -69,7 +139,14 @@ pub async fn rebuild_and_restart(agents: &AgentPool, project_root: &Path) -> Res
|
||||
|
||||
slog!("[rebuild] Build succeeded, re-execing with new binary");
|
||||
|
||||
// 4. Re-exec with the new binary.
|
||||
// 4. Send shutdown notification before replacing the process so that chat
|
||||
// participants know the bot is going offline. Best-effort only — we
|
||||
// do not abort the rebuild if the send fails.
|
||||
if let Some(n) = notifier {
|
||||
n.notify(ShutdownReason::Rebuild).await;
|
||||
}
|
||||
|
||||
// 5. Re-exec with the new binary.
|
||||
// Use the cargo output path rather than current_exe() so that rebuilds
|
||||
// inside Docker work correctly — the running binary may be installed at
|
||||
// /usr/local/bin/storkit (read-only) while cargo writes the new binary
|
||||
@@ -102,3 +179,170 @@ pub async fn rebuild_and_restart(agents: &AgentPool, project_root: &Path) -> Res
|
||||
// If we get here, exec() failed.
|
||||
Err(format!("Failed to exec new binary: {err}"))
|
||||
}
|
||||
|
||||
// ── Tests ────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::transport::MessageId;
    use async_trait::async_trait;
    use std::sync::Mutex;

    /// In-memory transport that records sent messages.
    ///
    /// Each recorded entry is `(room_id, plain_text)`; the HTML argument is
    /// ignored. Sends can be made to fail globally or for specific rooms.
    struct CapturingTransport {
        sent: Mutex<Vec<(String, String)>>,
        /// When `true`, every `send_message` call returns an error.
        fail: bool,
        /// Room IDs whose sends return an error even when `fail` is `false`.
        failing_rooms: Vec<String>,
    }

    impl CapturingTransport {
        /// Transport on which every send succeeds.
        fn new() -> Self {
            Self {
                sent: Mutex::new(Vec::new()),
                fail: false,
                failing_rooms: Vec::new(),
            }
        }

        /// Transport on which every send fails.
        fn failing() -> Self {
            Self {
                fail: true,
                ..Self::new()
            }
        }

        /// Transport on which sends fail only for the listed rooms.
        fn failing_for(rooms: &[&str]) -> Self {
            Self {
                failing_rooms: rooms.iter().map(|room| (*room).to_owned()).collect(),
                ..Self::new()
            }
        }

        /// Snapshot of the `(room_id, plain_text)` pairs recorded so far.
        fn messages(&self) -> Vec<(String, String)> {
            self.sent.lock().unwrap().clone()
        }
    }

    #[async_trait]
    impl ChatTransport for CapturingTransport {
        async fn send_message(
            &self,
            room_id: &str,
            plain: &str,
            _html: &str,
        ) -> Result<MessageId, String> {
            if self.fail || self.failing_rooms.iter().any(|room| room == room_id) {
                return Err("send failed".to_string());
            }
            self.sent
                .lock()
                .unwrap()
                .push((room_id.to_string(), plain.to_string()));
            Ok("msg-id".to_string())
        }

        async fn edit_message(
            &self,
            _room_id: &str,
            _original_message_id: &str,
            _plain: &str,
            _html: &str,
        ) -> Result<(), String> {
            Ok(())
        }

        async fn send_typing(&self, _room_id: &str, _typing: bool) -> Result<(), String> {
            Ok(())
        }
    }

    #[tokio::test]
    async fn notify_manual_sends_to_all_channels() {
        let transport = Arc::new(CapturingTransport::new());
        let notifier = BotShutdownNotifier::new(
            Arc::clone(&transport) as Arc<dyn ChatTransport>,
            vec!["#channel1".to_string(), "#channel2".to_string()],
            "Timmy".to_string(),
        );

        notifier.notify(ShutdownReason::Manual).await;

        let msgs = transport.messages();
        assert_eq!(msgs.len(), 2);
        assert_eq!(msgs[0].0, "#channel1");
        assert_eq!(msgs[1].0, "#channel2");
        // Message must indicate manual stop.
        assert!(
            msgs[0].1.contains("offline"),
            "expected 'offline' in manual message: {}",
            msgs[0].1
        );
        assert!(
            msgs[0].1.contains("stopped") || msgs[0].1.contains("manual"),
            "expected reason in manual message: {}",
            msgs[0].1
        );
    }

    #[tokio::test]
    async fn notify_rebuild_sends_rebuild_reason() {
        let transport = Arc::new(CapturingTransport::new());
        let notifier = BotShutdownNotifier::new(
            Arc::clone(&transport) as Arc<dyn ChatTransport>,
            vec!["#general".to_string()],
            "Timmy".to_string(),
        );

        notifier.notify(ShutdownReason::Rebuild).await;

        let msgs = transport.messages();
        assert_eq!(msgs.len(), 1);
        // Message must indicate rebuild ("rebuild" would also match "build")...
        assert!(
            msgs[0].1.contains("build"),
            "expected rebuild reason in message: {}",
            msgs[0].1
        );
        // ...and must not carry the manual-stop wording.
        assert!(
            !msgs[0].1.contains("stopped"),
            "rebuild message must not mention a manual stop: {}",
            msgs[0].1
        );
    }

    #[tokio::test]
    async fn notify_manual_and_rebuild_messages_are_distinct() {
        let transport_a = Arc::new(CapturingTransport::new());
        let notifier_a = BotShutdownNotifier::new(
            Arc::clone(&transport_a) as Arc<dyn ChatTransport>,
            vec!["C1".to_string()],
            "Bot".to_string(),
        );
        notifier_a.notify(ShutdownReason::Manual).await;

        let transport_b = Arc::new(CapturingTransport::new());
        let notifier_b = BotShutdownNotifier::new(
            Arc::clone(&transport_b) as Arc<dyn ChatTransport>,
            vec!["C1".to_string()],
            "Bot".to_string(),
        );
        notifier_b.notify(ShutdownReason::Rebuild).await;

        let manual_msg = &transport_a.messages()[0].1;
        let rebuild_msg = &transport_b.messages()[0].1;
        assert_ne!(manual_msg, rebuild_msg, "manual and rebuild messages must differ");
    }

    #[tokio::test]
    async fn notify_is_best_effort_failing_send_does_not_panic() {
        // A transport that always fails should not cause notify() to panic or
        // return an error — the failure is logged and otherwise ignored.
        let transport = Arc::new(CapturingTransport::failing());
        let notifier = BotShutdownNotifier::new(
            Arc::clone(&transport) as Arc<dyn ChatTransport>,
            vec!["#channel".to_string()],
            "Timmy".to_string(),
        );
        // Should complete without panicking.
        notifier.notify(ShutdownReason::Manual).await;
        // Nothing was delivered, since every send failed.
        assert!(transport.messages().is_empty());
    }

    #[tokio::test]
    async fn notify_continues_after_a_failing_channel() {
        // One failing channel must not stop delivery to the channels after it.
        let transport = Arc::new(CapturingTransport::failing_for(&["#broken"]));
        let notifier = BotShutdownNotifier::new(
            Arc::clone(&transport) as Arc<dyn ChatTransport>,
            vec![
                "#first".to_string(),
                "#broken".to_string(),
                "#last".to_string(),
            ],
            "Timmy".to_string(),
        );

        notifier.notify(ShutdownReason::Rebuild).await;

        let rooms: Vec<String> = transport
            .messages()
            .into_iter()
            .map(|(room, _)| room)
            .collect();
        assert_eq!(rooms, ["#first", "#last"]);
    }

    #[tokio::test]
    async fn notify_with_no_channels_is_noop() {
        let transport = Arc::new(CapturingTransport::new());
        let notifier = BotShutdownNotifier::new(
            Arc::clone(&transport) as Arc<dyn ChatTransport>,
            vec![],
            "Timmy".to_string(),
        );
        notifier.notify(ShutdownReason::Manual).await;
        assert!(transport.messages().is_empty());
    }
}
|
||||
|
||||
Reference in New Issue
Block a user