//! Server rebuild and restart logic shared between the MCP tool and Matrix bot command. use crate::agents::AgentPool; use crate::slog; use crate::chat::ChatTransport; use std::path::Path; use std::sync::Arc; // ── Shutdown notification ──────────────────────────────────────────────── /// The reason the server is shutting down. /// /// Used to select the appropriate shutdown message sent to active bot channels. #[derive(Clone, Debug, PartialEq)] pub enum ShutdownReason { /// The operator stopped the server manually (SIGINT / SIGTERM / ctrl-c). Manual, /// A rebuild-and-restart was requested (via MCP tool or bot command). Rebuild, } /// Sends a shutdown announcement to all configured bot channels. /// /// Wraps a [`ChatTransport`] together with the list of channel/room IDs the /// bot is active in. Calling [`notify`] is best-effort — failures are logged /// but never propagate, so shutdown is never blocked by a failed send. pub struct BotShutdownNotifier { transport: Arc, channels: Vec, bot_name: String, } impl BotShutdownNotifier { pub fn new( transport: Arc, channels: Vec, bot_name: String, ) -> Self { Self { transport, channels, bot_name, } } /// Send a startup announcement to all configured channels. /// /// Called once per process start so users know the bot is online. /// Errors are logged and ignored — startup is never blocked by a failed send. pub async fn notify_startup(&self) { let msg = format!("{} is online.", self.bot_name); for channel in &self.channels { if let Err(e) = self.transport.send_message(channel, &msg, &msg).await { slog!("[startup] Failed to send startup message to {channel}: {e}"); } } } /// Send a shutdown message to all configured channels. /// /// Errors from individual sends are logged and ignored so that a single /// failing channel does not prevent messages from reaching the rest. pub async fn notify(&self, reason: ShutdownReason) { let msg = match reason { ShutdownReason::Manual => { format!("{} is going offline (server stopped).", self.bot_name) } ShutdownReason::Rebuild => { format!( "{} is going offline to pick up a new build.", self.bot_name ) } }; for channel in &self.channels { if let Err(e) = self.transport.send_message(channel, &msg, &msg).await { slog!("[shutdown] Failed to send shutdown message to {channel}: {e}"); } } } } // ── Rebuild ────────────────────────────────────────────────────────────── /// Rebuild the server binary and re-exec. /// /// 1. Gracefully stops all running agents (kills PTY children). /// 2. Runs `cargo build [-p storkit]` from the workspace root, matching /// the current build profile (debug or release). /// 3. If the build fails, returns the build error (server stays up). /// 4. If the build succeeds, sends a best-effort shutdown notification (if a /// [`BotShutdownNotifier`] is provided), then re-execs the process with /// the new binary via `std::os::unix::process::CommandExt::exec()`. pub async fn rebuild_and_restart( agents: &AgentPool, project_root: &Path, notifier: Option<&BotShutdownNotifier>, ) -> Result { slog!("[rebuild] Rebuild and restart requested"); // 1. Gracefully stop all running agents. let running_count = agents .list_agents() .unwrap_or_default() .iter() .filter(|a| a.status == crate::agents::AgentStatus::Running) .count(); if running_count > 0 { slog!("[rebuild] Stopping {running_count} running agent(s) before rebuild"); } agents.kill_all_children(); // 2. Find the workspace root (parent of the server binary's source). // CARGO_MANIFEST_DIR at compile time points to the `server/` crate; // the workspace root is its parent. let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")); let workspace_root = manifest_dir .parent() .ok_or_else(|| "Cannot determine workspace root from CARGO_MANIFEST_DIR".to_string())?; slog!( "[rebuild] Building server from workspace root: {}", workspace_root.display() ); // 3. Rebuild the frontend bundle so rust-embed picks up the latest assets. let frontend_dir = workspace_root.join("frontend"); if frontend_dir.join("package.json").exists() { slog!("[rebuild] Building frontend"); let fe_output = tokio::task::spawn_blocking({ let frontend_dir = frontend_dir.clone(); move || { std::process::Command::new("npm") .args(["run", "build"]) .current_dir(&frontend_dir) .output() } }) .await .map_err(|e| format!("Frontend build task panicked: {e}"))? .map_err(|e| format!("Failed to run npm run build: {e}"))?; if !fe_output.status.success() { let stderr = String::from_utf8_lossy(&fe_output.stderr); slog!("[rebuild] Frontend build failed:\n{stderr}"); return Err(format!("Frontend build failed:\n{stderr}")); } slog!("[rebuild] Frontend build succeeded"); } // 4. Build the server binary, matching the current build profile so the // re-exec via current_exe() picks up the new binary. let build_args: Vec<&str> = if cfg!(debug_assertions) { vec!["build", "-p", "storkit"] } else { vec!["build", "--release", "-p", "storkit"] }; slog!("[rebuild] cargo {}", build_args.join(" ")); let output = tokio::task::spawn_blocking({ let workspace_root = workspace_root.to_path_buf(); move || { std::process::Command::new("cargo") .args(&build_args) .current_dir(&workspace_root) .output() } }) .await .map_err(|e| format!("Build task panicked: {e}"))? .map_err(|e| format!("Failed to run cargo build: {e}"))?; if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); slog!("[rebuild] Build failed:\n{stderr}"); return Err(format!("Build failed:\n{stderr}")); } slog!("[rebuild] Build succeeded, re-execing with new binary"); // 5. Send shutdown notification before replacing the process so that chat // participants know the bot is going offline. Best-effort only — we // do not abort the rebuild if the send fails. if let Some(n) = notifier { n.notify(ShutdownReason::Rebuild).await; } // 6. Re-exec with the new binary. // Use the cargo output path rather than current_exe() so that rebuilds // inside Docker work correctly — the running binary may be installed at // /usr/local/bin/storkit (read-only) while cargo writes the new binary // to /app/target/release/storkit (a writable volume). let new_exe = if cfg!(debug_assertions) { workspace_root.join("target/debug/storkit") } else { workspace_root.join("target/release/storkit") }; let args: Vec = std::env::args().collect(); // Remove the port file before re-exec so the new process can write its own. let port_file = project_root.join(".storkit_port"); if port_file.exists() { let _ = std::fs::remove_file(&port_file); } // Also check cwd for port file. let cwd_port_file = std::path::Path::new(".storkit_port"); if cwd_port_file.exists() { let _ = std::fs::remove_file(cwd_port_file); } // Use exec() to replace the current process. // This never returns on success. use std::os::unix::process::CommandExt; let err = std::process::Command::new(&new_exe) .args(&args[1..]) .exec(); // If we get here, exec() failed. Err(format!("Failed to exec new binary: {err}")) } // ── Tests ──────────────────────────────────────────────────────────────── #[cfg(test)] mod tests { use super::*; use async_trait::async_trait; use crate::chat::MessageId; use std::sync::Mutex; /// In-memory transport that records sent messages. struct CapturingTransport { sent: Mutex>, fail: bool, } impl CapturingTransport { fn new() -> Self { Self { sent: Mutex::new(Vec::new()), fail: false, } } fn failing() -> Self { Self { sent: Mutex::new(Vec::new()), fail: true, } } fn messages(&self) -> Vec<(String, String)> { self.sent.lock().unwrap().clone() } } #[async_trait] impl ChatTransport for CapturingTransport { async fn send_message( &self, room_id: &str, plain: &str, _html: &str, ) -> Result { if self.fail { return Err("send failed".to_string()); } self.sent .lock() .unwrap() .push((room_id.to_string(), plain.to_string())); Ok("msg-id".to_string()) } async fn edit_message( &self, _room_id: &str, _original_message_id: &str, _plain: &str, _html: &str, ) -> Result<(), String> { Ok(()) } async fn send_typing(&self, _room_id: &str, _typing: bool) -> Result<(), String> { Ok(()) } } #[tokio::test] async fn notify_manual_sends_to_all_channels() { let transport = Arc::new(CapturingTransport::new()); let notifier = BotShutdownNotifier::new( Arc::clone(&transport) as Arc, vec!["#channel1".to_string(), "#channel2".to_string()], "Timmy".to_string(), ); notifier.notify(ShutdownReason::Manual).await; let msgs = transport.messages(); assert_eq!(msgs.len(), 2); assert_eq!(msgs[0].0, "#channel1"); assert_eq!(msgs[1].0, "#channel2"); // Message must indicate manual stop. assert!( msgs[0].1.contains("offline"), "expected 'offline' in manual message: {}", msgs[0].1 ); assert!( msgs[0].1.contains("stopped") || msgs[0].1.contains("manual"), "expected reason in manual message: {}", msgs[0].1 ); } #[tokio::test] async fn notify_rebuild_sends_rebuild_reason() { let transport = Arc::new(CapturingTransport::new()); let notifier = BotShutdownNotifier::new( Arc::clone(&transport) as Arc, vec!["#general".to_string()], "Timmy".to_string(), ); notifier.notify(ShutdownReason::Rebuild).await; let msgs = transport.messages(); assert_eq!(msgs.len(), 1); // Message must indicate rebuild, not manual stop. assert!( msgs[0].1.contains("build") || msgs[0].1.contains("rebuild"), "expected rebuild reason in message: {}", msgs[0].1 ); } #[tokio::test] async fn notify_manual_and_rebuild_messages_are_distinct() { let transport_a = Arc::new(CapturingTransport::new()); let notifier_a = BotShutdownNotifier::new( Arc::clone(&transport_a) as Arc, vec!["C1".to_string()], "Bot".to_string(), ); notifier_a.notify(ShutdownReason::Manual).await; let transport_b = Arc::new(CapturingTransport::new()); let notifier_b = BotShutdownNotifier::new( Arc::clone(&transport_b) as Arc, vec!["C1".to_string()], "Bot".to_string(), ); notifier_b.notify(ShutdownReason::Rebuild).await; let manual_msg = &transport_a.messages()[0].1; let rebuild_msg = &transport_b.messages()[0].1; assert_ne!(manual_msg, rebuild_msg, "manual and rebuild messages must differ"); } #[tokio::test] async fn notify_is_best_effort_failing_send_does_not_panic() { // A transport that always fails should not cause notify() to panic or // return an error — the failure is swallowed silently. let transport = Arc::new(CapturingTransport::failing()); let notifier = BotShutdownNotifier::new( Arc::clone(&transport) as Arc, vec!["#channel".to_string()], "Timmy".to_string(), ); // Should complete without panicking. notifier.notify(ShutdownReason::Manual).await; } #[tokio::test] async fn notify_with_no_channels_is_noop() { let transport = Arc::new(CapturingTransport::new()); let notifier = BotShutdownNotifier::new( Arc::clone(&transport) as Arc, vec![], "Timmy".to_string(), ); notifier.notify(ShutdownReason::Manual).await; assert!(transport.messages().is_empty()); } // -- notify_startup ------------------------------------------------------- #[tokio::test] async fn notify_startup_sends_online_message_to_all_channels() { let transport = Arc::new(CapturingTransport::new()); let notifier = BotShutdownNotifier::new( Arc::clone(&transport) as Arc, vec!["#channel1".to_string(), "#channel2".to_string()], "Timmy".to_string(), ); notifier.notify_startup().await; let msgs = transport.messages(); assert_eq!(msgs.len(), 2); assert_eq!(msgs[0].0, "#channel1"); assert_eq!(msgs[1].0, "#channel2"); assert!( msgs[0].1.contains("online"), "expected 'online' in startup message: {}", msgs[0].1 ); assert!( msgs[0].1.contains("Timmy"), "expected bot name in startup message: {}", msgs[0].1 ); } #[tokio::test] async fn notify_startup_message_uses_bot_name() { let transport = Arc::new(CapturingTransport::new()); let notifier = BotShutdownNotifier::new( Arc::clone(&transport) as Arc, vec!["#general".to_string()], "HAL".to_string(), ); notifier.notify_startup().await; let msgs = transport.messages(); assert_eq!(msgs[0].1, "HAL is online."); } #[tokio::test] async fn notify_startup_with_no_channels_is_noop() { let transport = Arc::new(CapturingTransport::new()); let notifier = BotShutdownNotifier::new( Arc::clone(&transport) as Arc, vec![], "Timmy".to_string(), ); notifier.notify_startup().await; assert!(transport.messages().is_empty()); } #[tokio::test] async fn notify_startup_is_best_effort_failing_send_does_not_panic() { let transport = Arc::new(CapturingTransport::failing()); let notifier = BotShutdownNotifier::new( Arc::clone(&transport) as Arc, vec!["#channel".to_string()], "Timmy".to_string(), ); // Should complete without panicking. notifier.notify_startup().await; } #[tokio::test] async fn notify_startup_message_differs_from_shutdown_message() { let transport_start = Arc::new(CapturingTransport::new()); let notifier_start = BotShutdownNotifier::new( Arc::clone(&transport_start) as Arc, vec!["C1".to_string()], "Bot".to_string(), ); notifier_start.notify_startup().await; let transport_stop = Arc::new(CapturingTransport::new()); let notifier_stop = BotShutdownNotifier::new( Arc::clone(&transport_stop) as Arc, vec!["C1".to_string()], "Bot".to_string(), ); notifier_stop.notify(ShutdownReason::Manual).await; let startup_msg = &transport_start.messages()[0].1; let shutdown_msg = &transport_stop.messages()[0].1; assert_ne!(startup_msg, shutdown_msg, "startup and shutdown messages must differ"); } }