Files
huskies/server/src/rebuild.rs
T
Timmy 2d8ccb3eb6 huskies: rename project from storkit to huskies
Rename all references from storkit to huskies across the codebase:
- .storkit/ directory → .huskies/
- Binary name, Cargo package name, Docker image references
- Server code, frontend code, config files, scripts
- Fix script/test to build frontend before cargo clippy/test
  so merge worktrees have frontend/dist available for RustEmbed

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-03 16:12:52 +01:00

478 lines
17 KiB
Rust

//! Server rebuild and restart logic shared between the MCP tool and Matrix bot command.
use crate::agents::AgentPool;
use crate::slog;
use crate::chat::ChatTransport;
use std::path::Path;
use std::sync::Arc;
// ── Shutdown notification ────────────────────────────────────────────────
/// The reason the server is shutting down.
///
/// Used to select the appropriate shutdown message sent to active bot channels.
/// The enum carries no data, so equality is total and it implements [`Eq`] in
/// addition to [`PartialEq`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ShutdownReason {
    /// The operator stopped the server manually (SIGINT / SIGTERM / ctrl-c).
    Manual,
    /// A rebuild-and-restart was requested (via MCP tool or bot command).
    Rebuild,
}
/// Broadcasts best-effort lifecycle announcements to all configured bot channels.
///
/// Wraps a [`ChatTransport`] together with the list of channel/room IDs the
/// bot is active in. Calling [`notify`](Self::notify) (shutdown) or
/// [`notify_startup`](Self::notify_startup) is best-effort — failures are
/// logged but never propagate, so neither startup nor shutdown is blocked by
/// a failed send.
pub struct BotShutdownNotifier {
/// Transport every announcement is sent through.
transport: Arc<dyn ChatTransport>,
/// Channel/room IDs that receive each announcement, in send order.
channels: Vec<String>,
/// Display name interpolated into the announcement text.
bot_name: String,
}
impl BotShutdownNotifier {
    /// Create a notifier that announces on `channels` through `transport`.
    ///
    /// `bot_name` is interpolated into every announcement text.
    pub fn new(
        transport: Arc<dyn ChatTransport>,
        channels: Vec<String>,
        bot_name: String,
    ) -> Self {
        Self {
            transport,
            channels,
            bot_name,
        }
    }

    /// Send a startup announcement ("<bot_name> is online.") to all configured
    /// channels.
    ///
    /// Intended to be called once per process start so users know the bot is
    /// online. Errors are logged and ignored — startup is never blocked by a
    /// failed send.
    pub async fn notify_startup(&self) {
        let msg = format!("{} is online.", self.bot_name);
        self.broadcast("startup", &msg).await;
    }

    /// Send a shutdown message to all configured channels.
    ///
    /// The wording depends on `reason` so that users can tell a manual stop
    /// from a rebuild. Errors from individual sends are logged and ignored so
    /// that a single failing channel does not prevent messages from reaching
    /// the rest.
    pub async fn notify(&self, reason: ShutdownReason) {
        let msg = match reason {
            ShutdownReason::Manual => {
                format!("{} is going offline (server stopped).", self.bot_name)
            }
            ShutdownReason::Rebuild => {
                format!(
                    "{} is going offline to pick up a new build.",
                    self.bot_name
                )
            }
        };
        self.broadcast("shutdown", &msg).await;
    }

    /// Deliver `msg` to every configured channel, one after another.
    ///
    /// `phase` ("startup" or "shutdown") only labels the log line written when
    /// a send fails. A failure never aborts the loop, so the remaining
    /// channels are still attempted.
    async fn broadcast(&self, phase: &str, msg: &str) {
        for channel in &self.channels {
            // The announcement contains no markup, so the same text is passed
            // as both the plain and the HTML body.
            if let Err(e) = self.transport.send_message(channel, msg, msg).await {
                slog!("[{phase}] Failed to send {phase} message to {channel}: {e}");
            }
        }
    }
}
// ── Rebuild ──────────────────────────────────────────────────────────────
/// Rebuild the server binary and re-exec.
///
/// 1. Stops all running agents via `AgentPool::kill_all_children`.
/// 2. Rebuilds the frontend bundle with `npm run build` when
///    `frontend/package.json` exists in the workspace.
/// 3. Runs `cargo build [--release] -p huskies` from the workspace root,
///    matching the current build profile (debug or release).
/// 4. If either build fails, returns the build error (server stays up).
/// 5. If the builds succeed and the new binary is present, sends a best-effort
///    shutdown notification (if a [`BotShutdownNotifier`] is provided), then
///    re-execs the process with the new binary via
///    `std::os::unix::process::CommandExt::exec()`.
///
/// On success this function never returns, because the process image is
/// replaced.
///
/// # Errors
///
/// Returns a human-readable message when the workspace root cannot be
/// determined, a build command cannot be started or exits unsuccessfully, the
/// freshly built binary is missing, or `exec()` itself fails.
pub async fn rebuild_and_restart(
    agents: &AgentPool,
    project_root: &Path,
    notifier: Option<&BotShutdownNotifier>,
) -> Result<String, String> {
    slog!("[rebuild] Rebuild and restart requested");

    // 1. Stop all running agents. The count is used for logging only; if the
    //    agent list cannot be obtained it is treated as empty.
    let running_count = agents
        .list_agents()
        .unwrap_or_default()
        .iter()
        .filter(|a| a.status == crate::agents::AgentStatus::Running)
        .count();
    if running_count > 0 {
        slog!("[rebuild] Stopping {running_count} running agent(s) before rebuild");
    }
    agents.kill_all_children();

    // 2. Find the workspace root (parent of the server binary's source).
    //    CARGO_MANIFEST_DIR at compile time points to the `server/` crate;
    //    the workspace root is its parent.
    let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
    let workspace_root = manifest_dir
        .parent()
        .ok_or_else(|| "Cannot determine workspace root from CARGO_MANIFEST_DIR".to_string())?;
    slog!(
        "[rebuild] Building server from workspace root: {}",
        workspace_root.display()
    );

    // 3. Rebuild the frontend bundle so rust-embed picks up the latest assets.
    //    The blocking child process runs on the blocking pool so the async
    //    runtime is not stalled while npm works.
    let frontend_dir = workspace_root.join("frontend");
    if frontend_dir.join("package.json").exists() {
        slog!("[rebuild] Building frontend");
        let fe_output = tokio::task::spawn_blocking({
            let frontend_dir = frontend_dir.clone();
            move || {
                std::process::Command::new("npm")
                    .args(["run", "build"])
                    .current_dir(&frontend_dir)
                    .output()
            }
        })
        .await
        .map_err(|e| format!("Frontend build task panicked: {e}"))?
        .map_err(|e| format!("Failed to run npm run build: {e}"))?;
        if !fe_output.status.success() {
            let stderr = String::from_utf8_lossy(&fe_output.stderr);
            slog!("[rebuild] Frontend build failed:\n{stderr}");
            return Err(format!("Frontend build failed:\n{stderr}"));
        }
        slog!("[rebuild] Frontend build succeeded");
    }

    // 4. Build the server binary, matching the current build profile so the
    //    binary we re-exec below (target/<profile>/huskies) is the one cargo
    //    just produced.
    let build_args: Vec<&str> = if cfg!(debug_assertions) {
        vec!["build", "-p", "huskies"]
    } else {
        vec!["build", "--release", "-p", "huskies"]
    };
    slog!("[rebuild] cargo {}", build_args.join(" "));
    let output = tokio::task::spawn_blocking({
        let workspace_root = workspace_root.to_path_buf();
        move || {
            std::process::Command::new("cargo")
                .args(&build_args)
                .current_dir(&workspace_root)
                .output()
        }
    })
    .await
    .map_err(|e| format!("Build task panicked: {e}"))?
    .map_err(|e| format!("Failed to run cargo build: {e}"))?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        slog!("[rebuild] Build failed:\n{stderr}");
        return Err(format!("Build failed:\n{stderr}"));
    }

    // 5. Locate the new binary.
    //    Use the cargo output path rather than current_exe() so that rebuilds
    //    inside Docker work correctly — the running binary may be installed at
    //    /usr/local/bin/huskies (read-only) while cargo writes the new binary
    //    to /app/target/release/huskies (a writable volume).
    let profile_dir = if cfg!(debug_assertions) {
        "debug"
    } else {
        "release"
    };
    let new_exe = workspace_root
        .join("target")
        .join(profile_dir)
        .join("huskies");
    // Verify the binary exists *before* announcing shutdown or touching the
    // port file: if cargo wrote it elsewhere (e.g. a custom target dir) exec()
    // would fail and the still-running server would have told everyone it was
    // going offline.
    if !new_exe.is_file() {
        let msg = format!(
            "Build succeeded but new binary not found at {}",
            new_exe.display()
        );
        slog!("[rebuild] {msg}");
        return Err(msg);
    }
    slog!("[rebuild] Build succeeded, re-execing with new binary");

    // 6. Send shutdown notification before replacing the process so that chat
    //    participants know the bot is going offline. Best-effort only — we
    //    do not abort the rebuild if the send fails.
    if let Some(n) = notifier {
        n.notify(ShutdownReason::Rebuild).await;
    }

    // 7. Remove the port file before re-exec so the new process can write its
    //    own. Both the project root and the current directory are checked; a
    //    missing file (or any other removal error) is deliberately ignored.
    let _ = std::fs::remove_file(project_root.join(".huskies_port"));
    let _ = std::fs::remove_file(".huskies_port");

    // 8. Replace the current process with the new binary, forwarding the
    //    original command-line arguments. `args_os()` is used instead of
    //    `args()` so that non-Unicode arguments are passed through verbatim
    //    rather than panicking, and `skip(1)` tolerates an empty argv.
    //    exec() never returns on success.
    use std::os::unix::process::CommandExt;
    let err = std::process::Command::new(&new_exe)
        .args(std::env::args_os().skip(1))
        .exec();

    // If we get here, exec() failed and the old process keeps running.
    Err(format!("Failed to exec new binary: {err}"))
}
// ── Tests ────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;
    use crate::chat::MessageId;
    use async_trait::async_trait;
    use std::sync::Mutex;

    /// Test double for [`ChatTransport`]: records `(room, plain text)` pairs
    /// for every successful send, or rejects every send when `fail` is set.
    struct CapturingTransport {
        sent: Mutex<Vec<(String, String)>>,
        fail: bool,
    }

    impl CapturingTransport {
        /// Shared constructor behind [`Self::new`] and [`Self::failing`].
        fn with_failure(fail: bool) -> Self {
            Self {
                sent: Mutex::new(Vec::new()),
                fail,
            }
        }

        /// A transport whose sends all succeed.
        fn new() -> Self {
            Self::with_failure(false)
        }

        /// A transport whose sends all fail.
        fn failing() -> Self {
            Self::with_failure(true)
        }

        /// Snapshot of everything sent so far, in send order.
        fn messages(&self) -> Vec<(String, String)> {
            let recorded = self.sent.lock().unwrap();
            recorded.to_vec()
        }
    }

    #[async_trait]
    impl ChatTransport for CapturingTransport {
        async fn send_message(
            &self,
            room_id: &str,
            plain: &str,
            _html: &str,
        ) -> Result<MessageId, String> {
            if self.fail {
                Err("send failed".to_string())
            } else {
                let entry = (room_id.to_string(), plain.to_string());
                self.sent.lock().unwrap().push(entry);
                Ok("msg-id".to_string())
            }
        }

        async fn edit_message(
            &self,
            _room_id: &str,
            _original_message_id: &str,
            _plain: &str,
            _html: &str,
        ) -> Result<(), String> {
            Ok(())
        }

        async fn send_typing(&self, _room_id: &str, _typing: bool) -> Result<(), String> {
            Ok(())
        }
    }

    /// Wire `transport` into a notifier for `channels` / `bot_name`, returning
    /// both so a test can drive the notifier and inspect what was sent.
    fn setup(
        transport: CapturingTransport,
        channels: &[&str],
        bot_name: &str,
    ) -> (Arc<CapturingTransport>, BotShutdownNotifier) {
        let transport = Arc::new(transport);
        let notifier = BotShutdownNotifier::new(
            Arc::clone(&transport) as Arc<dyn ChatTransport>,
            channels.iter().map(|&c| c.to_owned()).collect(),
            bot_name.to_string(),
        );
        (transport, notifier)
    }

    // -- notify ---------------------------------------------------------------

    #[tokio::test]
    async fn notify_manual_sends_to_all_channels() {
        let (transport, notifier) = setup(
            CapturingTransport::new(),
            &["#channel1", "#channel2"],
            "Timmy",
        );

        notifier.notify(ShutdownReason::Manual).await;

        let sent = transport.messages();
        assert_eq!(sent.len(), 2);
        assert_eq!(sent[0].0, "#channel1");
        assert_eq!(sent[1].0, "#channel2");
        // The text has to say the bot is going offline and why.
        assert!(
            sent[0].1.contains("offline"),
            "expected 'offline' in manual message: {}",
            sent[0].1
        );
        assert!(
            sent[0].1.contains("stopped") || sent[0].1.contains("manual"),
            "expected reason in manual message: {}",
            sent[0].1
        );
    }

    #[tokio::test]
    async fn notify_rebuild_sends_rebuild_reason() {
        let (transport, notifier) = setup(CapturingTransport::new(), &["#general"], "Timmy");

        notifier.notify(ShutdownReason::Rebuild).await;

        let sent = transport.messages();
        assert_eq!(sent.len(), 1);
        // The text has to mention the rebuild rather than a manual stop.
        assert!(
            sent[0].1.contains("build") || sent[0].1.contains("rebuild"),
            "expected rebuild reason in message: {}",
            sent[0].1
        );
    }

    #[tokio::test]
    async fn notify_manual_and_rebuild_messages_are_distinct() {
        let (manual_transport, manual_notifier) =
            setup(CapturingTransport::new(), &["C1"], "Bot");
        manual_notifier.notify(ShutdownReason::Manual).await;

        let (rebuild_transport, rebuild_notifier) =
            setup(CapturingTransport::new(), &["C1"], "Bot");
        rebuild_notifier.notify(ShutdownReason::Rebuild).await;

        let manual_sent = manual_transport.messages();
        let rebuild_sent = rebuild_transport.messages();
        assert_ne!(
            manual_sent[0].1, rebuild_sent[0].1,
            "manual and rebuild messages must differ"
        );
    }

    #[tokio::test]
    async fn notify_is_best_effort_failing_send_does_not_panic() {
        // With a transport that rejects every send, notify() must neither
        // panic nor surface an error — the failure is only logged.
        let (_transport, notifier) = setup(CapturingTransport::failing(), &["#channel"], "Timmy");

        // Completing at all is the assertion.
        notifier.notify(ShutdownReason::Manual).await;
    }

    #[tokio::test]
    async fn notify_with_no_channels_is_noop() {
        let (transport, notifier) = setup(CapturingTransport::new(), &[], "Timmy");

        notifier.notify(ShutdownReason::Manual).await;

        assert!(transport.messages().is_empty());
    }

    // -- notify_startup -------------------------------------------------------

    #[tokio::test]
    async fn notify_startup_sends_online_message_to_all_channels() {
        let (transport, notifier) = setup(
            CapturingTransport::new(),
            &["#channel1", "#channel2"],
            "Timmy",
        );

        notifier.notify_startup().await;

        let sent = transport.messages();
        assert_eq!(sent.len(), 2);
        assert_eq!(sent[0].0, "#channel1");
        assert_eq!(sent[1].0, "#channel2");
        assert!(
            sent[0].1.contains("online"),
            "expected 'online' in startup message: {}",
            sent[0].1
        );
        assert!(
            sent[0].1.contains("Timmy"),
            "expected bot name in startup message: {}",
            sent[0].1
        );
    }

    #[tokio::test]
    async fn notify_startup_message_uses_bot_name() {
        let (transport, notifier) = setup(CapturingTransport::new(), &["#general"], "HAL");

        notifier.notify_startup().await;

        let sent = transport.messages();
        assert_eq!(sent[0].1, "HAL is online.");
    }

    #[tokio::test]
    async fn notify_startup_with_no_channels_is_noop() {
        let (transport, notifier) = setup(CapturingTransport::new(), &[], "Timmy");

        notifier.notify_startup().await;

        assert!(transport.messages().is_empty());
    }

    #[tokio::test]
    async fn notify_startup_is_best_effort_failing_send_does_not_panic() {
        let (_transport, notifier) = setup(CapturingTransport::failing(), &["#channel"], "Timmy");

        // Completing at all is the assertion.
        notifier.notify_startup().await;
    }

    #[tokio::test]
    async fn notify_startup_message_differs_from_shutdown_message() {
        let (start_transport, start_notifier) = setup(CapturingTransport::new(), &["C1"], "Bot");
        start_notifier.notify_startup().await;

        let (stop_transport, stop_notifier) = setup(CapturingTransport::new(), &["C1"], "Bot");
        stop_notifier.notify(ShutdownReason::Manual).await;

        let start_sent = start_transport.messages();
        let stop_sent = stop_transport.messages();
        assert_ne!(
            start_sent[0].1, stop_sent[0].1,
            "startup and shutdown messages must differ"
        );
    }
}