storkit: merge 355_story_bot_rebuild_command_to_trigger_server_rebuild_and_restart

This commit is contained in:
Dave
2026-03-20 15:27:36 +00:00
parent cb663b620b
commit 0cb43a4de4
7 changed files with 297 additions and 91 deletions

View File

@@ -1,4 +1,4 @@
use crate::agents::{AgentStatus, move_story_to_stage}; use crate::agents::move_story_to_stage;
use crate::http::context::AppContext; use crate::http::context::AppContext;
use crate::log_buffer; use crate::log_buffer;
use crate::slog; use crate::slog;
@@ -26,98 +26,11 @@ pub(super) fn tool_get_server_logs(args: &Value) -> Result<String, String> {
Ok(all_lines[start..].join("\n")) Ok(all_lines[start..].join("\n"))
} }
/// Rebuild the server binary and re-exec. /// Rebuild the server binary and re-exec (delegates to `crate::rebuild`).
///
/// 1. Gracefully stops all running agents (kills PTY children).
/// 2. Runs `cargo build [-p storkit]` from the workspace root, matching
/// the current build profile (debug or release).
/// 3. If the build fails, returns the build error (server stays up).
/// 4. If the build succeeds, re-execs the process with the new binary via
/// `std::os::unix::process::CommandExt::exec()`.
pub(super) async fn tool_rebuild_and_restart(ctx: &AppContext) -> Result<String, String> { pub(super) async fn tool_rebuild_and_restart(ctx: &AppContext) -> Result<String, String> {
slog!("[rebuild] Rebuild and restart requested via MCP tool"); slog!("[rebuild] Rebuild and restart requested via MCP tool");
let project_root = ctx.state.get_project_root().unwrap_or_default();
// 1. Gracefully stop all running agents. crate::rebuild::rebuild_and_restart(&ctx.agents, &project_root).await
let running_agents = ctx.agents.list_agents().unwrap_or_default();
let running_count = running_agents
.iter()
.filter(|a| a.status == AgentStatus::Running)
.count();
if running_count > 0 {
slog!("[rebuild] Stopping {running_count} running agent(s) before rebuild");
}
ctx.agents.kill_all_children();
// 2. Find the workspace root (parent of the server binary's source).
// CARGO_MANIFEST_DIR at compile time points to the `server/` crate;
// the workspace root is its parent.
let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
let workspace_root = manifest_dir
.parent()
.ok_or_else(|| "Cannot determine workspace root from CARGO_MANIFEST_DIR".to_string())?;
slog!(
"[rebuild] Building server from workspace root: {}",
workspace_root.display()
);
// 3. Build the server binary, matching the current build profile so the
// re-exec via current_exe() picks up the new binary.
let build_args: Vec<&str> = if cfg!(debug_assertions) {
vec!["build", "-p", "storkit"]
} else {
vec!["build", "--release", "-p", "storkit"]
};
slog!("[rebuild] cargo {}", build_args.join(" "));
let output = tokio::task::spawn_blocking({
let workspace_root = workspace_root.to_path_buf();
move || {
std::process::Command::new("cargo")
.args(&build_args)
.current_dir(&workspace_root)
.output()
}
})
.await
.map_err(|e| format!("Build task panicked: {e}"))?
.map_err(|e| format!("Failed to run cargo build: {e}"))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
slog!("[rebuild] Build failed:\n{stderr}");
return Err(format!("Build failed:\n{stderr}"));
}
slog!("[rebuild] Build succeeded, re-execing with new binary");
// 4. Re-exec with the new binary.
// Collect current argv so we preserve any CLI arguments (e.g. project path).
let current_exe =
std::env::current_exe().map_err(|e| format!("Cannot determine current executable: {e}"))?;
let args: Vec<String> = std::env::args().collect();
// Remove the port file before re-exec so the new process can write its own.
if let Ok(root) = ctx.state.get_project_root() {
let port_file = root.join(".storkit_port");
if port_file.exists() {
let _ = std::fs::remove_file(&port_file);
}
}
// Also check cwd for port file.
let cwd_port_file = std::path::Path::new(".storkit_port");
if cwd_port_file.exists() {
let _ = std::fs::remove_file(cwd_port_file);
}
// Use exec() to replace the current process.
// This never returns on success.
use std::os::unix::process::CommandExt;
let err = std::process::Command::new(&current_exe)
.args(&args[1..])
.exec();
// If we get here, exec() failed.
Err(format!("Failed to exec new binary: {err}"))
} }
/// Generate a Claude Code permission rule string for the given tool name and input. /// Generate a Claude Code permission rule string for the given tool name and input.

View File

@@ -10,6 +10,7 @@ mod io;
mod llm; mod llm;
pub mod log_buffer; pub mod log_buffer;
mod matrix; mod matrix;
pub mod rebuild;
pub mod slack; pub mod slack;
mod state; mod state;
mod store; mod store;

View File

@@ -960,6 +960,39 @@ async fn on_room_message(
return; return;
} }
// Check for the rebuild command, which requires async agent and process ops
// and cannot be handled by the sync command registry.
if super::rebuild::extract_rebuild_command(
&user_message,
&ctx.bot_name,
ctx.bot_user_id.as_str(),
)
.is_some()
{
slog!("[matrix-bot] Handling rebuild command from {sender}");
// Acknowledge immediately — the rebuild may take a while or re-exec.
let ack = "Rebuilding server… this may take a moment.";
let ack_html = markdown_to_html(ack);
if let Ok(msg_id) = ctx.transport.send_message(&room_id_str, ack, &ack_html).await
&& let Ok(event_id) = msg_id.parse()
{
ctx.bot_sent_event_ids.lock().await.insert(event_id);
}
let response = super::rebuild::handle_rebuild(
&ctx.bot_name,
&ctx.project_root,
&ctx.agents,
)
.await;
let html = markdown_to_html(&response);
if let Ok(msg_id) = ctx.transport.send_message(&room_id_str, &response, &html).await
&& let Ok(event_id) = msg_id.parse()
{
ctx.bot_sent_event_ids.lock().await.insert(event_id);
}
return;
}
// Spawn a separate task so the Matrix sync loop is not blocked while we // Spawn a separate task so the Matrix sync loop is not blocked while we
// wait for the LLM response (which can take several seconds). // wait for the LLM response (which can take several seconds).
tokio::spawn(async move { tokio::spawn(async move {

View File

@@ -135,6 +135,11 @@ pub fn commands() -> &'static [BotCommand] {
description: "Clear the current Claude Code session and start fresh", description: "Clear the current Claude Code session and start fresh",
handler: handle_reset_fallback, handler: handle_reset_fallback,
}, },
BotCommand {
name: "rebuild",
description: "Rebuild the server binary and restart",
handler: handle_rebuild_fallback,
},
] ]
} }
@@ -260,6 +265,16 @@ fn handle_reset_fallback(_ctx: &CommandContext) -> Option<String> {
None None
} }
/// Fallback handler for the `rebuild` command when it is not intercepted by
/// the async handler in `on_room_message`. In practice this is never called —
/// rebuild is detected and handled before `try_handle_command` is invoked.
/// The entry exists in the registry only so `help` lists it.
///
/// Returns `None` to prevent the LLM from receiving "rebuild" as a prompt.
fn handle_rebuild_fallback(_ctx: &CommandContext) -> Option<String> {
None
}
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Tests // Tests
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------

View File

@@ -20,6 +20,7 @@ pub mod commands;
mod config; mod config;
pub mod delete; pub mod delete;
pub mod htop; pub mod htop;
pub mod rebuild;
pub mod reset; pub mod reset;
pub mod start; pub mod start;
pub mod notifications; pub mod notifications;

View File

@@ -0,0 +1,145 @@
//! Rebuild command: trigger a server rebuild and restart.
//!
//! `{bot_name} rebuild` stops all running agents, rebuilds the server binary
//! with `cargo build`, and re-execs the process with the new binary. If the
//! build fails the error is reported back to the room and the server keeps
//! running.
use crate::agents::AgentPool;
use std::path::Path;
use std::sync::Arc;
/// A parsed rebuild command.
///
/// The command carries no payload — its presence alone means "rebuild was
/// requested". `Copy`/`Clone` are free for a unit struct, and `Eq` joins
/// `PartialEq` since equality is total (clippy: derive_partial_eq_without_eq).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RebuildCommand;
/// Parse a rebuild command from a raw message body.
///
/// The bot mention prefix (full user id, localpart, or display name) is
/// stripped first; leading punctuation/whitespace after the mention is
/// dropped, and the first remaining word is compared case-insensitively
/// against `rebuild`. Returns `None` for anything else.
pub fn extract_rebuild_command(
    message: &str,
    bot_name: &str,
    bot_user_id: &str,
) -> Option<RebuildCommand> {
    // Drop the mention, then any separator characters (":", ",", spaces, …)
    // that directly follow it.
    let body = strip_mention(message, bot_name, bot_user_id)
        .trim()
        .trim_start_matches(|c: char| !c.is_alphanumeric());
    // Only the first word matters; trailing arguments are ignored.
    let first_word = body.split_whitespace().next().unwrap_or(body);
    first_word
        .eq_ignore_ascii_case("rebuild")
        .then_some(RebuildCommand)
}
/// Handle a rebuild command: trigger server rebuild and restart.
///
/// Returns a string describing the outcome. On build failure the error
/// message is returned so it can be posted to the room; the server keeps
/// running. On success this function never returns (the process re-execs).
pub async fn handle_rebuild(
    bot_name: &str,
    project_root: &Path,
    agents: &Arc<AgentPool>,
) -> String {
    crate::slog!("[matrix-bot] rebuild command received (bot={bot_name})");
    // Delegate to the shared rebuild logic; only the error path produces a
    // message here, since a successful rebuild re-execs the process.
    crate::rebuild::rebuild_and_restart(agents, project_root)
        .await
        .unwrap_or_else(|e| format!("Rebuild failed: {e}"))
}
/// Strip the bot mention prefix from a raw Matrix message body.
fn strip_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
let trimmed = message.trim();
if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
return rest;
}
if let Some(localpart) = bot_user_id.split(':').next()
&& let Some(rest) = strip_prefix_ci(trimmed, localpart)
{
return rest;
}
if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
return rest;
}
trimmed
}
/// Case-insensitively strip `prefix` from the start of `text`.
///
/// The match is ASCII case-insensitive and must end at a word boundary: if
/// the character immediately after the prefix is alphanumeric, `-`, or `_`,
/// the prefix is part of a longer word and `None` is returned.
///
/// Comparison is done on raw bytes: the original `text[..prefix.len()]`
/// panicked when `prefix.len()` fell inside a multi-byte UTF-8 character of
/// `text`; `as_bytes().get(..)` cannot.
fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
    let head = text.as_bytes().get(..prefix.len())?;
    if !head.eq_ignore_ascii_case(prefix.as_bytes()) {
        return None;
    }
    // A byte-wise ASCII-case-insensitive match maps ASCII bytes to ASCII
    // bytes and requires non-ASCII bytes to be identical, so `prefix.len()`
    // is guaranteed to be a char boundary in `text` — this slice cannot panic.
    let rest = &text[prefix.len()..];
    match rest.chars().next() {
        // Identifier-like continuation => the prefix is a longer word.
        Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None,
        _ => Some(rest),
    }
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;

    // Mention via the bot's display name.
    #[test]
    fn extract_with_display_name() {
        let cmd = extract_rebuild_command("Timmy rebuild", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, Some(RebuildCommand));
    }

    // Mention via the full Matrix user id, including the homeserver part.
    #[test]
    fn extract_with_full_user_id() {
        let cmd = extract_rebuild_command(
            "@timmy:home.local rebuild",
            "Timmy",
            "@timmy:home.local",
        );
        assert_eq!(cmd, Some(RebuildCommand));
    }

    // Mention via just the localpart ("@timmy") of the user id.
    #[test]
    fn extract_with_localpart() {
        let cmd = extract_rebuild_command("@timmy rebuild", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, Some(RebuildCommand));
    }

    // The command word is matched ASCII-case-insensitively.
    #[test]
    fn extract_case_insensitive() {
        let cmd = extract_rebuild_command("Timmy REBUILD", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, Some(RebuildCommand));
    }

    // A different command word must not be treated as a rebuild.
    #[test]
    fn extract_non_rebuild_returns_none() {
        let cmd = extract_rebuild_command("Timmy help", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, None);
    }

    #[test]
    fn extract_ignores_extra_args() {
        // "rebuild" with trailing text is still a rebuild command
        let cmd = extract_rebuild_command("Timmy rebuild now", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, Some(RebuildCommand));
    }

    // Unrelated one-word messages also yield None.
    #[test]
    fn extract_no_match_returns_none() {
        let cmd = extract_rebuild_command("Timmy status", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, None);
    }
}

98
server/src/rebuild.rs Normal file
View File

@@ -0,0 +1,98 @@
//! Server rebuild and restart logic shared between the MCP tool and Matrix bot command.
use crate::agents::AgentPool;
use crate::slog;
use std::path::Path;
/// Rebuild the server binary and re-exec.
///
/// 1. Gracefully stops all running agents (kills PTY children).
/// 2. Runs `cargo build [-p storkit]` from the workspace root, matching
///    the current build profile (debug or release).
/// 3. If the build fails, returns the build error (server stays up).
/// 4. If the build succeeds, re-execs the process with the new binary via
///    `std::os::unix::process::CommandExt::exec()`.
///
/// Note: as written, every code path that *returns* yields `Err` — on
/// success the `exec()` call replaces the process image and this function
/// never returns. The `Ok` variant exists for the signature's callers.
pub async fn rebuild_and_restart(agents: &AgentPool, project_root: &Path) -> Result<String, String> {
    slog!("[rebuild] Rebuild and restart requested");
    // 1. Gracefully stop all running agents.
    // The running count is gathered only for the log line; kill_all_children()
    // runs unconditionally afterwards.
    let running_count = agents
        .list_agents()
        .unwrap_or_default()
        .iter()
        .filter(|a| a.status == crate::agents::AgentStatus::Running)
        .count();
    if running_count > 0 {
        slog!("[rebuild] Stopping {running_count} running agent(s) before rebuild");
    }
    agents.kill_all_children();
    // 2. Find the workspace root (parent of the server binary's source).
    //    CARGO_MANIFEST_DIR at compile time points to the `server/` crate;
    //    the workspace root is its parent. NOTE(review): this bakes in the
    //    build-machine path at compile time — assumes the server runs where
    //    its source tree lives; confirm for deployed binaries.
    let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
    let workspace_root = manifest_dir
        .parent()
        .ok_or_else(|| "Cannot determine workspace root from CARGO_MANIFEST_DIR".to_string())?;
    slog!(
        "[rebuild] Building server from workspace root: {}",
        workspace_root.display()
    );
    // 3. Build the server binary, matching the current build profile so the
    //    re-exec via current_exe() picks up the new binary.
    let build_args: Vec<&str> = if cfg!(debug_assertions) {
        vec!["build", "-p", "storkit"]
    } else {
        vec!["build", "--release", "-p", "storkit"]
    };
    slog!("[rebuild] cargo {}", build_args.join(" "));
    // Run cargo on the blocking thread pool so a long build does not stall
    // the async runtime's worker threads.
    let output = tokio::task::spawn_blocking({
        let workspace_root = workspace_root.to_path_buf();
        move || {
            std::process::Command::new("cargo")
                .args(&build_args)
                .current_dir(&workspace_root)
                .output()
        }
    })
    .await
    .map_err(|e| format!("Build task panicked: {e}"))?
    .map_err(|e| format!("Failed to run cargo build: {e}"))?;
    if !output.status.success() {
        // Build failure is non-fatal: report stderr and keep the server up.
        let stderr = String::from_utf8_lossy(&output.stderr);
        slog!("[rebuild] Build failed:\n{stderr}");
        return Err(format!("Build failed:\n{stderr}"));
    }
    slog!("[rebuild] Build succeeded, re-execing with new binary");
    // 4. Re-exec with the new binary.
    //    Collect current argv so we preserve any CLI arguments (e.g. project path).
    let current_exe =
        std::env::current_exe().map_err(|e| format!("Cannot determine current executable: {e}"))?;
    let args: Vec<String> = std::env::args().collect();
    // Remove the port file before re-exec so the new process can write its own.
    let port_file = project_root.join(".storkit_port");
    if port_file.exists() {
        let _ = std::fs::remove_file(&port_file);
    }
    // Also check cwd for port file.
    let cwd_port_file = std::path::Path::new(".storkit_port");
    if cwd_port_file.exists() {
        let _ = std::fs::remove_file(cwd_port_file);
    }
    // Use exec() to replace the current process.
    // This never returns on success.
    use std::os::unix::process::CommandExt;
    let err = std::process::Command::new(&current_exe)
        .args(&args[1..]) // skip argv[0]; exec() supplies the program path itself
        .exec();
    // If we get here, exec() failed.
    Err(format!("Failed to exec new binary: {err}"))
}