storkit: merge 355_story_bot_rebuild_command_to_trigger_server_rebuild_and_restart

This commit is contained in:
Dave
2026-03-20 15:27:36 +00:00
parent cb663b620b
commit 0cb43a4de4
7 changed files with 297 additions and 91 deletions

View File

@@ -1,4 +1,4 @@
use crate::agents::{AgentStatus, move_story_to_stage}; use crate::agents::move_story_to_stage;
use crate::http::context::AppContext; use crate::http::context::AppContext;
use crate::log_buffer; use crate::log_buffer;
use crate::slog; use crate::slog;
@@ -26,98 +26,11 @@ pub(super) fn tool_get_server_logs(args: &Value) -> Result<String, String> {
Ok(all_lines[start..].join("\n")) Ok(all_lines[start..].join("\n"))
} }
/// Rebuild the server binary and re-exec. /// Rebuild the server binary and re-exec (delegates to `crate::rebuild`).
///
/// 1. Gracefully stops all running agents (kills PTY children).
/// 2. Runs `cargo build [-p storkit]` from the workspace root, matching
/// the current build profile (debug or release).
/// 3. If the build fails, returns the build error (server stays up).
/// 4. If the build succeeds, re-execs the process with the new binary via
/// `std::os::unix::process::CommandExt::exec()`.
pub(super) async fn tool_rebuild_and_restart(ctx: &AppContext) -> Result<String, String> { pub(super) async fn tool_rebuild_and_restart(ctx: &AppContext) -> Result<String, String> {
slog!("[rebuild] Rebuild and restart requested via MCP tool"); slog!("[rebuild] Rebuild and restart requested via MCP tool");
let project_root = ctx.state.get_project_root().unwrap_or_default();
// 1. Gracefully stop all running agents. crate::rebuild::rebuild_and_restart(&ctx.agents, &project_root).await
let running_agents = ctx.agents.list_agents().unwrap_or_default();
let running_count = running_agents
.iter()
.filter(|a| a.status == AgentStatus::Running)
.count();
if running_count > 0 {
slog!("[rebuild] Stopping {running_count} running agent(s) before rebuild");
}
ctx.agents.kill_all_children();
// 2. Find the workspace root (parent of the server binary's source).
// CARGO_MANIFEST_DIR at compile time points to the `server/` crate;
// the workspace root is its parent.
let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
let workspace_root = manifest_dir
.parent()
.ok_or_else(|| "Cannot determine workspace root from CARGO_MANIFEST_DIR".to_string())?;
slog!(
"[rebuild] Building server from workspace root: {}",
workspace_root.display()
);
// 3. Build the server binary, matching the current build profile so the
// re-exec via current_exe() picks up the new binary.
let build_args: Vec<&str> = if cfg!(debug_assertions) {
vec!["build", "-p", "storkit"]
} else {
vec!["build", "--release", "-p", "storkit"]
};
slog!("[rebuild] cargo {}", build_args.join(" "));
let output = tokio::task::spawn_blocking({
let workspace_root = workspace_root.to_path_buf();
move || {
std::process::Command::new("cargo")
.args(&build_args)
.current_dir(&workspace_root)
.output()
}
})
.await
.map_err(|e| format!("Build task panicked: {e}"))?
.map_err(|e| format!("Failed to run cargo build: {e}"))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
slog!("[rebuild] Build failed:\n{stderr}");
return Err(format!("Build failed:\n{stderr}"));
}
slog!("[rebuild] Build succeeded, re-execing with new binary");
// 4. Re-exec with the new binary.
// Collect current argv so we preserve any CLI arguments (e.g. project path).
let current_exe =
std::env::current_exe().map_err(|e| format!("Cannot determine current executable: {e}"))?;
let args: Vec<String> = std::env::args().collect();
// Remove the port file before re-exec so the new process can write its own.
if let Ok(root) = ctx.state.get_project_root() {
let port_file = root.join(".storkit_port");
if port_file.exists() {
let _ = std::fs::remove_file(&port_file);
}
}
// Also check cwd for port file.
let cwd_port_file = std::path::Path::new(".storkit_port");
if cwd_port_file.exists() {
let _ = std::fs::remove_file(cwd_port_file);
}
// Use exec() to replace the current process.
// This never returns on success.
use std::os::unix::process::CommandExt;
let err = std::process::Command::new(&current_exe)
.args(&args[1..])
.exec();
// If we get here, exec() failed.
Err(format!("Failed to exec new binary: {err}"))
} }
/// Generate a Claude Code permission rule string for the given tool name and input. /// Generate a Claude Code permission rule string for the given tool name and input.

View File

@@ -10,6 +10,7 @@ mod io;
mod llm; mod llm;
pub mod log_buffer; pub mod log_buffer;
mod matrix; mod matrix;
pub mod rebuild;
pub mod slack; pub mod slack;
mod state; mod state;
mod store; mod store;

View File

@@ -960,6 +960,39 @@ async fn on_room_message(
return; return;
} }
// Check for the rebuild command, which requires async agent and process ops
// and cannot be handled by the sync command registry.
if super::rebuild::extract_rebuild_command(
&user_message,
&ctx.bot_name,
ctx.bot_user_id.as_str(),
)
.is_some()
{
slog!("[matrix-bot] Handling rebuild command from {sender}");
// Acknowledge immediately — the rebuild may take a while or re-exec.
let ack = "Rebuilding server… this may take a moment.";
let ack_html = markdown_to_html(ack);
if let Ok(msg_id) = ctx.transport.send_message(&room_id_str, ack, &ack_html).await
&& let Ok(event_id) = msg_id.parse()
{
ctx.bot_sent_event_ids.lock().await.insert(event_id);
}
let response = super::rebuild::handle_rebuild(
&ctx.bot_name,
&ctx.project_root,
&ctx.agents,
)
.await;
let html = markdown_to_html(&response);
if let Ok(msg_id) = ctx.transport.send_message(&room_id_str, &response, &html).await
&& let Ok(event_id) = msg_id.parse()
{
ctx.bot_sent_event_ids.lock().await.insert(event_id);
}
return;
}
// Spawn a separate task so the Matrix sync loop is not blocked while we // Spawn a separate task so the Matrix sync loop is not blocked while we
// wait for the LLM response (which can take several seconds). // wait for the LLM response (which can take several seconds).
tokio::spawn(async move { tokio::spawn(async move {

View File

@@ -135,6 +135,11 @@ pub fn commands() -> &'static [BotCommand] {
description: "Clear the current Claude Code session and start fresh", description: "Clear the current Claude Code session and start fresh",
handler: handle_reset_fallback, handler: handle_reset_fallback,
}, },
BotCommand {
name: "rebuild",
description: "Rebuild the server binary and restart",
handler: handle_rebuild_fallback,
},
] ]
} }
@@ -260,6 +265,16 @@ fn handle_reset_fallback(_ctx: &CommandContext) -> Option<String> {
None None
} }
/// Fallback handler for the `rebuild` command when it is not intercepted by
/// the async handler in `on_room_message`. In practice this is never called —
/// rebuild is detected and handled before `try_handle_command` is invoked.
/// The entry exists in the registry only so `help` lists it.
///
/// Returns `None` to prevent the LLM from receiving "rebuild" as a prompt.
fn handle_rebuild_fallback(_ctx: &CommandContext) -> Option<String> {
None
}
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Tests // Tests
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------

View File

@@ -20,6 +20,7 @@ pub mod commands;
mod config; mod config;
pub mod delete; pub mod delete;
pub mod htop; pub mod htop;
pub mod rebuild;
pub mod reset; pub mod reset;
pub mod start; pub mod start;
pub mod notifications; pub mod notifications;

View File

@@ -0,0 +1,145 @@
//! Rebuild command: trigger a server rebuild and restart.
//!
//! `{bot_name} rebuild` stops all running agents, rebuilds the server binary
//! with `cargo build`, and re-execs the process with the new binary. If the
//! build fails the error is reported back to the room and the server keeps
//! running.
use crate::agents::AgentPool;
use std::path::Path;
use std::sync::Arc;
/// A parsed rebuild command.
///
/// The command carries no payload — its presence alone means "rebuild was
/// requested". `Copy`/`Clone` are free for a unit struct, and `Eq` joins
/// `PartialEq` since equality is total (clippy: derive_partial_eq_without_eq).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RebuildCommand;
/// Parse a rebuild command from a raw message body.
///
/// The bot mention prefix (full user id, localpart, or display name) is
/// stripped first; leading punctuation/whitespace after the mention is
/// dropped, and the first remaining word is compared case-insensitively
/// against `rebuild`. Returns `None` for anything else.
pub fn extract_rebuild_command(
    message: &str,
    bot_name: &str,
    bot_user_id: &str,
) -> Option<RebuildCommand> {
    // Drop the mention, then any separator characters (":", ",", spaces, …)
    // that directly follow it.
    let body = strip_mention(message, bot_name, bot_user_id)
        .trim()
        .trim_start_matches(|c: char| !c.is_alphanumeric());
    // Only the first word matters; trailing arguments are ignored.
    let first_word = body.split_whitespace().next().unwrap_or(body);
    first_word
        .eq_ignore_ascii_case("rebuild")
        .then_some(RebuildCommand)
}
/// Handle a rebuild command: trigger server rebuild and restart.
///
/// Returns a string describing the outcome. On build failure the error
/// message is returned so it can be posted to the room; the server keeps
/// running. On success this function never returns (the process re-execs).
pub async fn handle_rebuild(
    bot_name: &str,
    project_root: &Path,
    agents: &Arc<AgentPool>,
) -> String {
    crate::slog!("[matrix-bot] rebuild command received (bot={bot_name})");
    // Delegate to the shared rebuild logic; only the error path produces a
    // message here, since a successful rebuild re-execs the process.
    crate::rebuild::rebuild_and_restart(agents, project_root)
        .await
        .unwrap_or_else(|e| format!("Rebuild failed: {e}"))
}
/// Strip the bot mention prefix from a raw Matrix message body.
fn strip_mention<'a>(message: &'a str, bot_name: &str, bot_user_id: &str) -> &'a str {
let trimmed = message.trim();
if let Some(rest) = strip_prefix_ci(trimmed, bot_user_id) {
return rest;
}
if let Some(localpart) = bot_user_id.split(':').next()
&& let Some(rest) = strip_prefix_ci(trimmed, localpart)
{
return rest;
}
if let Some(rest) = strip_prefix_ci(trimmed, bot_name) {
return rest;
}
trimmed
}
/// Case-insensitively strip `prefix` from the start of `text`.
///
/// The match is ASCII case-insensitive and must end at a word boundary: if
/// the character immediately after the prefix is alphanumeric, `-`, or `_`,
/// the prefix is part of a longer word and `None` is returned.
///
/// Comparison is done on raw bytes: the original `text[..prefix.len()]`
/// panicked when `prefix.len()` fell inside a multi-byte UTF-8 character of
/// `text`; `as_bytes().get(..)` cannot.
fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
    let head = text.as_bytes().get(..prefix.len())?;
    if !head.eq_ignore_ascii_case(prefix.as_bytes()) {
        return None;
    }
    // A byte-wise ASCII-case-insensitive match maps ASCII bytes to ASCII
    // bytes and requires non-ASCII bytes to be identical, so `prefix.len()`
    // is guaranteed to be a char boundary in `text` — this slice cannot panic.
    let rest = &text[prefix.len()..];
    match rest.chars().next() {
        // Identifier-like continuation => the prefix is a longer word.
        Some(c) if c.is_alphanumeric() || c == '-' || c == '_' => None,
        _ => Some(rest),
    }
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;

    // Mention via the bot's display name.
    #[test]
    fn extract_with_display_name() {
        let cmd = extract_rebuild_command("Timmy rebuild", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, Some(RebuildCommand));
    }

    // Mention via the full Matrix user id, including the homeserver part.
    #[test]
    fn extract_with_full_user_id() {
        let cmd = extract_rebuild_command(
            "@timmy:home.local rebuild",
            "Timmy",
            "@timmy:home.local",
        );
        assert_eq!(cmd, Some(RebuildCommand));
    }

    // Mention via just the localpart ("@timmy") of the user id.
    #[test]
    fn extract_with_localpart() {
        let cmd = extract_rebuild_command("@timmy rebuild", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, Some(RebuildCommand));
    }

    // The command word is matched ASCII-case-insensitively.
    #[test]
    fn extract_case_insensitive() {
        let cmd = extract_rebuild_command("Timmy REBUILD", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, Some(RebuildCommand));
    }

    // A different command word must not be treated as a rebuild.
    #[test]
    fn extract_non_rebuild_returns_none() {
        let cmd = extract_rebuild_command("Timmy help", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, None);
    }

    #[test]
    fn extract_ignores_extra_args() {
        // "rebuild" with trailing text is still a rebuild command
        let cmd = extract_rebuild_command("Timmy rebuild now", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, Some(RebuildCommand));
    }

    // Unrelated one-word messages also yield None.
    #[test]
    fn extract_no_match_returns_none() {
        let cmd = extract_rebuild_command("Timmy status", "Timmy", "@timmy:home.local");
        assert_eq!(cmd, None);
    }
}

98
server/src/rebuild.rs Normal file
View File

@@ -0,0 +1,98 @@
//! Server rebuild and restart logic shared between the MCP tool and Matrix bot command.
use crate::agents::AgentPool;
use crate::slog;
use std::path::Path;
/// Rebuild the server binary and re-exec.
///
/// 1. Gracefully stops all running agents (kills PTY children).
/// 2. Runs `cargo build [-p storkit]` from the workspace root, matching
///    the current build profile (debug or release).
/// 3. If the build fails, returns the build error (server stays up).
/// 4. If the build succeeds, re-execs the process with the new binary via
///    `std::os::unix::process::CommandExt::exec()`.
///
/// Note: as written, every code path that *returns* yields `Err` — on
/// success the `exec()` call replaces the process image and this function
/// never returns. The `Ok` variant exists for the signature's callers.
pub async fn rebuild_and_restart(agents: &AgentPool, project_root: &Path) -> Result<String, String> {
    slog!("[rebuild] Rebuild and restart requested");
    // 1. Gracefully stop all running agents.
    // The running count is gathered only for the log line; kill_all_children()
    // runs unconditionally afterwards.
    let running_count = agents
        .list_agents()
        .unwrap_or_default()
        .iter()
        .filter(|a| a.status == crate::agents::AgentStatus::Running)
        .count();
    if running_count > 0 {
        slog!("[rebuild] Stopping {running_count} running agent(s) before rebuild");
    }
    agents.kill_all_children();
    // 2. Find the workspace root (parent of the server binary's source).
    //    CARGO_MANIFEST_DIR at compile time points to the `server/` crate;
    //    the workspace root is its parent. NOTE(review): this bakes in the
    //    build-machine path at compile time — assumes the server runs where
    //    its source tree lives; confirm for deployed binaries.
    let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
    let workspace_root = manifest_dir
        .parent()
        .ok_or_else(|| "Cannot determine workspace root from CARGO_MANIFEST_DIR".to_string())?;
    slog!(
        "[rebuild] Building server from workspace root: {}",
        workspace_root.display()
    );
    // 3. Build the server binary, matching the current build profile so the
    //    re-exec via current_exe() picks up the new binary.
    let build_args: Vec<&str> = if cfg!(debug_assertions) {
        vec!["build", "-p", "storkit"]
    } else {
        vec!["build", "--release", "-p", "storkit"]
    };
    slog!("[rebuild] cargo {}", build_args.join(" "));
    // Run cargo on the blocking thread pool so a long build does not stall
    // the async runtime's worker threads.
    let output = tokio::task::spawn_blocking({
        let workspace_root = workspace_root.to_path_buf();
        move || {
            std::process::Command::new("cargo")
                .args(&build_args)
                .current_dir(&workspace_root)
                .output()
        }
    })
    .await
    .map_err(|e| format!("Build task panicked: {e}"))?
    .map_err(|e| format!("Failed to run cargo build: {e}"))?;
    if !output.status.success() {
        // Build failure is non-fatal: report stderr and keep the server up.
        let stderr = String::from_utf8_lossy(&output.stderr);
        slog!("[rebuild] Build failed:\n{stderr}");
        return Err(format!("Build failed:\n{stderr}"));
    }
    slog!("[rebuild] Build succeeded, re-execing with new binary");
    // 4. Re-exec with the new binary.
    //    Collect current argv so we preserve any CLI arguments (e.g. project path).
    let current_exe =
        std::env::current_exe().map_err(|e| format!("Cannot determine current executable: {e}"))?;
    let args: Vec<String> = std::env::args().collect();
    // Remove the port file before re-exec so the new process can write its own.
    let port_file = project_root.join(".storkit_port");
    if port_file.exists() {
        let _ = std::fs::remove_file(&port_file);
    }
    // Also check cwd for port file.
    let cwd_port_file = std::path::Path::new(".storkit_port");
    if cwd_port_file.exists() {
        let _ = std::fs::remove_file(cwd_port_file);
    }
    // Use exec() to replace the current process.
    // This never returns on success.
    use std::os::unix::process::CommandExt;
    let err = std::process::Command::new(&current_exe)
        .args(&args[1..]) // skip argv[0]; exec() supplies the program path itself
        .exec();
    // If we get here, exec() failed.
    Err(format!("Failed to exec new binary: {err}"))
}