//! `project-rebuild ` chat command — rebuild Docker image, swap container, preserve state. //! //! Usage: `{bot} project-rebuild [--timeout ] [--force]` //! //! Steps performed: //! 1. Validate the project exists and has a `host_path` configured. //! 2. Check for in-flight coder/merge work (active `claude` processes in the container). //! Wait up to `--timeout` seconds for them to exit. Refuse if still active. //! 3. Build a new Docker image from the project's `Dockerfile.fragment` (if present). //! 4. Stop and remove the old container. //! 5. Start a new container from the fresh image, mounting the same host volume so //! `pipeline.db` and all CRDT state survive untouched. //! 6. Re-register the project in the gateway (same URL — port is preserved). //! //! On success the reply names the new image hash and the new container ID. //! On failure the reply names the step that failed and the recovery path. use crate::service::gateway::config::ProjectEntry; use crate::service::gateway::io::save_config; use std::collections::BTreeMap; use std::path::Path; use std::sync::Arc; use tokio::sync::RwLock; /// Default seconds to wait for in-flight work to drain before refusing. const DEFAULT_DRAIN_TIMEOUT_SECS: u64 = 60; /// A parsed `project-rebuild ` command. #[derive(Debug, PartialEq)] pub struct ProjectRebuildCommand { /// Name of the project to rebuild. pub name: String, /// Seconds to wait for agents to drain (0 = skip check). pub drain_timeout_secs: u64, /// If `true`, skip the drain check entirely. pub force: bool, } /// Parse a `project-rebuild [--timeout ] [--force]` command from a raw /// Matrix message body. /// /// Strips the bot mention prefix and checks for the `project-rebuild` keyword. /// Returns `None` when the message is not a project-rebuild command. 
pub fn extract_project_rebuild_command( message: &str, bot_name: &str, bot_user_id: &str, ) -> Option { let stripped = crate::chat::util::strip_bot_mention(message, bot_name, bot_user_id); let trimmed = stripped .trim() .trim_start_matches(|c: char| !c.is_alphanumeric()); let rest = if let Some(r) = trimmed.strip_prefix("project-rebuild") { r.trim() } else { return None; }; let mut parts = rest.split_whitespace(); let name = match parts.next() { Some(n) if !n.starts_with("--") => n.to_string(), _ => return None, }; let mut drain_timeout_secs = DEFAULT_DRAIN_TIMEOUT_SECS; let mut force = false; let remaining: Vec<&str> = parts.collect(); let mut i = 0; while i < remaining.len() { match remaining[i] { "--timeout" if i + 1 < remaining.len() => { drain_timeout_secs = remaining[i + 1] .parse() .unwrap_or(DEFAULT_DRAIN_TIMEOUT_SECS); i += 2; } "--force" => { force = true; i += 1; } _ => { i += 1; } } } Some(ProjectRebuildCommand { name, drain_timeout_secs, force, }) } /// Rebuild a project's Docker image, swap the container, and preserve all state. /// /// On success returns a message naming the new image hash and container ID. /// On failure returns a message naming the failed step and the recovery path. pub async fn handle_project_rebuild( name: &str, drain_timeout_secs: u64, force: bool, projects_store: &Arc>>, config_dir: &Path, ) -> String { // ── 1. Validate project ────────────────────────────────────────────────── let (host_path_str, project_url, ssh_port_opt) = { let projects = projects_store.read().await; let entry = match projects.get(name) { Some(e) => e.clone(), None => { let available: Vec<&String> = projects.keys().collect(); return format!( "Project `{name}` not found. 
Available: {}", available .iter() .map(|s| s.as_str()) .collect::>() .join(", ") ); } }; match entry.host_path.clone() { Some(p) => (p, entry.url.clone(), entry.ssh_port), None => { return format!( "Project `{name}` has no `host_path` configured — cannot rebuild.\n\ Only projects created with `new project --adopt` or `adopt_project` \ support the `project-rebuild` command." ); } } }; let host_path = Path::new(&host_path_str); if !host_path.exists() { return format!( "Host path `{host_path_str}` does not exist on disk — \ cannot rebuild project `{name}`." ); } // ── 2. Drain check ─────────────────────────────────────────────────────── let container_name = format!("huskies-{name}"); if !force && drain_timeout_secs > 0 && let Some(err_msg) = wait_for_drain(&container_name, drain_timeout_secs).await { return format!( "Project `{name}` rebuild aborted: {err_msg}\n\ Pass `--force` to skip the drain check or `--timeout 0` to not wait." ); } // ── 3. Build new image ─────────────────────────────────────────────────── let stacks_dir = config_dir.join("docker").join("stacks"); let (resolved_stack, _warnings) = super::new_project::detect_stack(host_path, &stacks_dir); let base_image = super::new_project::image_for_stack(resolved_stack.as_deref()); let image = match super::new_project::build_project_image(host_path, &base_image, name).await { Ok(img) => img, Err(e) => { return format!( "Rebuild failed at **image build** step.\n\ Error: {e}\n\n\ Recovery: fix `.huskies/Dockerfile.fragment` in `{host_path_str}` then retry." ); } }; let image_hash = get_image_id(&image) .await .unwrap_or_else(|_| "unknown".to_string()); let image_short: String = image_hash.chars().take(19).collect(); // ── 4. 
Stop and remove old container ──────────────────────────────────── if let Err(e) = docker_stop(&container_name).await { crate::slog!("[project-rebuild] stop '{container_name}': {e} (may already be stopped)"); } if let Err(e) = docker_rm(&container_name).await { return format!( "Rebuild failed at **container remove** step.\n\ Error: {e}\n\n\ Recovery: run `docker rm {container_name}` manually then retry." ); } // ── 5. Start new container ─────────────────────────────────────────────── let port = project_url .as_deref() .and_then(|u| u.rsplit(':').next()) .and_then(|p| p.parse::().ok()) .unwrap_or(3001); let ssh_port = ssh_port_opt.unwrap_or(2222); let home = std::env::var("HOME").unwrap_or_else(|_| "/home/huskies".to_string()); let pub_key_path = std::path::PathBuf::from(&home) .join(".huskies") .join(name) .join("id_ed25519.pub"); let pubkey = match tokio::fs::read_to_string(&pub_key_path).await { Ok(k) => k.trim().to_string(), Err(e) => { return format!( "Rebuild failed at **SSH key read** step.\n\ Error: {e}\n\ Expected public key at `{}`.\n\n\ Recovery: run `ssh-keygen -t ed25519 -N '' -f {home}/.huskies/{name}/id_ed25519` \ then retry.", pub_key_path.display() ); } }; let credentials_file = std::path::PathBuf::from(&home) .join(".claude") .join(".credentials.json"); let creds_opt = if credentials_file.exists() { Some(credentials_file.as_path()) } else { None }; let (git_user_name, git_user_email) = super::new_project::resolve_git_identity(config_dir).await; let mut docker_args = super::new_project::project_docker_run_args( &container_name, port, ssh_port, &pubkey, &git_user_name, &git_user_email, creds_opt, &super::new_project::resolve_gateway_url(), ); docker_args.push("-v".into()); docker_args.push(format!("{host_path_str}:/workspace")); let host_ssh_dir = std::path::PathBuf::from(&home).join(".ssh"); for key_name in &["id_ed25519", "id_rsa"] { let key_path = host_ssh_dir.join(key_name); if key_path.exists() { docker_args.push("-v".into()); 
docker_args.push(format!( "{}:/home/huskies/.ssh/{key_name}:ro", key_path.display() )); } } docker_args.push("--restart".into()); docker_args.push("unless-stopped".into()); docker_args.push(image.clone()); docker_args.push("huskies".into()); docker_args.push("/workspace".into()); let run_output = tokio::process::Command::new("docker") .args(&docker_args) .output() .await; let container_id = match run_output { Ok(out) if out.status.success() => String::from_utf8_lossy(&out.stdout).trim().to_string(), Ok(out) => { let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string(); return format!( "Rebuild failed at **container start** step.\n\ Error: {stderr}\n\n\ Recovery: the old container was removed. \ Start a new one manually: `docker run -d --name {container_name} ... {image} huskies /workspace`" ); } Err(e) => { return format!( "Rebuild failed at **container start** step.\n\ Error: {e}\n\n\ Recovery: start the container manually: \ `docker run -d --name {container_name} ... {image} huskies /workspace`" ); } }; let container_short: String = container_id.chars().take(12).collect(); // ── 6. Persist updated config (URL is unchanged; project already registered) ──── { let container_url = format!("http://127.0.0.1:{port}"); let mut projects = projects_store.write().await; if let Some(entry) = projects.get_mut(name) { entry.url = Some(container_url.clone()); } save_config(&projects, config_dir).await; crate::crdt_state::write_gateway_project(name, &container_url); } crate::slog!("[project-rebuild] Rebuilt '{name}': image={image_hash} container={container_id}"); format!( "Project **{name}** rebuilt.\n\ - New image: `{image}` (`{image_short}…`)\n\ - New container: `{container_name}` (`{container_short}…`)\n\ - State: `pipeline.db` and CRDT preserved (same volume bind-mount)\n\ - Port: {port} (unchanged)\n\ \n\ Use `switch {name}` then `status` to verify the pipeline." ) } /// Wait for active Claude agent processes in the container to exit. 
///
/// Polls every 5 seconds (sooner when less time than that remains) until no
/// `claude` processes remain or `timeout_secs` elapses.
/// Returns `Some(error_message)` when agents are still running after the timeout,
/// `None` when the container is idle or unreachable.
async fn wait_for_drain(container_name: &str, timeout_secs: u64) -> Option<String> {
    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(timeout_secs);
    let poll_interval = std::time::Duration::from_secs(5);

    loop {
        let active = match count_active_claude_processes(container_name).await {
            // Idle, or `docker` could not be run at all — proceed either way.
            Ok(0) | Err(_) => return None,
            Ok(n) => n,
        };

        let remaining = deadline.saturating_duration_since(std::time::Instant::now());
        if remaining.is_zero() {
            return Some(format!(
                "{active} Claude agent process(es) still running after {timeout_secs}s drain timeout."
            ));
        }
        // Never sleep past the deadline, so a short timeout is not overshot by
        // up to a full poll interval.
        tokio::time::sleep(poll_interval.min(remaining)).await;
    }
}

/// Count the number of active `claude` processes inside the given container.
///
/// Uses `docker exec <container> pgrep -f claude` — exits 0 with PID list when found,
/// exits 1 when no matches (treated as 0 active processes).
///
/// Any other non-zero exit is likewise reported as `Ok(0)`; `Err` is returned only
/// when the `docker` command itself could not be run.
async fn count_active_claude_processes(container_name: &str) -> Result<usize, String> {
    let out = tokio::process::Command::new("docker")
        .args(["exec", container_name, "pgrep", "-f", "claude"])
        .output()
        .await
        .map_err(|e| e.to_string())?;

    if !out.status.success() {
        return Ok(0);
    }

    // One PID per non-blank output line.
    let pids = String::from_utf8_lossy(&out.stdout)
        .lines()
        .filter(|l| !l.trim().is_empty())
        .count();
    Ok(pids)
}

/// Stop a running Docker container (`docker stop`).
///
/// Returns the trimmed stderr of the command as the error when it exits non-zero.
async fn docker_stop(container_name: &str) -> Result<(), String> {
    let out = tokio::process::Command::new("docker")
        .args(["stop", container_name])
        .output()
        .await
        .map_err(|e| format!("docker stop failed to spawn: {e}"))?;

    if out.status.success() {
        Ok(())
    } else {
        Err(String::from_utf8_lossy(&out.stderr).trim().to_string())
    }
}

/// Remove a stopped Docker container (`docker rm`).
async fn docker_rm(container_name: &str) -> Result<(), String> { let out = tokio::process::Command::new("docker") .args(["rm", container_name]) .output() .await .map_err(|e| format!("docker rm failed to spawn: {e}"))?; if out.status.success() { Ok(()) } else { Err(String::from_utf8_lossy(&out.stderr).trim().to_string()) } } /// Return the full image ID (sha256 digest) for a named Docker image. async fn get_image_id(image_name: &str) -> Result { let out = tokio::process::Command::new("docker") .args(["inspect", image_name, "--format", "{{.Id}}"]) .output() .await .map_err(|e| format!("docker inspect failed: {e}"))?; if out.status.success() { Ok(String::from_utf8_lossy(&out.stdout).trim().to_string()) } else { Err(String::from_utf8_lossy(&out.stderr).trim().to_string()) } } // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- #[cfg(test)] mod tests { use super::*; use crate::service::gateway::config::ProjectEntry; use std::collections::BTreeMap; use std::sync::Arc; use tokio::sync::RwLock; fn make_store( projects: Vec<(&str, ProjectEntry)>, ) -> Arc>> { let map: BTreeMap = projects .into_iter() .map(|(k, v)| (k.to_string(), v)) .collect(); Arc::new(RwLock::new(map)) } // ── parsing ──────────────────────────────────────────────────────────── #[test] fn extract_basic_command() { let cmd = extract_project_rebuild_command("Timmy project-rebuild myapp", "Timmy", "@timmy:home"); let cmd = cmd.unwrap(); assert_eq!(cmd.name, "myapp"); assert_eq!(cmd.drain_timeout_secs, DEFAULT_DRAIN_TIMEOUT_SECS); assert!(!cmd.force); } #[test] fn extract_with_force_flag() { let cmd = extract_project_rebuild_command( "@timmy project-rebuild myapp --force", "Timmy", "@timmy:home", ); let cmd = cmd.unwrap(); assert_eq!(cmd.name, "myapp"); assert!(cmd.force); } #[test] fn extract_with_timeout_flag() { let cmd = extract_project_rebuild_command( "Timmy project-rebuild myapp --timeout 
120", "Timmy", "@timmy:home", ); let cmd = cmd.unwrap(); assert_eq!(cmd.name, "myapp"); assert_eq!(cmd.drain_timeout_secs, 120); } #[test] fn extract_with_timeout_zero_skips_drain() { let cmd = extract_project_rebuild_command( "Timmy project-rebuild myapp --timeout 0", "Timmy", "@timmy:home", ); let cmd = cmd.unwrap(); assert_eq!(cmd.drain_timeout_secs, 0); } #[test] fn extract_non_rebuild_returns_none() { let cmd = extract_project_rebuild_command("Timmy status", "Timmy", "@timmy:home"); assert!(cmd.is_none()); } #[test] fn extract_rebuild_without_name_returns_none() { let cmd = extract_project_rebuild_command("Timmy project-rebuild", "Timmy", "@timmy:home"); assert!(cmd.is_none()); } #[test] fn extract_with_full_user_id() { let cmd = extract_project_rebuild_command( "@timmy:home project-rebuild alpha", "Timmy", "@timmy:home", ); assert_eq!(cmd.unwrap().name, "alpha"); } #[test] fn extract_case_insensitive_bot_mention() { let cmd = extract_project_rebuild_command("timmy project-rebuild beta", "Timmy", "@timmy:home"); assert_eq!(cmd.unwrap().name, "beta"); } // ── handle_project_rebuild validation ───────────────────────────────── #[tokio::test] async fn rebuild_unknown_project_returns_error() { let store = make_store(vec![]); let dir = tempfile::tempdir().unwrap(); let result = handle_project_rebuild("nonexistent", 0, true, &store, dir.path()).await; assert!( result.contains("not found"), "expected 'not found': {result}" ); } #[tokio::test] async fn rebuild_project_without_host_path_returns_error() { let store = make_store(vec![( "myapp", ProjectEntry { url: Some("http://127.0.0.1:3101".into()), auth_token: None, ssh_port: Some(2201), host_path: None, }, )]); let dir = tempfile::tempdir().unwrap(); let result = handle_project_rebuild("myapp", 0, true, &store, dir.path()).await; assert!( result.contains("host_path"), "expected 'host_path' mention: {result}" ); } #[tokio::test] async fn rebuild_project_with_missing_host_dir_returns_error() { let store = 
make_store(vec![( "myapp", ProjectEntry { url: Some("http://127.0.0.1:3101".into()), auth_token: None, ssh_port: Some(2201), host_path: Some("/nonexistent/path/xyz123".into()), }, )]); let dir = tempfile::tempdir().unwrap(); let result = handle_project_rebuild("myapp", 0, true, &store, dir.path()).await; assert!( result.contains("does not exist"), "expected 'does not exist': {result}" ); } /// End-to-end flow test: rebuild a project that has a valid host directory. /// /// With `--force` and `--timeout 0` the drain check is skipped. /// The function proceeds to the image build step, which fails when Docker is /// not available in CI. On failure the reply must: /// (a) name the failed step ("image build") /// (b) leave the project still registered in the gateway (state preserved) /// (c) include a recovery path /// /// When Docker IS available and the base image exists this test would exercise /// the full container stop → build → start → re-register flow. #[tokio::test] async fn rebuild_e2e_with_valid_host_path_reaches_image_build_step() { let host_dir = tempfile::tempdir().unwrap(); // Create a minimal .huskies/ directory (simulating an existing project). std::fs::create_dir_all(host_dir.path().join(".huskies")).unwrap(); let store = make_store(vec![( "myapp", ProjectEntry { url: Some("http://127.0.0.1:3101".into()), auth_token: Some("tok".into()), ssh_port: Some(2201), host_path: Some(host_dir.path().to_str().unwrap().to_string()), }, )]); let config_dir = tempfile::tempdir().unwrap(); let result = handle_project_rebuild("myapp", 0, true, &store, config_dir.path()).await; // (a) Step naming: one of several possible failure steps depending on what Docker // binaries are available in the test environment, or a success reply. 
let names_a_step = result.contains("image build") || result.contains("SSH key") || result.contains("container remove") || result.contains("container start"); let is_success = result.contains("rebuilt"); assert!( names_a_step || is_success, "result should name a step or report success: {result}" ); // (b) State preserved: project is still registered in the gateway store. let projects = store.read().await; assert!( projects.contains_key("myapp"), "project 'myapp' must remain registered after failed rebuild: {result}" ); } }