huskies: merge 618_story_extract_mcp_only_domain_services

2026-04-24 21:12:03 +00:00
parent 360bca45c8
commit c16d9e471d
29 changed files with 1924 additions and 409 deletions
@@ -0,0 +1,64 @@
+//! Shell I/O — the ONLY place in `service::shell/` that may perform side effects.
+//!
+//! Side effects here include: filesystem existence and canonicalization checks,
+//! process spawning via `std::process::Command`, and reading pipe output.
+//! All pure logic (pattern matching, output truncation, count parsing) lives in
+//! `path_guard.rs`.
+
+use super::Error;
+use std::path::{Path, PathBuf};
+
+/// Validate that `working_dir` is an absolute path that exists on disk and
+/// lies inside the project's `.huskies/worktrees/` or `.huskies/merge_workspace/`
+/// directory. Returns the canonicalized path on success.
+///
+/// # Errors
+/// - [`Error::Validation`] if the path is relative or does not exist.
+/// - [`Error::PathNotAllowed`] if the path is outside the allowed roots.
+/// - [`Error::Io`] if canonicalization fails.
+pub fn validate_working_dir(working_dir: &str, project_root: &Path) -> Result<PathBuf, Error> {
+    let wd = PathBuf::from(working_dir);
+
+    if !wd.is_absolute() {
+        return Err(Error::Validation(
+            "working_dir must be an absolute path".to_string(),
+        ));
+    }
+    if !wd.exists() {
+        return Err(Error::Validation(format!(
+            "working_dir does not exist: {working_dir}"
+        )));
+    }
+
+    let worktrees_root = project_root.join(".huskies").join("worktrees");
+
+    let canonical_wd = wd
+        .canonicalize()
+        .map_err(|e| Error::Io(format!("Cannot canonicalize working_dir: {e}")))?;
+
+    let canonical_wt = if worktrees_root.exists() {
+        worktrees_root
+            .canonicalize()
+            .map_err(|e| Error::Io(format!("Cannot canonicalize worktrees root: {e}")))?
+    } else {
+        return Err(Error::PathNotAllowed(
+            "No worktrees directory found in project".to_string(),
+        ));
+    };
+
+    // Also allow the merge workspace so mergemaster can fix conflicts.
+    let merge_workspace = project_root.join(".huskies").join("merge_workspace");
+    let canonical_mw = merge_workspace.canonicalize().unwrap_or_default();
+
+    let in_worktrees = canonical_wd.starts_with(&canonical_wt);
+    let in_merge_ws =
+        !canonical_mw.as_os_str().is_empty() && canonical_wd.starts_with(&canonical_mw);
+
+    if !in_worktrees && !in_merge_ws {
+        return Err(Error::PathNotAllowed(format!(
+            "working_dir must be inside .huskies/worktrees/ or .huskies/merge_workspace/. Got: {working_dir}"
+        )));
+    }
+
+    Ok(canonical_wd)
+}
@@ -0,0 +1,90 @@
+//! Shell service — command safety, path sandboxing, and output helpers.
+//!
+//! Extracted from `http/mcp/shell_tools.rs` following the conventions in
+//! `docs/architecture/service-modules.md`:
+//! - `mod.rs` (this file) — public API, typed [`Error`], orchestration
+//! - `io.rs` — the ONLY place that performs side effects (filesystem checks)
+//! - `path_guard.rs` — pure command-safety checks and output utilities
+
+pub mod io;
+pub mod path_guard;
+
+#[allow(unused_imports)]
+pub use path_guard::{
+    BLOCKED_BINARIES, BLOCKED_PATTERNS, extract_count, is_dangerous, parse_test_counts,
+    truncate_output,
+};
+
+// ── Error type ────────────────────────────────────────────────────────────────
+
+/// Typed errors returned by `service::shell` functions.
+///
+/// HTTP handlers map these to status codes:
+/// - [`Error::DangerousCommand`] → 400 Bad Request
+/// - [`Error::PathNotAllowed`] → 400 Bad Request (sandbox violation)
+/// - [`Error::Validation`] → 400 Bad Request
+/// - [`Error::Io`] → 500 Internal Server Error
+/// - [`Error::UpstreamFailure`] → 500 Internal Server Error
+#[allow(dead_code)]
+#[derive(Debug)]
+pub enum Error {
+    /// The command matches a blocked pattern or binary.
+    DangerousCommand(String),
+    /// The working directory is outside the allowed sandbox.
+    PathNotAllowed(String),
+    /// A required argument is missing or has an invalid value.
+    Validation(String),
+    /// A filesystem or process I/O operation failed.
+    Io(String),
+    /// An upstream dependency (e.g. the shell) returned an unexpected error.
+    UpstreamFailure(String),
+}
+
+impl std::fmt::Display for Error {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::DangerousCommand(msg) => write!(f, "Dangerous command: {msg}"),
+            Self::PathNotAllowed(msg) => write!(f, "Path not allowed: {msg}"),
+            Self::Validation(msg) => write!(f, "Validation error: {msg}"),
+            Self::Io(msg) => write!(f, "I/O error: {msg}"),
+            Self::UpstreamFailure(msg) => write!(f, "Upstream failure: {msg}"),
+        }
+    }
+}
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn error_display_dangerous_command() {
+        let e = Error::DangerousCommand("rm -rf / detected".to_string());
+        assert!(e.to_string().contains("Dangerous command"));
+    }
+
+    #[test]
+    fn error_display_path_not_allowed() {
+        let e = Error::PathNotAllowed("outside worktrees".to_string());
+        assert!(e.to_string().contains("Path not allowed"));
+    }
+
+    #[test]
+    fn error_display_validation() {
+        let e = Error::Validation("must be absolute".to_string());
+        assert!(e.to_string().contains("Validation error"));
+    }
+
+    #[test]
+    fn error_display_io() {
+        let e = Error::Io("disk full".to_string());
+        assert!(e.to_string().contains("I/O error"));
+    }
+
+    #[test]
+    fn error_display_upstream_failure() {
+        let e = Error::UpstreamFailure("bash not found".to_string());
+        assert!(e.to_string().contains("Upstream failure"));
+    }
+}
@@ -0,0 +1,223 @@
+//! Pure command safety and output helpers for `service::shell`.
+//!
+//! All functions here are free of side effects: no filesystem access,
+//! no process spawning, no I/O of any kind. They may be tested without
+//! temporary directories or an async runtime.
+
+/// Patterns that are unconditionally blocked regardless of context.
+pub static BLOCKED_PATTERNS: &[&str] = &[
+    "rm -rf /",
+    "rm -fr /",
+    "rm -rf /*",
+    "rm -fr /*",
+    "rm --no-preserve-root",
+    ":(){ :|:& };:",
+    "> /dev/sda",
+    "dd if=/dev",
+];
+
+/// Binaries that are unconditionally blocked.
+pub static BLOCKED_BINARIES: &[&str] = &[
+    "sudo", "su", "shutdown", "reboot", "halt", "poweroff", "mkfs",
+];
+
+/// Returns an error message if the command matches a blocked pattern or binary,
+/// or `None` if the command is safe to run.
+///
+/// Checks are purely string-based — no I/O or process inspection.
+pub fn is_dangerous(command: &str) -> Option<String> {
+    let trimmed = command.trim();
+
+    for &pattern in BLOCKED_PATTERNS {
+        if trimmed.contains(pattern) {
+            return Some(format!(
+                "Command blocked: dangerous pattern '{pattern}' detected"
+            ));
+        }
+    }
+
+    if let Some(first_token) = trimmed.split_whitespace().next() {
+        let binary = std::path::Path::new(first_token)
+            .file_name()
+            .and_then(|n| n.to_str())
+            .unwrap_or(first_token);
+        if BLOCKED_BINARIES.contains(&binary) {
+            return Some(format!("Command blocked: '{binary}' is not permitted"));
+        }
+    }
+
+    None
+}
+
+/// Truncate `output` to at most `max_lines` lines, keeping the tail.
+///
+/// If the output fits within `max_lines`, it is returned unchanged.
+/// Otherwise a `"[... N lines omitted ...]"` header is prepended to the
+/// last `max_lines` lines so callers still see the most recent output.
+pub fn truncate_output(output: &str, max_lines: usize) -> String {
+    let lines: Vec<&str> = output.lines().collect();
+    if lines.len() <= max_lines {
+        return output.to_string();
+    }
+    let omitted = lines.len() - max_lines;
+    let tail = lines[lines.len() - max_lines..].join("\n");
+    format!("[... {omitted} lines omitted ...]\n{tail}")
+}
+
+/// Parse cumulative passed/failed counts from `cargo test` output.
+///
+/// Scans each line for `"test result:"` summaries and accumulates the
+/// `passed` and `failed` counts across all crates in the output.
+pub fn parse_test_counts(output: &str) -> (u64, u64) {
+    let mut total_passed = 0u64;
+    let mut total_failed = 0u64;
+    for line in output.lines() {
+        if line.contains("test result:") {
+            if let Some(p) = extract_count(line, "passed") {
+                total_passed += p;
+            }
+            if let Some(f) = extract_count(line, "failed") {
+                total_failed += f;
+            }
+        }
+    }
+    (total_passed, total_failed)
+}
+
+/// Extract the integer immediately before `label` in `line`.
+///
+/// For example, `extract_count("5 passed; 0 failed", "passed")` returns
+/// `Some(5)`. Returns `None` if no digit sequence precedes `label`.
+pub fn extract_count(line: &str, label: &str) -> Option<u64> {
+    let pos = line.find(label)?;
+    let before = line[..pos].trim_end();
+    let num_str: String = before
+        .chars()
+        .rev()
+        .take_while(|c| c.is_ascii_digit())
+        .collect();
+    if num_str.is_empty() {
+        return None;
+    }
+    let num_str: String = num_str.chars().rev().collect();
+    num_str.parse().ok()
+}
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // ── is_dangerous ──────────────────────────────────────────────────────────
+
+    #[test]
+    fn is_dangerous_blocks_rm_rf_root() {
+        assert!(is_dangerous("rm -rf /").is_some());
+        assert!(is_dangerous("  rm -rf /  ").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_rm_fr_root() {
+        assert!(is_dangerous("rm -fr /").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_rm_rf_star() {
+        assert!(is_dangerous("rm -rf /*").is_some());
+        assert!(is_dangerous("rm -fr /*").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_sudo() {
+        assert!(is_dangerous("sudo ls").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_shutdown() {
+        assert!(is_dangerous("shutdown -h now").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_mkfs() {
+        assert!(is_dangerous("mkfs /dev/sda1").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_blocks_fork_bomb() {
+        assert!(is_dangerous(":(){ :|:& };:").is_some());
+    }
+
+    #[test]
+    fn is_dangerous_allows_safe_commands() {
+        assert!(is_dangerous("cargo build").is_none());
+        assert!(is_dangerous("npm test").is_none());
+        assert!(is_dangerous("git status").is_none());
+        assert!(is_dangerous("ls -la").is_none());
+        assert!(is_dangerous("rm -rf target/").is_none());
+    }
+
+    // ── truncate_output ───────────────────────────────────────────────────────
+
+    #[test]
+    fn truncate_short_text_unchanged() {
+        let text = "line1\nline2\nline3";
+        assert_eq!(truncate_output(text, 10), text);
+    }
+
+    #[test]
+    fn truncate_long_text_keeps_tail() {
+        let lines: Vec<String> = (1..=200).map(|i| format!("line {i}")).collect();
+        let text = lines.join("\n");
+        let result = truncate_output(&text, 50);
+        assert!(
+            result.contains("line 200"),
+            "should keep last line: {result}"
+        );
+        assert!(
+            result.contains("omitted"),
+            "should note omitted lines: {result}"
+        );
+        assert!(
+            !result.contains("line 1\n"),
+            "should not keep first line: {result}"
+        );
+    }
+
+    #[test]
+    fn truncate_exact_max_unchanged() {
+        let text = "a\nb\nc";
+        assert_eq!(truncate_output(text, 3), text);
+    }
+
+    // ── parse_test_counts ─────────────────────────────────────────────────────
+
+    #[test]
+    fn parse_counts_extracts_passed_and_failed() {
+        let output = "test result: ok. 5 passed; 0 failed; 0 ignored\ntest result: FAILED. 2 passed; 3 failed;";
+        let (passed, failed) = parse_test_counts(output);
+        assert_eq!(passed, 7);
+        assert_eq!(failed, 3);
+    }
+
+    #[test]
+    fn parse_counts_no_results_returns_zeros() {
+        let (passed, failed) = parse_test_counts("no test output here");
+        assert_eq!(passed, 0);
+        assert_eq!(failed, 0);
+    }
+
+    // ── extract_count ─────────────────────────────────────────────────────────
+
+    #[test]
+    fn extract_count_finds_number_before_label() {
+        assert_eq!(extract_count("5 passed; 0 failed", "passed"), Some(5));
+        assert_eq!(extract_count("0 failed", "failed"), Some(0));
+        assert_eq!(extract_count("no number here passed", "passed"), None);
+    }
+
+    #[test]
+    fn extract_count_returns_none_for_missing_label() {
+        assert_eq!(extract_count("5 passed", "failed"), None);
+    }
+}