From 8754c790b9378c0143aca01d21952f60a557458a Mon Sep 17 00:00:00 2001 From: dave Date: Wed, 13 May 2026 23:07:32 +0000 Subject: [PATCH] huskies: merge 1013 --- crates/source-map-gen/src/lib.rs | 100 +++++++++++++++++++++++++++--- crates/source-map-gen/src/main.rs | 87 +++++++++++++++++++------- 2 files changed, 154 insertions(+), 33 deletions(-) diff --git a/crates/source-map-gen/src/lib.rs b/crates/source-map-gen/src/lib.rs index 41a8ab0c..bd592cba 100644 --- a/crates/source-map-gen/src/lib.rs +++ b/crates/source-map-gen/src/lib.rs @@ -5,8 +5,9 @@ //! extension (`.rs` → [`RustAdapter`], `.ts`/`.tsx` → [`TypeScriptAdapter`]). //! //! The entry point for agent spawn integration is [`update_for_worktree`], which -//! runs `git diff --name-only` to find changed files and updates the source map for -//! those that pass the documentation coverage check. +//! finds changed files and updates the source map for those that pass the documentation +//! coverage check. [`added_line_ranges`] covers all git states — committed, staged, +//! unstaged, and untracked — so doc-gap detection is independent of index state. mod rust_adapter; mod ts_adapter; @@ -141,30 +142,78 @@ fn parse_added_ranges(diff: &str) -> Vec> { ranges } -/// Returns the 1-based line ranges in `file` that were added since `base` in `worktree`. +/// Returns the 1-based line ranges in `file` that were added relative to `base` in `worktree`. /// -/// Uses `git diff --unified=0 {base}...HEAD -- {file}` and parses the hunk headers. -/// Returns an empty `Vec` on git errors or when there are no added lines. +/// Covers all git states: +/// - Untracked files (not yet `git add`-ed): the entire file is treated as added. +/// - Committed changes since `base`: `git diff --unified=0 {base}...HEAD` +/// - Staged changes: `git diff --unified=0 --cached` +/// - Unstaged changes: `git diff --unified=0` +/// +/// Returns an empty `Vec` when there are no additions in any state. 
pub fn added_line_ranges( worktree: &Path, base: &str, file: &Path, ) -> Vec<RangeInclusive<usize>> { let rel = file.strip_prefix(worktree).unwrap_or(file); - let output = Command::new("git") + let rel_str = rel.to_string_lossy(); + + // For untracked files, every line is a new addition. + let tracked = Command::new("git") + .args(["ls-files", "--", &*rel_str]) + .current_dir(worktree) + .output(); + if let Ok(out) = tracked + && out.status.success() + && out.stdout.is_empty() + { + let line_count = std::fs::read_to_string(file) + .map(|s| s.lines().count()) + .unwrap_or(0); + return if line_count > 0 { + vec![1..=line_count] + } else { + Vec::new() + }; + } + + let mut ranges = Vec::new(); + + // Committed changes since base. + let committed = Command::new("git") .args([ "diff", "--unified=0", &format!("{base}...HEAD"), "--", - &rel.to_string_lossy(), + &*rel_str, ]) .current_dir(worktree) .output(); - match output { - Ok(o) => parse_added_ranges(&String::from_utf8_lossy(&o.stdout)), - Err(_) => Vec::new(), + if let Ok(o) = committed { + ranges.extend(parse_added_ranges(&String::from_utf8_lossy(&o.stdout))); } + + // Staged changes not yet committed. + let staged = Command::new("git") + .args(["diff", "--unified=0", "--cached", "--", &*rel_str]) + .current_dir(worktree) + .output(); + if let Ok(o) = staged { + ranges.extend(parse_added_ranges(&String::from_utf8_lossy(&o.stdout))); + } + + // Unstaged changes to tracked files. + let unstaged = Command::new("git") + .args(["diff", "--unified=0", "--", &*rel_str]) + .current_dir(worktree) + .output(); + if let Ok(o) = unstaged { + ranges.extend(parse_added_ranges(&String::from_utf8_lossy(&o.stdout))); + } + + ranges } /// Check documentation coverage, reporting only violations in lines added since `base`. @@ -814,6 +863,37 @@ mod tests { ); } + /// AC2: an untracked Rust file lacking a doc comment is caught by `check_files_ratcheted`. + /// + /// The file is never `git add`-ed, so it is invisible to `git diff {base}...HEAD`. 
+ /// The ratchet must still surface the missing-doc failure. + #[test] + fn untracked_file_with_missing_doc_fails() { + let tmp = TempDir::new().unwrap(); + init_git_repo(tmp.path()); + + // Base commit so there is a HEAD to diff against. + Command::new("git") + .args(["commit", "--allow-empty", "-m", "base"]) + .current_dir(tmp.path()) + .output() + .unwrap(); + + // Write a new Rust file with a missing doc comment but do NOT `git add` it. + write_rs( + tmp.path(), + "untracked.rs", + "//! Module doc.\n\npub fn no_doc_here() {}\n", + ); + + let file = tmp.path().join("untracked.rs"); + let result = check_files_ratcheted(&[file.as_path()], tmp.path(), "HEAD"); + assert!( + matches!(&result, CheckResult::Failures(v) if v.iter().any(|f| f.item_name == "no_doc_here")), + "expected failure for undocumented fn in untracked file, got {result:?}" + ); + } + /// `relative_key` strips the root prefix from an absolute path. #[test] fn relative_key_strips_root_prefix() { diff --git a/crates/source-map-gen/src/main.rs b/crates/source-map-gen/src/main.rs index ce827da0..ea771a22 100644 --- a/crates/source-map-gen/src/main.rs +++ b/crates/source-map-gen/src/main.rs @@ -5,8 +5,13 @@ //! Exits with code 1 and prints LLM-friendly directions when public items are //! missing doc comments. Exits 0 (silently) when all changed files are fully //! documented or when there are no relevant changes to check. +//! +//! The file set is derived from all worktree states: committed changes since +//! `base`, staged changes, unstaged changes, and untracked files. This ensures +//! the result is independent of git index state. 
use source_map_gen::{CheckResult, check_files_ratcheted}; +use std::collections::HashSet; use std::path::{Path, PathBuf}; use std::process::Command; @@ -17,29 +22,7 @@ fn main() { let worktree_path = Path::new(&worktree); - let output = match Command::new("git") - .args(["diff", "--name-only", &format!("{base}...HEAD")]) - .current_dir(worktree_path) - .output() - { - Ok(o) => o, - Err(e) => { - eprintln!("source-map-check: git diff failed: {e}"); - std::process::exit(1); - } - }; - - if !output.status.success() { - // Base branch not found or other git error — skip the check gracefully. - return; - } - - let changed: Vec<PathBuf> = String::from_utf8_lossy(&output.stdout) - .lines() - .filter(|l| !l.is_empty()) - .map(|l| worktree_path.join(l)) - .filter(|p| p.exists()) - .collect(); + let changed = collect_changed_files(worktree_path, &base); if changed.is_empty() { return; @@ -64,6 +47,64 @@ fn main() { } } +/// Collect all files that differ from `base` in any git state: committed, staged, +/// unstaged, or untracked. Returns deduplicated absolute paths that exist on disk. +fn collect_changed_files(worktree_path: &Path, base: &str) -> Vec<PathBuf> { + let mut names: HashSet<String> = HashSet::new(); + + // Committed changes since base (three-dot diff handles divergent histories). + run_git_name_list( + worktree_path, + &["diff", "--name-only", &format!("{base}...HEAD")], + &mut names, + ); + + // Staged changes not yet committed. + run_git_name_list( + worktree_path, + &["diff", "--name-only", "--cached"], + &mut names, + ); + + // Unstaged changes to tracked files. + run_git_name_list(worktree_path, &["diff", "--name-only"], &mut names); + + // Untracked files (new files not yet added to the index). + run_git_name_list( + worktree_path, + &["ls-files", "--others", "--exclude-standard"], + &mut names, + ); + + names + .into_iter() + .map(|l| worktree_path.join(l)) + .filter(|p| p.exists()) + .collect() +} + +/// Run a git command and collect each non-empty output line into `out`. 
+/// +/// Silently ignores git errors so a missing base branch or a fresh repo without +/// any commits does not abort the check. +fn run_git_name_list(worktree_path: &Path, args: &[&str], out: &mut HashSet<String>) { + let Ok(output) = Command::new("git") + .args(args) + .current_dir(worktree_path) + .output() + else { + return; + }; + if !output.status.success() { + return; + } + for line in String::from_utf8_lossy(&output.stdout).lines() { + if !line.is_empty() { + out.insert(line.to_string()); + } + } +} + /// Parse a flag value from an argument list (e.g. `--flag value`). fn parse_arg(args: &[String], flag: &str) -> Option<String> { args.windows(2).find(|w| w[0] == flag).map(|w| w[1].clone())