Files
huskies/crates/source-map-gen/src/lib.rs
T
2026-04-29 08:14:31 +00:00

722 lines
24 KiB
Rust

//! LLM-friendly source map generation and documentation coverage checking.
//!
//! Provides a [`LanguageAdapter`] trait that language-specific adapters implement,
//! plus top-level dispatcher functions that route to the right adapter based on file
//! extension (`.rs` → [`RustAdapter`], `.ts`/`.tsx` → [`TypeScriptAdapter`]).
//!
//! The entry point for agent spawn integration is [`update_for_worktree`], which
//! runs `git diff --name-only` to find changed files and updates the source map for
//! those that pass the documentation coverage check.
mod rust_adapter;
mod ts_adapter;
pub use rust_adapter::RustAdapter;
pub use ts_adapter::TypeScriptAdapter;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::process::Command;
/// A missing documentation failure for a single public item.
///
/// Produced by [`LanguageAdapter::check`] implementations; convert to an
/// actionable instruction for a coding agent with [`CheckFailure::to_direction`].
#[derive(Debug, Clone, PartialEq)]
pub struct CheckFailure {
    /// Path to the file containing the undocumented item.
    pub file_path: PathBuf,
    /// 1-based line number of the item declaration.
    pub line: usize,
    /// Kind of item (e.g. `"fn"`, `"struct"`, `"module"`).
    pub item_kind: String,
    /// Name of the item.
    pub item_name: String,
}
impl CheckFailure {
    /// Returns a human-readable direction a coding agent can act on directly.
    ///
    /// The output has the shape `path:line: add a doc comment to KIND \`NAME\``,
    /// so the agent can jump straight to the offending declaration.
    pub fn to_direction(&self) -> String {
        let location = format!("{}:{}", self.file_path.display(), self.line);
        format!(
            "{location}: add a doc comment to {} `{}`",
            self.item_kind, self.item_name
        )
    }
}
/// Result of a documentation coverage check.
///
/// Returned by [`LanguageAdapter::check`] and by the top-level dispatchers
/// [`check_files`] and [`check_files_ratcheted`].
#[derive(Debug, Clone, PartialEq)]
pub enum CheckResult {
    /// All checked items are documented.
    Ok,
    /// One or more items are missing documentation.
    Failures(Vec<CheckFailure>),
}
/// Language-specific adapter for doc-coverage checking and source map generation.
///
/// Implemented by [`RustAdapter`] and [`TypeScriptAdapter`]; selected per file
/// extension by the top-level dispatch functions in this module.
pub trait LanguageAdapter {
    /// Check documentation coverage for `files`.
    ///
    /// Returns [`CheckResult::Ok`] when every public item in every file has a doc
    /// comment, or [`CheckResult::Failures`] listing each undocumented item as a
    /// direction the coding agent can act on.
    fn check(&self, files: &[&Path]) -> CheckResult;
    /// Update the source map at `source_map_path` with entries for `passing_files`.
    ///
    /// Reads the existing map, updates only the entries for the provided files, and
    /// writes back. Entries for files not in `passing_files` are preserved unchanged.
    /// Running twice with the same input produces identical file content (idempotent).
    fn update_source_map(
        &self,
        passing_files: &[&Path],
        source_map_path: &Path,
    ) -> Result<(), String>;
}
/// Returns the adapter for the given file extension, or `None` if unsupported.
fn adapter_for_ext(ext: &str) -> Option<Box<dyn LanguageAdapter>> {
    let adapter: Box<dyn LanguageAdapter> = match ext {
        "rs" => Box::new(RustAdapter),
        "ts" | "tsx" => Box::new(TypeScriptAdapter),
        _ => return None,
    };
    Some(adapter)
}
/// Parse added line ranges from a unified diff output.
///
/// Returns the 1-based, inclusive line ranges in the new version of the file
/// that were introduced by the diff. Lines that are context or deletions are
/// not included.
fn parse_added_ranges(diff: &str) -> Vec<std::ops::RangeInclusive<usize>> {
    diff.lines()
        // Only hunk headers carry range info: @@ -old[,count] +new[,count] @@
        .filter(|line| line.starts_with("@@"))
        .filter_map(|header| {
            // Expected fields: ["@@", "-old[,count]", "+new[,count]", "@@", ...]
            let fields: Vec<&str> = header.split_whitespace().collect();
            let new_side = fields.get(2)?.strip_prefix('+')?;
            // A bare "+N" means one line; "+N,C" gives an explicit count.
            let (start, count): (usize, usize) = match new_side.split_once(',') {
                Some((s, c)) => (s.parse().unwrap_or(0), c.parse().unwrap_or(0)),
                None => (new_side.parse().unwrap_or(0), 1),
            };
            // count == 0 means a pure deletion hunk; start == 0 means malformed.
            (start > 0 && count > 0).then(|| start..=start + count - 1)
        })
        .collect()
}
/// Returns the 1-based line ranges in `file` that were added since `base` in `worktree`.
///
/// Uses `git diff --unified=0 {base}...HEAD -- {file}` and parses the hunk headers.
/// Returns an empty `Vec` on git errors or when there are no added lines.
pub fn added_line_ranges(
    worktree: &Path,
    base: &str,
    file: &Path,
) -> Vec<std::ops::RangeInclusive<usize>> {
    // git expects a path relative to the worktree it runs in.
    let rel = file.strip_prefix(worktree).unwrap_or(file);
    Command::new("git")
        .args([
            "diff",
            "--unified=0",
            &format!("{base}...HEAD"),
            "--",
            &rel.to_string_lossy(),
        ])
        .current_dir(worktree)
        .output()
        .map(|out| parse_added_ranges(&String::from_utf8_lossy(&out.stdout)))
        .unwrap_or_default()
}
/// Check documentation coverage, reporting only violations in lines added since `base`.
///
/// Like [`check_files`], but filters each [`CheckFailure`] to items whose declaration
/// line falls within a range added by `git diff {base}...HEAD` against `worktree`.
/// Pre-existing undocumented items whose lines were not touched by the commit are
/// silently ignored.
pub fn check_files_ratcheted(files: &[&Path], worktree: &Path, base: &str) -> CheckResult {
let mut by_ext: HashMap<String, Vec<&Path>> = HashMap::new();
for &file in files {
if let Some(ext) = file.extension().and_then(|e| e.to_str()) {
by_ext.entry(ext.to_string()).or_default().push(file);
}
}
let mut all_failures = Vec::new();
for (ext, ext_files) in &by_ext {
if let Some(adapter) = adapter_for_ext(ext)
&& let CheckResult::Failures(failures) = adapter.check(ext_files)
{
for failure in failures {
let added = added_line_ranges(worktree, base, &failure.file_path);
// Only report if the item's declaration line is within an added range.
// If added is empty (no additions or git error), skip — nothing new to blame.
if !added.is_empty() && added.iter().any(|r| r.contains(&failure.line)) {
all_failures.push(failure);
}
}
}
}
if all_failures.is_empty() {
CheckResult::Ok
} else {
CheckResult::Failures(all_failures)
}
}
/// Check documentation coverage for a mixed list of files.
///
/// Dispatches each file to the appropriate [`LanguageAdapter`] based on its
/// extension. Files with unsupported extensions are silently skipped.
pub fn check_files(files: &[&Path]) -> CheckResult {
    // Group files by extension so each adapter checks its own language only.
    let mut grouped: HashMap<String, Vec<&Path>> = HashMap::new();
    for &file in files {
        let Some(ext) = file.extension().and_then(|e| e.to_str()) else {
            continue;
        };
        grouped.entry(ext.to_string()).or_default().push(file);
    }
    // Run every supported adapter and flatten all failures into one list.
    let failures: Vec<_> = grouped
        .iter()
        .filter_map(|(ext, group)| adapter_for_ext(ext).map(|adapter| adapter.check(group)))
        .flat_map(|result| match result {
            CheckResult::Failures(f) => f,
            CheckResult::Ok => Vec::new(),
        })
        .collect();
    if failures.is_empty() {
        CheckResult::Ok
    } else {
        CheckResult::Failures(failures)
    }
}
/// Update the source map at `source_map_path` with entries for `passing_files`.
///
/// Dispatches each file to the appropriate [`LanguageAdapter`] based on extension.
/// Files with unsupported extensions are silently skipped. The first adapter
/// error aborts the update and is returned to the caller.
pub fn update_source_map(passing_files: &[&Path], source_map_path: &Path) -> Result<(), String> {
    let mut grouped: HashMap<String, Vec<&Path>> = HashMap::new();
    for &file in passing_files {
        if let Some(ext) = file.extension().and_then(|e| e.to_str()) {
            grouped.entry(ext.to_string()).or_default().push(file);
        }
    }
    grouped
        .iter()
        .filter_map(|(ext, group)| adapter_for_ext(ext).map(|adapter| (adapter, group)))
        .try_for_each(|(adapter, group)| adapter.update_source_map(group, source_map_path))
}
/// Update the source map for files that changed since `base_branch` in `worktree_path`.
///
/// 1. Runs `git diff --name-only {base_branch}...HEAD` in the worktree.
/// 2. Checks doc coverage for each changed file (per-file).
/// 3. Calls [`update_source_map`] with the files whose coverage check passes.
///
/// Errors are returned as `Err(String)`; callers in the spawn flow treat them as
/// non-blocking warnings.
pub fn update_for_worktree(
    worktree_path: &Path,
    base_branch: &str,
    source_map_path: &Path,
) -> Result<(), String> {
    let output = Command::new("git")
        .args(["diff", "--name-only", &format!("{base_branch}...HEAD")])
        .current_dir(worktree_path)
        .output()
        .map_err(|e| format!("git diff: {e}"))?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(format!("git diff failed: {}", stderr.trim()));
    }
    // Absolute paths for every changed file that still exists (skips deletions).
    let stdout = String::from_utf8_lossy(&output.stdout);
    let changed: Vec<PathBuf> = stdout
        .lines()
        .filter(|line| !line.is_empty())
        .map(|line| worktree_path.join(line))
        .filter(|path| path.exists())
        .collect();
    // Keep only the files that individually pass the doc check; an empty
    // `changed` list naturally yields an empty `passing` list.
    let passing: Vec<&Path> = changed
        .iter()
        .map(PathBuf::as_path)
        .filter(|&path| matches!(check_files(&[path]), CheckResult::Ok))
        .collect();
    if passing.is_empty() {
        return Ok(());
    }
    if let Some(parent) = source_map_path.parent() {
        std::fs::create_dir_all(parent).map_err(|e| format!("create_dir_all: {e}"))?;
    }
    update_source_map(&passing, source_map_path)
}
/// Read the existing source map from `path` as a JSON object.
///
/// Returns an empty map if the file does not exist. Any other read or parse
/// error is returned as `Err(String)`.
pub(crate) fn read_map(path: &Path) -> Result<serde_json::Map<String, serde_json::Value>, String> {
    // Match on the read error instead of calling `path.exists()` first: this
    // avoids a check-then-read race (file removed between the two calls) and
    // saves an extra stat syscall.
    let content = match std::fs::read_to_string(path) {
        Ok(content) => content,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            return Ok(serde_json::Map::new());
        }
        Err(e) => return Err(format!("read {}: {e}", path.display())),
    };
    serde_json::from_str(&content).map_err(|e| format!("parse source map: {e}"))
}
/// Write `map` to `path` as pretty-printed JSON.
pub(crate) fn write_map(
    path: &Path,
    map: serde_json::Map<String, serde_json::Value>,
) -> Result<(), String> {
    let value = serde_json::Value::Object(map);
    let content = serde_json::to_string_pretty(&value).map_err(|e| format!("serialize: {e}"))?;
    std::fs::write(path, content).map_err(|e| format!("write {}: {e}", path.display()))
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::process::Command;
    use tempfile::TempDir;

    /// Write `content` to `dir/name` and return the full path.
    fn write_file(dir: &Path, name: &str, content: &str) -> PathBuf {
        let path = dir.join(name);
        std::fs::write(&path, content).unwrap();
        path
    }

    /// Write a Rust fixture file (delegates to [`write_file`]; alias kept for readability).
    fn write_rs(dir: &Path, name: &str, content: &str) -> PathBuf {
        write_file(dir, name, content)
    }

    /// Write a TypeScript fixture file (delegates to [`write_file`]; alias kept for readability).
    fn write_ts(dir: &Path, name: &str, content: &str) -> PathBuf {
        write_file(dir, name, content)
    }

    /// Run a git command in `dir`, panicking if it cannot spawn or exits non-zero.
    ///
    /// Asserting on exit status means a failed `git commit` surfaces here with
    /// stderr attached, instead of as a confusing `HEAD~1` error later.
    fn git(dir: &Path, args: &[&str]) {
        let out = Command::new("git")
            .args(args)
            .current_dir(dir)
            .output()
            .unwrap_or_else(|e| panic!("failed to spawn git {args:?}: {e}"));
        assert!(
            out.status.success(),
            "git {args:?} failed: {}",
            String::from_utf8_lossy(&out.stderr)
        );
    }

    /// Create a git repo in `dir` with test user config and an initial empty commit.
    fn init_git_repo(dir: &Path) {
        git(dir, &["init"]);
        git(dir, &["config", "user.email", "test@test.com"]);
        git(dir, &["config", "user.name", "Test"]);
        git(dir, &["commit", "--allow-empty", "-m", "init"]);
    }

    // --- Rust happy path ---
    #[test]
    fn rust_check_happy_path_ok() {
        let tmp = TempDir::new().unwrap();
        let path = write_rs(
            tmp.path(),
            "foo.rs",
            "//! Module doc.\n\n/// A function.\npub fn hello() {}\n",
        );
        assert_eq!(check_files(&[&path]), CheckResult::Ok);
    }

    // --- Rust failure path ---
    #[test]
    fn rust_check_missing_module_doc_yields_failure() {
        let tmp = TempDir::new().unwrap();
        let path = write_rs(tmp.path(), "foo.rs", "/// A function.\npub fn hello() {}\n");
        let result = check_files(&[&path]);
        assert!(
            matches!(&result, CheckResult::Failures(v) if v.iter().any(|f| f.item_kind == "module")),
            "expected module failure, got {result:?}"
        );
    }

    #[test]
    fn rust_check_missing_fn_doc_yields_failure_with_correct_fields() {
        let tmp = TempDir::new().unwrap();
        let path = write_rs(
            tmp.path(),
            "bar.rs",
            "//! Module doc.\n\npub fn undocumented() {}\n",
        );
        let result = check_files(&[&path]);
        if let CheckResult::Failures(failures) = result {
            let f = failures.iter().find(|f| f.item_kind == "fn").unwrap();
            assert_eq!(f.item_name, "undocumented");
            assert_eq!(f.file_path, path);
            assert_eq!(f.line, 3);
        } else {
            panic!("expected failures");
        }
    }

    // --- TypeScript happy path ---
    #[test]
    fn ts_check_happy_path_ok() {
        let tmp = TempDir::new().unwrap();
        let path = write_ts(
            tmp.path(),
            "app.ts",
            "/**\n * File doc.\n */\n\n/**\n * Does something.\n */\nexport function hello(): void {}\n",
        );
        assert_eq!(check_files(&[&path]), CheckResult::Ok);
    }

    // --- TypeScript failure path ---
    #[test]
    fn ts_check_missing_file_doc_yields_failure() {
        let tmp = TempDir::new().unwrap();
        // First non-empty line is not a /** file-level JSDoc, so the file-doc
        // check must flag it.
        let path = write_ts(tmp.path(), "app2.ts", "export function hello(): void {}\n");
        let result = check_files(&[&path]);
        assert!(
            matches!(&result, CheckResult::Failures(v) if v.iter().any(|f| f.item_kind == "file")),
            "expected file failure, got {result:?}"
        );
    }

    #[test]
    fn ts_check_missing_export_doc_yields_failure() {
        let tmp = TempDir::new().unwrap();
        let path = write_ts(
            tmp.path(),
            "app.ts",
            "/**\n * File doc.\n */\n\nexport function undocumented(): void {}\n",
        );
        let result = check_files(&[&path]);
        assert!(
            matches!(&result, CheckResult::Failures(v) if v.iter().any(|f| f.item_kind == "function" && f.item_name == "undocumented")),
            "expected function failure, got {result:?}"
        );
    }

    // --- Update idempotency ---
    #[test]
    fn update_idempotent_same_input_twice() {
        let tmp = TempDir::new().unwrap();
        let rs_path = write_rs(
            tmp.path(),
            "lib.rs",
            "//! Module doc.\n\n/// A function.\npub fn foo() {}\n",
        );
        let map_path = tmp.path().join("source-map.json");
        let files: &[&Path] = &[&rs_path];
        update_source_map(files, &map_path).unwrap();
        let first = std::fs::read_to_string(&map_path).unwrap();
        update_source_map(files, &map_path).unwrap();
        let second = std::fs::read_to_string(&map_path).unwrap();
        assert_eq!(first, second, "update_source_map must be idempotent");
    }

    // --- update_source_map preserves other entries ---
    #[test]
    fn update_source_map_preserves_unrelated_entries() {
        let tmp = TempDir::new().unwrap();
        let map_path = tmp.path().join("source-map.json");
        // Write an initial map with an unrelated entry.
        std::fs::write(&map_path, r#"{"unrelated/file.rs": ["fn old"]}"#).unwrap();
        let rs_path = write_rs(
            tmp.path(),
            "new.rs",
            "//! Module doc.\n\n/// A function.\npub fn bar() {}\n",
        );
        update_source_map(&[&rs_path], &map_path).unwrap();
        let content = std::fs::read_to_string(&map_path).unwrap();
        assert!(
            content.contains("unrelated/file.rs"),
            "old entry should be preserved"
        );
        assert!(content.contains("new.rs"), "new entry should be added");
    }

    // --- Gate tests: AC3 / AC4 ---
    /// AC3: a worktree with a missing module doc fails gates with a recognisable
    /// error that references the missing file and line number.
    #[test]
    fn gate_missing_module_doc_fails_with_file_and_line_in_direction() {
        let tmp = TempDir::new().unwrap();
        // File has a pub fn but NO //! module doc comment.
        let path = write_rs(tmp.path(), "missing_doc.rs", "pub fn no_module_doc() {}\n");
        let result = check_files(&[&path]);
        assert!(
            matches!(&result, CheckResult::Failures(v) if !v.is_empty()),
            "expected failures for missing module doc, got {result:?}"
        );
        if let CheckResult::Failures(failures) = result {
            let module_failure = failures
                .iter()
                .find(|f| f.item_kind == "module")
                .expect("expected a module-level failure");
            let direction = module_failure.to_direction();
            // Direction must name the file so the agent can navigate directly to it.
            assert!(
                direction.contains("missing_doc.rs"),
                "direction must reference the file name: {direction}"
            );
            // Direction must contain a colon-separated line number.
            assert!(
                direction.contains(':'),
                "direction must contain a file:line reference: {direction}"
            );
        }
    }

    /// AC4: a worktree where every changed file has full docs passes gates (Ok result).
    #[test]
    fn gate_fully_documented_files_pass() {
        let tmp = TempDir::new().unwrap();
        let path = write_rs(
            tmp.path(),
            "fully_documented.rs",
            "//! Module doc.\n\n/// A function.\npub fn greet() {}\n\n/// A struct.\npub struct Hello;\n",
        );
        assert_eq!(
            check_files(&[&path]),
            CheckResult::Ok,
            "fully documented file should produce no failures"
        );
    }

    // --- Ratchet tests: AC3 / AC4 ---
    /// AC3: a file with N pre-existing undocumented items plus 1 new undocumented item
    /// added by the commit reports exactly 1 violation, not N+1.
    #[test]
    fn ratchet_only_new_undocumented_items_are_flagged() {
        let tmp = TempDir::new().unwrap();
        init_git_repo(tmp.path());
        // Base commit: file with 2 undocumented public fns (pre-existing).
        write_rs(
            tmp.path(),
            "lib.rs",
            "//! Module doc.\n\npub fn old_a() {}\npub fn old_b() {}\n",
        );
        git(tmp.path(), &["add", "lib.rs"]);
        git(tmp.path(), &["commit", "-m", "base"]);
        // Second commit: append 1 new undocumented fn.
        write_rs(
            tmp.path(),
            "lib.rs",
            "//! Module doc.\n\npub fn old_a() {}\npub fn old_b() {}\npub fn new_c() {}\n",
        );
        git(tmp.path(), &["add", "lib.rs"]);
        git(tmp.path(), &["commit", "-m", "add new_c"]);
        let file = tmp.path().join("lib.rs");
        let result = check_files_ratcheted(&[file.as_path()], tmp.path(), "HEAD~1");
        match result {
            CheckResult::Failures(failures) => {
                assert_eq!(
                    failures.len(),
                    1,
                    "expected exactly 1 failure (new_c), got {failures:?}"
                );
                assert_eq!(failures[0].item_name, "new_c");
            }
            CheckResult::Ok => panic!("expected 1 failure for new_c, got Ok"),
        }
    }

    /// AC4: a commit that doesn't change a file does not blame it for pre-existing
    /// undocumented items.
    #[test]
    fn ratchet_unchanged_file_not_blamed() {
        let tmp = TempDir::new().unwrap();
        init_git_repo(tmp.path());
        // Base commit: undocumented file.
        write_rs(
            tmp.path(),
            "untouched.rs",
            "//! Module doc.\n\npub fn old_undocumented() {}\n",
        );
        git(tmp.path(), &["add", "untouched.rs"]);
        git(tmp.path(), &["commit", "-m", "base"]);
        // Second commit: add a different, fully documented file; untouched.rs unchanged.
        write_rs(
            tmp.path(),
            "new_file.rs",
            "//! Module doc.\n\n/// A function.\npub fn documented() {}\n",
        );
        git(tmp.path(), &["add", "new_file.rs"]);
        git(tmp.path(), &["commit", "-m", "add new_file"]);
        // Simulate passing untouched.rs to the ratcheted check.
        // Since it has no added lines in the diff, it should produce no failures.
        let file = tmp.path().join("untouched.rs");
        let result = check_files_ratcheted(&[file.as_path()], tmp.path(), "HEAD~1");
        assert_eq!(
            result,
            CheckResult::Ok,
            "file not touched by the commit should not be blamed"
        );
    }

    // --- parse_added_ranges unit tests ---
    #[test]
    fn parse_added_ranges_single_hunk() {
        let diff = "@@ -0,0 +1,3 @@ some context\n+line1\n+line2\n+line3\n";
        let ranges = parse_added_ranges(diff);
        assert_eq!(ranges, vec![1..=3]);
    }

    #[test]
    fn parse_added_ranges_multiple_hunks() {
        let diff =
            "@@ -1,2 +1,3 @@\n context\n+new\n context\n@@ -10,0 +11,2 @@\n+added1\n+added2\n";
        let ranges = parse_added_ranges(diff);
        assert_eq!(ranges, vec![1..=3, 11..=12]);
    }

    #[test]
    fn parse_added_ranges_empty_diff() {
        let ranges = parse_added_ranges("");
        assert!(ranges.is_empty());
    }

    // --- Spawn integration: update_for_worktree writes map at expected path ---
    #[test]
    fn spawn_integration_map_written_at_expected_path() {
        let tmp = TempDir::new().unwrap();
        init_git_repo(tmp.path());
        // Add a well-documented Rust file and commit it.
        write_rs(
            tmp.path(),
            "lib.rs",
            "//! Module doc.\n\n/// A function.\npub fn greet() {}\n",
        );
        git(tmp.path(), &["add", "lib.rs"]);
        git(tmp.path(), &["commit", "-m", "add lib.rs"]);
        let huskies_dir = tmp.path().join(".huskies");
        std::fs::create_dir_all(&huskies_dir).unwrap();
        let map_path = huskies_dir.join("source-map.json");
        // Simulate what spawn does: update_for_worktree with base = initial commit.
        let result = update_for_worktree(tmp.path(), "HEAD~1", &map_path);
        assert!(
            result.is_ok(),
            "update_for_worktree failed: {:?}",
            result.err()
        );
        // The map file must exist at the expected path.
        assert!(
            map_path.exists(),
            "source map must be written at .huskies/source-map.json"
        );
        let content = std::fs::read_to_string(&map_path).unwrap();
        assert!(
            content.contains("lib.rs"),
            "map must contain the documented file"
        );
        assert!(
            content.contains("fn greet"),
            "map must list the documented function"
        );
    }
}