Files
huskies/server/src/chat/commands/loc.rs
T

475 lines
17 KiB
Rust

//! Handler for the `loc` command — top source files by line count.
use super::CommandContext;
use walkdir::WalkDir;
/// Number of files listed when the `loc` command is given no argument.
const DEFAULT_TOP_N: usize = 10;
/// Directories to skip during traversal.
///
/// Each entry is compared for exact equality against a directory's own name
/// (not its full path), so a matching directory is pruned wherever it occurs.
const SKIP_DIRS: &[&str] = &[
"target",
"node_modules",
".git",
"dist",
"build",
".next",
"coverage",
"test-results",
];
/// Path components that indicate a worktree path that should be skipped.
///
/// Each entry is matched as a substring of a directory's path relative to the
/// project root.
const SKIP_PATH_COMPONENTS: &[&str] = &[".huskies/worktrees"];
/// Known-huge or machine-generated files that are excluded from the loc count
/// even when they have a recognised source extension (e.g. `.json`, `.yaml`).
/// Entries are compared against the bare file name, exactly.
/// Add entries here to extend the exclusion list.
const EXCLUDED_FILENAMES: &[&str] = &[
"package-lock.json",
"yarn.lock",
"pnpm-lock.yaml",
"bun.lockb",
"Cargo.lock",
"composer.lock",
"Gemfile.lock",
"poetry.lock",
"go.sum",
"go.work.sum",
"flake.lock",
];
/// Entry point for the `loc` chat command.
///
/// Dispatches on the trimmed argument string:
/// - empty: list the top `DEFAULT_TOP_N` files;
/// - first whitespace-separated token parses as a non-zero integer `N`: list the top `N` files;
/// - first token parses as `0`: reply with the usage text;
/// - anything else: treat the whole argument string as a file path.
///
/// Every branch produces a reply, so the result is always `Some`.
pub(super) fn handle_loc(ctx: &CommandContext) -> Option<String> {
    let raw = ctx.args.trim();
    // After trimming, "no first token" and "empty argument" are the same condition.
    let head = match raw.split_whitespace().next() {
        Some(token) => token,
        None => return Some(loc_top_n(ctx.project_root, DEFAULT_TOP_N)),
    };
    let reply = match head.parse::<usize>() {
        // Not a number: the full argument (not just the first token) is the path.
        Err(_) => loc_single_file(ctx.project_root, raw),
        Ok(0) => format!(
            "Usage: `loc [N]` or `loc <filepath>` — show top N source files by line count (default {DEFAULT_TOP_N}), or line count for a specific file"
        ),
        Ok(count) => loc_top_n(ctx.project_root, count),
    };
    Some(reply)
}
/// Report the line count of one file.
///
/// `file_arg` is used as-is when it is an absolute path and is otherwise joined onto
/// `project_root`. On success the reply names the file relative to `project_root` when it
/// lies under it (the full path otherwise). A missing file and any other read failure each
/// produce their own message, echoing `file_arg` exactly as it was given.
pub(crate) fn loc_single_file(project_root: &std::path::Path, file_arg: &str) -> String {
    use std::io::ErrorKind;
    use std::path::Path;

    let requested = Path::new(file_arg);
    let target = if requested.is_absolute() {
        requested.to_path_buf()
    } else {
        project_root.join(requested)
    };

    // Bail out with the appropriate message first; the happy path follows unindented.
    let text = match std::fs::read_to_string(&target) {
        Ok(text) => text,
        Err(err) if err.kind() == ErrorKind::NotFound => {
            return format!("File not found: `{file_arg}`");
        }
        Err(err) => return format!("Error reading `{file_arg}`: {err}"),
    };

    let shown = target.strip_prefix(project_root).unwrap_or(&target);
    let lines = text.lines().count();
    format!("`{}` — {lines} lines", shown.to_string_lossy())
}
/// Render the "top N files by line count" report for the tree under `project_root`.
///
/// The walk does not follow symlinks. Below the root, directories named in `SKIP_DIRS` and
/// directories whose path relative to `project_root` contains an entry of
/// `SKIP_PATH_COMPONENTS` are pruned. A file is counted only if its name is not in
/// `EXCLUDED_FILENAMES`, its extension passes `is_source_extension`, it can be read as
/// UTF-8 text, and it has at least one line.
///
/// Returns a Markdown numbered list of at most `top_n` entries, largest first (ties broken
/// by path so the output is deterministic), or `"No source files found."` when nothing
/// qualifies.
fn loc_top_n(project_root: &std::path::Path, top_n: usize) -> String {
    use std::fmt::Write as _;

    let mut files: Vec<(usize, String)> = WalkDir::new(project_root)
        .follow_links(false)
        .into_iter()
        .filter_entry(|e| {
            // `filter_entry` is also consulted for the root entry (depth 0). The pruning
            // rules must only apply below it; otherwise a project that itself lives in a
            // directory called e.g. `build` or `dist` would yield no files at all.
            if e.depth() == 0 || !e.file_type().is_dir() {
                return true;
            }
            let name = e.file_name().to_string_lossy();
            if SKIP_DIRS.iter().any(|s| *s == name.as_ref()) {
                return false;
            }
            // Skip .huskies/worktrees — use the path relative to the project root so the
            // check doesn't trigger merely because the root itself sits inside a worktree
            // (where the absolute path contains ".huskies/worktrees"). The components are
            // re-joined with '/' so the match does not depend on the OS path separator.
            let rel = e
                .path()
                .strip_prefix(project_root)
                .map(|p| {
                    p.components()
                        .map(|c| c.as_os_str().to_string_lossy().into_owned())
                        .collect::<Vec<_>>()
                        .join("/")
                })
                .unwrap_or_default();
            !SKIP_PATH_COMPONENTS.iter().any(|s| rel.contains(s))
        })
        .filter_map(|entry| {
            // Unreadable entries are skipped rather than aborting the whole report.
            let entry = entry.ok()?;
            if !entry.file_type().is_file() {
                return None;
            }
            let path = entry.path();
            // Skip known-huge or machine-generated files (lockfiles, etc.).
            let filename = path.file_name().and_then(|f| f.to_str()).unwrap_or("");
            if EXCLUDED_FILENAMES.contains(&filename) {
                return None;
            }
            // Skip binary/generated files without a recognisable text extension.
            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
            if !is_source_extension(ext) {
                return None;
            }
            // Files that are not valid UTF-8 are silently left out.
            let content = std::fs::read_to_string(path).ok()?;
            let line_count = content.lines().count();
            if line_count == 0 {
                return None;
            }
            // Make path relative to project_root for display.
            let rel = path
                .strip_prefix(project_root)
                .unwrap_or(path)
                .to_string_lossy()
                .into_owned();
            Some((line_count, rel))
        })
        .collect();

    // Largest first; equal counts fall back to path order so the ranking does not depend
    // on the order in which the filesystem happened to return entries.
    files.sort_by(|a, b| b.0.cmp(&a.0).then_with(|| a.1.cmp(&b.1)));
    files.truncate(top_n);
    if files.is_empty() {
        return "No source files found.".to_string();
    }

    let mut out = format!("**Top {} files by line count**\n\n", files.len());
    for (rank, (lines, path)) in files.iter().enumerate() {
        // Writing into a `String` cannot fail, so the `fmt::Result` is discarded.
        let _ = writeln!(out, "{}. `{}` — {} lines", rank + 1, path, lines);
    }
    out
}
/// Returns true for file extensions considered source/text files.
///
/// `ext` is the extension without its leading dot. The comparison is ASCII
/// case-insensitive, so files such as `analysis.R`, `README.MD` or `DATA.JSON` are
/// recognised alongside their lowercase spellings. An empty extension is never a match.
fn is_source_extension(ext: &str) -> bool {
    const SOURCE_EXTENSIONS: &[&str] = &[
        "rs", "ts", "tsx", "js", "jsx", "py", "go", "java", "c", "cpp", "h", "hpp", "cs", "rb",
        "swift", "kt", "scala", "hs", "ml", "ex", "exs", "clj", "lua", "sh", "bash", "zsh",
        "fish", "ps1", "toml", "yaml", "yml", "json", "md", "html", "css", "scss", "less", "sql",
        "graphql", "proto", "tf", "hcl", "nix", "r", "jl", "dart", "vue", "svelte",
    ];
    SOURCE_EXTENSIONS
        .iter()
        .any(|known| known.eq_ignore_ascii_case(ext))
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
use crate::agents::AgentPool;
use std::collections::HashSet;
use std::sync::{Arc, Mutex};
/// Assemble a `CommandContext` for handler tests.
///
/// The bot name (`"Timmy"`) and room id (`"!test:example.com"`) are fixed; the agent
/// pool, ambient-room set, project root and argument string are borrowed from the caller.
fn make_ctx<'a>(
    agents: &'a Arc<AgentPool>,
    ambient_rooms: &'a Arc<Mutex<HashSet<String>>>,
    project_root: &'a std::path::Path,
    args: &'a str,
) -> super::super::CommandContext<'a> {
    super::super::CommandContext {
        // Fixed identity used by every test.
        room_id: "!test:example.com",
        bot_name: "Timmy",
        // Per-test inputs.
        project_root,
        args,
        agents,
        ambient_rooms,
    }
}
#[test]
fn loc_command_is_registered() {
    // The command registry must expose an entry named exactly "loc".
    let registry = super::super::commands();
    assert!(
        registry.iter().any(|cmd| cmd.name == "loc"),
        "loc command must be in the registry"
    );
}
#[test]
fn loc_command_appears_in_help() {
    // Ask the bot for help through the shared dispatch helper and look for "loc" in the reply.
    let output = super::super::tests::try_cmd_addressed(
        "Timmy",
        "@timmy:homeserver.local",
        "@timmy help",
    )
    .unwrap();
    let mentions_loc = output.contains("loc");
    assert!(mentions_loc, "help should list loc command: {output}");
}
#[test]
fn loc_default_returns_top_10() {
    use std::path::Path;
    // Project root under test: the parent of this crate's manifest directory.
    let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
    let repo_root = manifest_dir.parent().unwrap_or(Path::new("."));
    let pool = Arc::new(AgentPool::new_test(3000));
    let rooms = Arc::new(Mutex::new(HashSet::new()));
    let output = handle_loc(&make_ctx(&pool, &rooms, repo_root, "")).unwrap();
    assert!(
        output.contains("Top"),
        "output should contain 'Top': {output}"
    );
    // Ranked entries look like "N. `path` — M lines"; with no argument there are at most ten.
    let ranked = output.lines().filter(|line| line.contains(". `")).count();
    assert!(ranked <= 10, "default should return at most 10 files, got {ranked}");
}
#[test]
fn loc_with_arg_5_returns_at_most_5() {
    use std::path::Path;
    // Project root under test: the parent of this crate's manifest directory.
    let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
    let repo_root = manifest_dir.parent().unwrap_or(Path::new("."));
    let pool = Arc::new(AgentPool::new_test(3000));
    let rooms = Arc::new(Mutex::new(HashSet::new()));
    let output = handle_loc(&make_ctx(&pool, &rooms, repo_root, "5")).unwrap();
    // An explicit N caps the number of ranked entries.
    let ranked = output.lines().filter(|line| line.contains(". `")).count();
    assert!(ranked <= 5, "loc 5 should return at most 5 files, got {ranked}");
}
#[test]
fn loc_with_arg_20_returns_at_most_20() {
    use std::path::Path;
    // Project root under test: the parent of this crate's manifest directory.
    let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
    let repo_root = manifest_dir.parent().unwrap_or(Path::new("."));
    let pool = Arc::new(AgentPool::new_test(3000));
    let rooms = Arc::new(Mutex::new(HashSet::new()));
    let output = handle_loc(&make_ctx(&pool, &rooms, repo_root, "20")).unwrap();
    // An N above the default is honoured as an upper bound too.
    let ranked = output.lines().filter(|line| line.contains(". `")).count();
    assert!(ranked <= 20, "loc 20 should return at most 20 files, got {ranked}");
}
#[test]
fn loc_output_contains_rank_and_line_count() {
    use std::path::Path;
    // Project root under test: the parent of this crate's manifest directory.
    let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
    let repo_root = manifest_dir.parent().unwrap_or(Path::new("."));
    let pool = Arc::new(AgentPool::new_test(3000));
    let rooms = Arc::new(Mutex::new(HashSet::new()));
    let output = handle_loc(&make_ctx(&pool, &rooms, repo_root, "")).unwrap();
    // Entry format is "N. `path` — M lines": check the leading rank and the trailing unit.
    let has_first_rank = output.contains("1. `");
    assert!(
        has_first_rank,
        "first result should start with rank: {output}"
    );
    let mentions_lines = output.contains("lines");
    assert!(
        mentions_lines,
        "output should mention 'lines': {output}"
    );
}
#[test]
fn loc_zero_arg_returns_usage() {
    use std::path::Path;
    // Project root under test: the parent of this crate's manifest directory.
    let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
    let repo_root = manifest_dir.parent().unwrap_or(Path::new("."));
    let pool = Arc::new(AgentPool::new_test(3000));
    let rooms = Arc::new(Mutex::new(HashSet::new()));
    // "0" is a valid number but a meaningless N, so the handler answers with usage text.
    let output = handle_loc(&make_ctx(&pool, &rooms, repo_root, "0")).unwrap();
    let shows_usage = output.contains("Usage");
    assert!(
        shows_usage,
        "loc 0 should show usage: {output}"
    );
}
#[test]
fn loc_filepath_returns_line_count() {
    let dir = tempfile::tempdir().expect("tempdir");
    // 42 newline-terminated lines, so the reported count must be 42.
    let body: String = (0..42).map(|i| format!("fn line_{i}() {{}}\n")).collect();
    std::fs::write(dir.path().join("hello.rs"), body).unwrap();

    let pool = Arc::new(AgentPool::new_test(3000));
    let rooms = Arc::new(Mutex::new(HashSet::new()));
    // A non-numeric argument is interpreted as a path relative to the project root.
    let output = handle_loc(&make_ctx(&pool, &rooms, dir.path(), "hello.rs")).unwrap();
    let reports_count = output.contains("42");
    assert!(
        reports_count,
        "should report 42 lines for hello.rs: {output}"
    );
    let names_file = output.contains("hello.rs");
    assert!(
        names_file,
        "output should mention the filename: {output}"
    );
}
#[test]
fn loc_filepath_nonexistent_returns_error() {
    // An empty temporary directory guarantees the requested file is absent.
    let dir = tempfile::tempdir().expect("tempdir");
    let pool = Arc::new(AgentPool::new_test(3000));
    let rooms = Arc::new(Mutex::new(HashSet::new()));
    let output = handle_loc(&make_ctx(&pool, &rooms, dir.path(), "does_not_exist.rs")).unwrap();
    // Either of the handler's two failure messages counts as a clear error.
    let is_clear_error = ["not found", "Error"]
        .iter()
        .any(|needle| output.contains(needle));
    assert!(
        is_clear_error,
        "nonexistent file should return a clear error: {output}"
    );
}
#[test]
fn loc_skips_worktrees_directory() {
    use std::io::Write as _;
    // Build the fixture explicitly: running against the live checkout would pass
    // vacuously whenever no worktree happens to exist.
    let dir = tempfile::tempdir().expect("tempdir");
    // A large source file inside .huskies/worktrees/<name>/ — it must NOT appear.
    let worktree = dir
        .path()
        .join(".huskies")
        .join("worktrees")
        .join("feature");
    std::fs::create_dir_all(&worktree).unwrap();
    {
        let mut f = std::fs::File::create(worktree.join("worktree_copy.rs")).unwrap();
        for i in 0..1000 {
            writeln!(f, "fn copy_{i}() {{}}").unwrap();
        }
    }
    // Real source file at the root — must appear, proving the walk itself ran.
    {
        let mut f = std::fs::File::create(dir.path().join("lib.rs")).unwrap();
        for i in 0..5 {
            writeln!(f, "fn f{i}() {{}}").unwrap();
        }
    }
    let agents = Arc::new(AgentPool::new_test(3000));
    let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
    let ctx = make_ctx(&agents, &ambient_rooms, dir.path(), "50");
    let output = handle_loc(&ctx).unwrap();
    assert!(
        !output.contains(".huskies/worktrees"),
        "output must not include paths inside worktrees: {output}"
    );
    // Separator-independent check on the file name itself.
    assert!(
        !output.contains("worktree_copy.rs"),
        "files inside a worktree must not be listed: {output}"
    );
    assert!(
        output.contains("lib.rs"),
        "lib.rs should appear in loc output: {output}"
    );
}
#[test]
fn loc_skips_target_directory() {
    let dir = tempfile::tempdir().expect("tempdir");
    let root = dir.path();

    // A 1000-line .rs file under target/ — it would top the ranking if target/ were walked.
    let target_dir = root.join("target");
    std::fs::create_dir(&target_dir).unwrap();
    let generated: String = (0..1000)
        .map(|i| format!("fn generated_{i}() {{}}\n"))
        .collect();
    std::fs::write(target_dir.join("huge_generated.rs"), generated).unwrap();

    // A small genuine source file at the root — this one must be listed.
    let real: String = (0..5).map(|i| format!("fn f{i}() {{}}\n")).collect();
    std::fs::write(root.join("lib.rs"), real).unwrap();

    let pool = Arc::new(AgentPool::new_test(3000));
    let rooms = Arc::new(Mutex::new(HashSet::new()));
    let output = handle_loc(&make_ctx(&pool, &rooms, root, "50")).unwrap();
    let lists_target = output.contains("target/");
    assert!(
        !lists_target,
        "output must not include files under target/: {output}"
    );
    let lists_lib = output.contains("lib.rs");
    assert!(
        lists_lib,
        "lib.rs should appear in loc output: {output}"
    );
}
#[test]
fn loc_excludes_lockfiles_from_results() {
    let dir = tempfile::tempdir().expect("tempdir");
    let root = dir.path();

    // A 500-line package-lock.json: `.json` is a recognised extension, so only the
    // filename exclusion list can keep it out of the ranking.
    std::fs::write(
        root.join("package-lock.json"),
        " \"line\": true,\n".repeat(500),
    )
    .unwrap();

    // A genuine source file so the report is non-empty.
    let real: String = (0..20).map(|i| format!("fn line_{i}() {{}}\n")).collect();
    std::fs::write(root.join("main.rs"), real).unwrap();

    let pool = Arc::new(AgentPool::new_test(3000));
    let rooms = Arc::new(Mutex::new(HashSet::new()));
    let output = handle_loc(&make_ctx(&pool, &rooms, root, "50")).unwrap();
    let lists_lockfile = output.contains("package-lock.json");
    assert!(
        !lists_lockfile,
        "package-lock.json must be excluded from loc output: {output}"
    );
    let lists_main = output.contains("main.rs");
    assert!(
        lists_main,
        "main.rs should appear in loc output: {output}"
    );
}
#[test]
fn loc_excludes_cargo_lock_from_results() {
    use std::io::Write as _;
    let dir = tempfile::tempdir().expect("tempdir");
    // Cargo.lock has no recognised source extension so it would be skipped
    // anyway — but we still verify EXCLUDED_FILENAMES contains it.
    assert!(
        EXCLUDED_FILENAMES.contains(&"Cargo.lock"),
        "EXCLUDED_FILENAMES must contain Cargo.lock"
    );
    // Write a Cargo.lock with many lines and verify it is excluded.
    let lockfile = dir.path().join("Cargo.lock");
    {
        let mut f = std::fs::File::create(&lockfile).unwrap();
        for _ in 0..500 {
            writeln!(f, "name = \"foo\"").unwrap();
        }
    }
    // A real source file next to it, so an empty report cannot satisfy the test.
    let source = dir.path().join("lib.rs");
    {
        let mut f = std::fs::File::create(&source).unwrap();
        for i in 0..10 {
            writeln!(f, "fn f{i}() {{}}").unwrap();
        }
    }
    let agents = Arc::new(AgentPool::new_test(3000));
    let ambient_rooms = Arc::new(Mutex::new(HashSet::new()));
    let ctx = make_ctx(&agents, &ambient_rooms, dir.path(), "50");
    let output = handle_loc(&ctx).unwrap();
    assert!(
        !output.contains("Cargo.lock"),
        "Cargo.lock must be excluded from loc output: {output}"
    );
    // Without this, "No source files found." would also pass the check above.
    assert!(
        output.contains("lib.rs"),
        "lib.rs should appear in loc output: {output}"
    );
}
#[test]
fn excluded_filenames_constant_is_defined() {
    // The exclusion list must keep covering the two lockfiles called out in the story.
    for required in ["package-lock.json", "Cargo.lock"] {
        assert!(
            EXCLUDED_FILENAMES.contains(&required),
            "EXCLUDED_FILENAMES must contain {required}"
        );
    }
}
#[test]
fn loc_works_via_full_dispatch() {
    // Goes through the shared dispatch helper rather than calling the handler directly,
    // i.e. the same path every transport (Matrix, WhatsApp, Slack) relies on.
    let reply = super::super::tests::try_cmd_addressed(
        "Timmy",
        "@timmy:homeserver.local",
        "@timmy loc 1",
    );
    // The content is deliberately not checked: the helper's project root may hold no
    // source files, so both "No source files found." and a ranked list are acceptable.
    // What matters is that the command answered at all.
    let responded = reply.is_some();
    assert!(
        responded,
        "loc command must respond via dispatch (not fall through to LLM)"
    );
}
}