huskies: merge 1149 story huskies health chat command — surface gateway, sled, matrix, creds, and build-hash status

This commit is contained in:
dave
2026-05-19 20:07:03 +00:00
parent 5d0801854c
commit 9a286315a3
6 changed files with 732 additions and 1 deletions
+15
View File
@@ -269,6 +269,11 @@ pub fn commands() -> &'static [BotCommand] {
description: "List orphaned worktrees (dry run), or `cleanup_worktrees --confirm` to remove them",
handler: handle_cleanup_worktrees_fallback,
},
BotCommand {
name: "health",
description: "Show subsystem health: gateway, sled, matrix-sync, creds, and build-hash",
handler: handle_health_fallback,
},
BotCommand {
name: "new",
description: "Bootstrap a new project container (gateway only): `new project <name>`",
@@ -446,6 +451,16 @@ fn handle_project_rebuild_fallback(_ctx: &CommandContext) -> Option<String> {
None
}
/// Fallback handler for the `health` command when it is not intercepted by the
/// async handler in `on_room_message`. In practice this is never called — health
/// is detected and handled before `try_handle_command` is invoked. The entry
/// exists in the registry only so `help` lists it.
///
/// Returns `None` to prevent the LLM from receiving "health" as a prompt.
fn handle_health_fallback(_ctx: &CommandContext) -> Option<String> {
None
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
@@ -6,6 +6,7 @@ use crate::services::Services;
use matrix_sdk::ruma::{OwnedEventId, OwnedRoomId, OwnedUserId};
use std::collections::{BTreeMap, HashSet, VecDeque};
use std::sync::Arc;
use std::sync::atomic::AtomicI64;
use tokio::sync::Mutex as TokioMutex;
use tokio::sync::RwLock;
@@ -104,6 +105,10 @@ pub struct BotContext {
/// Used by the "rebuild gateway" command to construct the health-check URL
/// passed to the trampoline. `None` in standalone single-project mode.
pub gateway_port: Option<u16>,
/// Timestamp (ms since Unix epoch) of the last Matrix event received in any
/// configured room. Updated atomically on every `on_room_message` call so
/// the `health` command can detect a stale or dead sync loop.
pub last_matrix_event_ms: Arc<AtomicI64>,
}
impl BotContext {
@@ -299,6 +304,7 @@ mod tests {
SEEN_EVENT_IDS_CAP,
))),
gateway_port: None,
last_matrix_event_ms: Arc::new(AtomicI64::new(chrono::Utc::now().timestamp_millis())),
}
}
@@ -19,6 +19,20 @@ use super::super::verification::check_sender_verified;
use super::handle_message;
/// Return `true` when the message is a `health` command addressed to the bot.
///
/// Recognised case-insensitively as the single word `health` after stripping the bot
/// mention prefix. Any trailing whitespace is ignored; extra arguments are not
/// expected and are silently discarded.
fn extract_health_command(message: &str, bot_name: &str, bot_user_id: &str) -> bool {
let stripped = crate::chat::util::strip_bot_mention(message, bot_name, bot_user_id);
let trimmed = stripped
.trim()
.trim_start_matches(|c: char| !c.is_alphanumeric());
let cmd = trimmed.split_whitespace().next().unwrap_or("");
cmd.eq_ignore_ascii_case("health")
}
/// Return `true` when the message is a "rebuild gateway" command addressed to the bot.
///
/// The command is recognised case-insensitively as `rebuild gateway` after stripping
@@ -100,6 +114,12 @@ pub(in crate::chat::transport::matrix::bot) async fn on_room_message(
return;
}
// Update last-event timestamp so the `health` command can detect a stale sync loop.
ctx.last_matrix_event_ms.store(
chrono::Utc::now().timestamp_millis(),
std::sync::atomic::Ordering::Relaxed,
);
// Ignore the bot's own messages to prevent echo loops.
if ev.sender == ctx.matrix_user_id {
return;
@@ -249,6 +269,7 @@ pub(in crate::chat::transport::matrix::bot) async fn on_room_message(
"config",
"project-rebuild",
"upgrade",
"health",
];
let stripped = crate::chat::util::strip_bot_mention(
@@ -546,6 +567,26 @@ pub(in crate::chat::transport::matrix::bot) async fn on_room_message(
return;
}
// `health` — async subsystem health report (gateway + standalone).
if extract_health_command(
&user_message,
&ctx.services.bot_name,
ctx.matrix_user_id.as_str(),
) {
slog!("[matrix-bot] Handling 'health' from {sender}");
let response = super::super::super::health::run_health_check(&ctx).await;
let html = markdown_to_html(&response);
if let Ok(msg_id) = ctx
.transport
.send_message(&room_id_str, &response, &html)
.await
&& let Ok(event_id) = msg_id.parse()
{
ctx.bot_sent_event_ids.lock().await.insert(event_id);
}
return;
}
// Check for bot-level commands (help, status, ambient, …) before invoking
// the LLM. All commands are registered in commands.rs — no special-casing
// needed here.
+2 -1
View File
@@ -6,7 +6,7 @@ use matrix_sdk::ruma::OwnedRoomId;
use matrix_sdk::{Client, LoopCtrl, config::SyncSettings};
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::atomic::{AtomicBool, AtomicI64, AtomicU64, Ordering};
use tokio::sync::Mutex as TokioMutex;
use tokio::sync::{RwLock, watch};
@@ -336,6 +336,7 @@ pub async fn run_bot(
super::context::SEEN_EVENT_IDS_CAP,
))),
gateway_port,
last_matrix_event_ms: Arc::new(AtomicI64::new(chrono::Utc::now().timestamp_millis())),
};
slog!(
+666
View File
@@ -0,0 +1,666 @@
//! `health` chat command — surface gateway, sled, matrix, creds, and build-hash status.
//!
//! Runs one check per subsystem concurrently (each with a 5-second timeout) and
//! returns a compact report: one line per subsystem with PASS / WARN / FAIL and a
//! remediation hint on every non-PASS row. Output is capped at 20 lines; when
//! more lines would be produced, the oldest WARN rows are dropped first.
use crate::chat::transport::matrix::bot::context::BotContext;
use std::collections::BTreeMap;
use std::sync::atomic::Ordering;
use std::time::Duration;
use tokio::time::timeout;
// ── Status ─────────────────────────────────────────────────────────────────────
/// Health status for a single subsystem.
#[derive(Debug, Clone, PartialEq)]
enum Status {
/// Subsystem is operating normally.
Pass,
/// Subsystem is degraded but not fully broken.
Warn,
/// Subsystem has failed and needs intervention.
Fail,
}
// ── HealthLine ─────────────────────────────────────────────────────────────────
/// One output row from the health check.
#[derive(Debug, Clone)]
struct HealthLine {
subsystem: String,
status: Status,
/// Short description of why the check is non-PASS.
detail: Option<String>,
/// Remediation hint shown after " — " on WARN/FAIL rows.
hint: Option<String>,
}
impl HealthLine {
fn pass(subsystem: impl Into<String>) -> Self {
Self {
subsystem: subsystem.into(),
status: Status::Pass,
detail: None,
hint: None,
}
}
fn warn(
subsystem: impl Into<String>,
detail: impl Into<String>,
hint: impl Into<String>,
) -> Self {
Self {
subsystem: subsystem.into(),
status: Status::Warn,
detail: Some(detail.into()),
hint: Some(hint.into()),
}
}
fn fail(
subsystem: impl Into<String>,
detail: impl Into<String>,
hint: impl Into<String>,
) -> Self {
Self {
subsystem: subsystem.into(),
status: Status::Fail,
detail: Some(detail.into()),
hint: Some(hint.into()),
}
}
/// Format as a single Markdown-friendly line.
fn format(&self) -> String {
let status = match self.status {
Status::Pass => "PASS",
Status::Warn => "WARN",
Status::Fail => "FAIL",
};
match (&self.detail, &self.hint) {
(Some(d), Some(h)) => format!("{} {}: {}{}", self.subsystem, status, d, h),
(Some(d), None) => format!("{} {}: {}", self.subsystem, status, d),
(None, None) => format!("{} {}", self.subsystem, status),
(None, Some(h)) => format!("{} {}: — {}", self.subsystem, status, h),
}
}
}
// ── Truncation ────────────────────────────────────────────────────────────────
/// Maximum number of output lines before truncation.
const MAX_LINES: usize = 20;
/// Truncate to ≤ MAX_LINES by removing the oldest (first in order) WARN rows.
fn truncate_lines(mut lines: Vec<HealthLine>) -> Vec<HealthLine> {
while lines.len() > MAX_LINES {
if let Some(pos) = lines.iter().position(|l| l.status == Status::Warn) {
lines.remove(pos);
} else {
break;
}
}
lines
}
// ── Individual checks ────────────────────────────────────────────────────────
/// Check the `perm_rx` receiver — PASS when the permission listener holds the lock,
/// FAIL when no task is holding it (listener has died or was never started).
fn check_perm_rx(ctx: &BotContext) -> HealthLine {
if ctx.services.perm_rx.try_lock().is_err() {
HealthLine::pass("perm_rx")
} else {
HealthLine::fail("perm_rx", "listener not holding lock", "restart bot")
}
}
/// Check the Matrix sync loop by measuring the age of the last received event.
///
/// WARN after 60 s of silence, FAIL after 120 s. The timestamp is updated by
/// `on_room_message` on every incoming event so receiving the health command
/// itself resets the clock.
fn check_matrix_sync(ctx: &BotContext) -> HealthLine {
let last_ms = ctx.last_matrix_event_ms.load(Ordering::Relaxed);
let age_secs = (chrono::Utc::now().timestamp_millis() - last_ms).max(0) / 1000;
if age_secs < 60 {
HealthLine::pass("matrix-sync")
} else if age_secs < 120 {
HealthLine::warn(
"matrix-sync",
format!("no events in {age_secs}s"),
"check sync loop — may be a quiet room",
)
} else {
HealthLine::fail(
"matrix-sync",
format!("no events in {age_secs}s"),
"sync loop may be dead — restart bot",
)
}
}
/// Check LLM credentials (`~/.claude/.credentials.json`).
///
/// FAIL if the file is missing or unreadable, FAIL if the access token is
/// expired, WARN if it expires within the next 7 days.
fn check_creds() -> HealthLine {
match crate::llm::oauth::read_credentials() {
Err(e) => HealthLine::fail("creds", e, "run `claude login`"),
Ok(creds) => {
let now_secs = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
let expires_at = creds.claude_ai_oauth.expires_at;
if expires_at < now_secs {
HealthLine::fail("creds", "token expired", "run `claude login` to refresh")
} else {
let days_left = (expires_at - now_secs) / 86400;
if days_left < 7 {
HealthLine::warn(
"creds",
format!("token expires in {days_left}d"),
"run `claude login` to refresh",
)
} else {
HealthLine::pass("creds")
}
}
}
}
}
/// Compare the compile-time build hash against the current HEAD of the workspace.
///
/// WARN when master has advanced past the running binary's commit (a rebuild is
/// available but not urgent). PASS when hashes match or HEAD cannot be read.
async fn check_build_hash(project_root: &std::path::Path) -> HealthLine {
let running = option_env!("BUILD_GIT_HASH").unwrap_or("unknown");
// Read current HEAD from git (non-blocking, run in a spawn_blocking call).
let repo_root = project_root.to_path_buf();
let head = tokio::task::spawn_blocking(move || {
std::process::Command::new("git")
.args(["rev-parse", "--short", "HEAD"])
.current_dir(&repo_root)
.output()
.ok()
.filter(|o| o.status.success())
.and_then(|o| String::from_utf8(o.stdout).ok())
.map(|s| s.trim().to_string())
})
.await
.unwrap_or(None);
match head {
None => HealthLine::pass("build-hash"),
Some(ref head_hash) => {
if running == "unknown" || head_hash == running {
HealthLine::pass("build-hash")
} else {
HealthLine::warn(
"build-hash",
format!("running {running}, HEAD is {head_hash}"),
"run `rebuild` to update",
)
}
}
}
}
/// Check each registered sled's `/health` endpoint with a 5-second timeout.
///
/// Returns one [`HealthLine`] per sled. PASS when the sled responds with HTTP
/// 2xx; FAIL when the request times out or returns an error status.
async fn check_sleds(
store: &tokio::sync::RwLock<BTreeMap<String, crate::service::gateway::config::ProjectEntry>>,
) -> Vec<HealthLine> {
let entries: Vec<(String, Option<String>)> = store
.read()
.await
.iter()
.map(|(n, e)| (n.clone(), e.url.clone()))
.collect();
if entries.is_empty() {
return vec![HealthLine::warn(
"sled",
"no sleds registered",
"add projects to projects.toml",
)];
}
let client = reqwest::Client::new();
let mut lines = Vec::new();
for (name, url_opt) in entries {
let subsystem = format!("sled:{name}");
let line = match url_opt {
None => HealthLine::warn(subsystem, "no URL configured", "set url in projects.toml"),
Some(url) => {
let health_url = format!("{}/health", url.trim_end_matches('/'));
let result = timeout(Duration::from_secs(5), client.get(&health_url).send()).await;
match result {
Err(_) => {
HealthLine::fail(subsystem, "timed out", "check container is running")
}
Ok(Err(e)) => HealthLine::fail(
subsystem,
format!("unreachable: {}", short_error(&e.to_string())),
"check container is running",
),
Ok(Ok(resp)) if resp.status().is_success() => HealthLine::pass(subsystem),
Ok(Ok(resp)) => HealthLine::fail(
subsystem,
format!("HTTP {}", resp.status().as_u16()),
"check container logs",
),
}
}
};
lines.push(line);
}
lines
}
/// Check the gateway process: pidfile validity and (on macOS) binary codesign.
///
/// PASS when our PID is recorded in the pidfile. On macOS, also verifies that
/// `~/bin/huskies-bin` has a valid ad-hoc signature; FAIL with a `script/local-release`
/// hint when it does not.
fn check_gateway_process() -> HealthLine {
// Verify that the pidfile records our PID (i.e. this IS the live gateway).
let pidfile_ok = check_pidfile_matches_self();
// On macOS, verify the installed binary is codesigned.
#[cfg(target_os = "macos")]
{
if !check_codesign_macos() {
return HealthLine::fail(
"gateway-process",
"codesign invalid",
"run `script/local-release`",
);
}
}
if !pidfile_ok {
return HealthLine::warn(
"gateway-process",
"pidfile missing or stale",
"restart gateway with --gateway flag",
);
}
HealthLine::pass("gateway-process")
}
/// Return `true` when `$HOME/.huskies/gateway.pid` exists and contains our PID.
fn check_pidfile_matches_self() -> bool {
let home = homedir::my_home().ok().flatten();
let home = match home {
Some(h) => h,
None => return false,
};
let path = home.join(".huskies").join("gateway.pid");
let content = std::fs::read_to_string(&path).unwrap_or_default();
content.trim().parse::<u32>().unwrap_or(0) == std::process::id()
}
/// On macOS, return `true` when `~/bin/huskies-bin` passes `codesign --verify`.
///
/// Falls back to the current executable when `~/bin/huskies-bin` does not exist.
/// Returns `true` (assume ok) if the `codesign` tool is unavailable.
#[cfg(target_os = "macos")]
fn check_codesign_macos() -> bool {
let target = if let Ok(home) = std::env::var("HOME") {
let installed = std::path::PathBuf::from(home)
.join("bin")
.join("huskies-bin");
if installed.exists() {
installed
} else {
match std::env::current_exe() {
Ok(p) => p,
Err(_) => return true,
}
}
} else {
match std::env::current_exe() {
Ok(p) => p,
Err(_) => return true,
}
};
std::process::Command::new("codesign")
.args(["--verify", "--quiet", target.to_str().unwrap_or("")])
.output()
.map(|o| o.status.success())
.unwrap_or(true)
}
// ── Entry point ────────────────────────────────────────────────────────────────
/// Run all health checks and return a formatted Markdown report (≤ 20 lines).
///
/// Gateway-specific checks (gateway-process, per-sled probes) are included
/// only when running in gateway mode. All other checks run in every mode.
pub async fn run_health_check(ctx: &BotContext) -> String {
let mut lines: Vec<HealthLine> = Vec::new();
// Gateway-only checks
if ctx.is_gateway() {
lines.push(check_gateway_process());
if let Some(ref store) = ctx.gateway_projects_store {
lines.extend(check_sleds(store).await);
}
}
// Shared checks — run concurrently where possible.
let perm_line = check_perm_rx(ctx);
let sync_line = check_matrix_sync(ctx);
let creds_line = check_creds();
let hash_line = check_build_hash(&ctx.services.project_root).await;
lines.push(perm_line);
lines.push(sync_line);
lines.push(creds_line);
lines.push(hash_line);
let lines = truncate_lines(lines);
lines
.iter()
.map(|l| l.format())
.collect::<Vec<_>>()
.join("\n")
}
// ── Utilities ────────────────────────────────────────────────────────────────
/// Shorten a long error string to the first 60 characters for compact display.
fn short_error(s: &str) -> String {
s.chars().take(60).collect()
}
// ── Tests ─────────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
// -- HealthLine formatting ------------------------------------------------
#[test]
fn pass_line_formats_without_detail() {
let line = HealthLine::pass("perm_rx");
assert_eq!(line.format(), "perm_rx PASS");
}
#[test]
fn fail_line_formats_with_detail_and_hint() {
let line = HealthLine::fail(
"gateway-process",
"codesign invalid",
"run script/local-release",
);
assert_eq!(
line.format(),
"gateway-process FAIL: codesign invalid — run script/local-release"
);
}
#[test]
fn warn_line_formats_with_detail_and_hint() {
let line = HealthLine::warn("build-hash", "running abc, HEAD is def", "run rebuild");
assert_eq!(
line.format(),
"build-hash WARN: running abc, HEAD is def — run rebuild"
);
}
// -- Truncation -----------------------------------------------------------
#[test]
fn truncate_drops_oldest_warn_first() {
let mut lines: Vec<HealthLine> = (0..22)
.map(|i| {
if i % 3 == 0 {
HealthLine::fail(format!("sled:{i}"), "down", "fix it")
} else {
HealthLine::warn(format!("check:{i}"), "slow", "investigate")
}
})
.collect();
// Manually insert a known WARN at position 0 and a FAIL at position 1
lines.insert(0, HealthLine::warn("oldest-warn", "stale", "restart"));
lines.insert(1, HealthLine::fail("important-fail", "broken", "fix"));
let result = truncate_lines(lines.clone());
assert!(
result.len() <= MAX_LINES,
"output must be ≤ {MAX_LINES} lines"
);
// FAILs must be preserved.
let fail_count = result.iter().filter(|l| l.status == Status::Fail).count();
let orig_fail_count = lines.iter().filter(|l| l.status == Status::Fail).count();
assert_eq!(
fail_count,
orig_fail_count.min(MAX_LINES),
"all FAIL lines must be kept when they fit"
);
}
#[test]
fn truncate_noop_when_under_limit() {
let lines: Vec<HealthLine> = (0..5).map(|i| HealthLine::pass(format!("s{i}"))).collect();
let result = truncate_lines(lines.clone());
assert_eq!(result.len(), 5);
}
#[test]
fn truncate_stops_at_fails_when_no_warns_left() {
// 25 FAIL lines — nothing to drop; output is clamped at MAX_LINES.
let lines: Vec<HealthLine> = (0..25)
.map(|i| HealthLine::fail(format!("s{i}"), "broken", "fix"))
.collect();
let result = truncate_lines(lines);
// When only FAILs are present, truncation stops because no WARNs can be removed.
assert_eq!(result.len(), 25, "FAILs are never dropped by truncation");
}
// -- perm_rx check --------------------------------------------------------
#[tokio::test]
async fn perm_rx_pass_when_locked() {
use crate::services::Services;
use std::sync::Arc;
use tokio::sync::Mutex as TokioMutex;
let (perm_tx, perm_rx) = tokio::sync::mpsc::unbounded_channel();
let perm_rx_arc = Arc::new(TokioMutex::new(perm_rx));
// Acquire the lock to simulate the permission listener holding it.
let _guard = perm_rx_arc.try_lock().unwrap();
// Build a minimal services bundle referencing our locked perm_rx.
let services = Arc::new(Services {
project_root: std::path::PathBuf::from("/tmp"),
agents: Arc::new(crate::agents::AgentPool::new_test(3000)),
bot_name: "test".to_string(),
bot_user_id: "@bot:test".to_string(),
ambient_rooms: Arc::new(std::sync::Mutex::new(std::collections::HashSet::new())),
perm_rx: Arc::clone(&perm_rx_arc),
pending_perm_replies: Arc::new(TokioMutex::new(std::collections::HashMap::new())),
permission_timeout_secs: 120,
status: Arc::new(crate::service::status::StatusBroadcaster::new()),
chat_dispatcher: Arc::new(crate::chat::dispatcher::ChatDispatcher::new(1_500)),
});
// Build a minimal BotContext just to pass services.
let ctx = make_test_ctx(services);
let line = check_perm_rx(&ctx);
assert_eq!(
line.status,
Status::Pass,
"perm_rx should PASS when a task holds the lock"
);
drop(perm_tx); // suppress unused warning
}
#[tokio::test]
async fn perm_rx_fail_when_unlocked() {
use crate::services::Services;
use std::sync::Arc;
use tokio::sync::Mutex as TokioMutex;
let (_perm_tx, perm_rx) = tokio::sync::mpsc::unbounded_channel();
let perm_rx_arc = Arc::new(TokioMutex::new(perm_rx));
// Lock is NOT held by anyone.
let services = Arc::new(Services {
project_root: std::path::PathBuf::from("/tmp"),
agents: Arc::new(crate::agents::AgentPool::new_test(3000)),
bot_name: "test".to_string(),
bot_user_id: "@bot:test".to_string(),
ambient_rooms: Arc::new(std::sync::Mutex::new(std::collections::HashSet::new())),
perm_rx: Arc::clone(&perm_rx_arc),
pending_perm_replies: Arc::new(TokioMutex::new(std::collections::HashMap::new())),
permission_timeout_secs: 120,
status: Arc::new(crate::service::status::StatusBroadcaster::new()),
chat_dispatcher: Arc::new(crate::chat::dispatcher::ChatDispatcher::new(1_500)),
});
let ctx = make_test_ctx(services);
let line = check_perm_rx(&ctx);
assert_eq!(
line.status,
Status::Fail,
"perm_rx should FAIL when no task holds the lock"
);
}
// -- matrix-sync check ----------------------------------------------------
#[tokio::test]
async fn matrix_sync_pass_when_recent() {
let services = crate::services::Services::new_test(
std::path::PathBuf::from("/tmp"),
"bot".to_string(),
);
let ctx = make_test_ctx(services);
// Set last event to just now.
ctx.last_matrix_event_ms
.store(chrono::Utc::now().timestamp_millis(), Ordering::Relaxed);
let line = check_matrix_sync(&ctx);
assert_eq!(line.status, Status::Pass);
}
#[tokio::test]
async fn matrix_sync_fail_when_stale() {
let services = crate::services::Services::new_test(
std::path::PathBuf::from("/tmp"),
"bot".to_string(),
);
let ctx = make_test_ctx(services);
// Simulate 200 seconds of silence.
let old_ms = chrono::Utc::now().timestamp_millis() - 200_000;
ctx.last_matrix_event_ms.store(old_ms, Ordering::Relaxed);
let line = check_matrix_sync(&ctx);
assert_eq!(line.status, Status::Fail);
assert!(
line.detail.as_deref().unwrap_or("").contains("200s")
|| line.detail.as_deref().unwrap_or("").contains("s"),
"detail should mention age in seconds"
);
}
// -- creds check ----------------------------------------------------------
#[test]
fn creds_fail_when_file_missing() {
// In the test environment there is unlikely to be a ~/.claude/.credentials.json
// with a valid non-expired token, so we just confirm the function returns a
// HealthLine without panicking.
let line = check_creds();
// We don't assert a specific status — the check should not panic.
let _ = line.format();
}
// -- build_hash check -----------------------------------------------------
#[tokio::test]
async fn build_hash_pass_when_git_unavailable() {
// In a test environment without a git repo at /tmp/nonexistent, the check
// should gracefully return PASS rather than panicking.
let line = check_build_hash(std::path::Path::new("/tmp/nonexistent")).await;
// Should either PASS or produce a sensible result — must not panic.
let _ = line.format();
}
// -- health command registration ------------------------------------------
#[test]
fn health_command_registered_in_commands() {
let cmds = crate::chat::commands::commands();
assert!(
cmds.iter().any(|c| c.name == "health"),
"health must be registered in commands()"
);
}
#[test]
fn health_command_has_description() {
let cmds = crate::chat::commands::commands();
let cmd = cmds.iter().find(|c| c.name == "health").unwrap();
assert!(!cmd.description.is_empty());
}
// -- Helper ---------------------------------------------------------------
/// Build a minimal `BotContext` for testing purposes.
fn make_test_ctx(services: std::sync::Arc<crate::services::Services>) -> BotContext {
use std::collections::HashSet;
use std::sync::Arc;
use std::sync::atomic::AtomicI64;
use tokio::sync::Mutex as TokioMutex;
BotContext {
services,
matrix_user_id: "@bot:example.com".parse().unwrap(),
target_room_ids: vec![],
allowed_users: vec![],
history: Arc::new(TokioMutex::new(std::collections::HashMap::new())),
history_size: 20,
bot_sent_event_ids: Arc::new(TokioMutex::new(HashSet::new())),
htop_sessions: Arc::new(TokioMutex::new(std::collections::HashMap::new())),
transport: Arc::new(crate::chat::transport::whatsapp::WhatsAppTransport::new(
"test-phone".to_string(),
"test-token".to_string(),
"pipeline_notification".to_string(),
)),
timer_store: Arc::new(crate::service::timer::TimerStore::load(
std::path::PathBuf::from("/tmp/timers-health.json"),
)),
gateway_active_project: None,
gateway_projects_store: None,
handled_incoming_event_ids: Arc::new(TokioMutex::new(
crate::chat::transport::matrix::bot::context::SeenEventIds::new(
crate::chat::transport::matrix::bot::context::SEEN_EVENT_IDS_CAP,
),
)),
gateway_port: None,
last_matrix_event_ms: Arc::new(AtomicI64::new(chrono::Utc::now().timestamp_millis())),
}
}
}
+2
View File
@@ -25,6 +25,8 @@ pub mod commands;
pub(crate) mod config;
/// Story deletion command — handles `!delete` bot commands to remove work items.
pub mod delete;
/// `health` chat command — surface gateway, sled, matrix, creds, and build-hash status.
pub mod health;
/// htop-style agent monitor command — renders a live process table in Matrix.
pub mod htop;
/// `new project <name>` chat command — Phase 1 gateway project bootstrap.