huskies: merge 1019

This commit is contained in:
dave
2026-05-14 08:48:11 +00:00
parent ebf58ef224
commit e3f5875b8e
6 changed files with 157 additions and 61 deletions
+29 -36
View File
@@ -81,6 +81,18 @@ pub(crate) fn spawn_event_bridges(
// in-memory register whenever a story reaches a terminal stage.
crate::agents::pool::cost_rollup_subscriber::spawn_cost_rollup_subscriber(root.clone());
// Done→archived subscriber: archives Done stories after the configured
// retention period. Fires on each Stage::Done TransitionFired event and
// sleeps for the remaining retention time from merged_at before archiving.
// Replaces the periodic sweep_done_to_archived scan that ran on every
// sweep_interval_secs tick.
{
// Resolve the retention period once, here, and hand it to the subscriber
// as a fixed Duration.
let done_retention = config::ProjectConfig::load(&root)
.map(|c| std::time::Duration::from_secs(c.watcher.done_retention_secs))
// Config could not be loaded: fall back to 4 hours (4 * 3600 s).
// NOTE(review): confirm this matches the watcher config's own default
// for `done_retention_secs`.
.unwrap_or_else(|_| std::time::Duration::from_secs(4 * 3600));
io::watcher::spawn_done_to_archived_subscriber(done_retention);
}
let watcher_auto_rx = watcher_tx.subscribe();
let watcher_auto_agents = Arc::clone(&agents);
tokio::spawn(async move {
@@ -100,28 +112,23 @@ pub(crate) fn spawn_event_bridges(
/// Spawn the unified 1-second background tick loop.
///
/// Fires due timers, runs the agent watchdog every 30 ticks, promotes
/// done→archived items every `sweep_interval_secs` ticks, and removes
/// orphaned worktrees every `worktree_sweep_interval_secs` ticks (default
/// 1200, i.e. 20 minutes).
/// Handles only genuinely time-based work:
/// - **Timer tick** (every second): fires due pipeline timers by comparing the
/// current wall-clock time against each timer's `due_at` field. Cannot be
/// reactive because timers encode absolute timestamps that only become
/// actionable when the clock reaches them.
/// - **Agent watchdog** (every 30 seconds): detects orphaned `Running` agents
/// by comparing elapsed time since the last heartbeat. Cannot be reactive
/// because the absence of an event (no heartbeat) is what signals a problem;
/// a `TransitionFired` subscriber would never fire for a silently crashed agent.
///
/// Stage-change-reactive work (done→archived archival, worktree cleanup) has
/// been moved to `TransitionFired` subscribers spawned from `spawn_event_bridges`.
pub(crate) fn spawn_tick_loop(
agents: Arc<AgentPool>,
timer_store: Arc<service::timer::TimerStore>,
root: Option<PathBuf>,
) {
let project_cfg = root
.as_ref()
.and_then(|r| config::ProjectConfig::load(r).ok());
let sweep_cfg = project_cfg
.as_ref()
.map(|c| c.watcher.clone())
.unwrap_or_default();
let sweep_every = sweep_cfg.sweep_interval_secs.max(1);
let done_retention = std::time::Duration::from_secs(sweep_cfg.done_retention_secs);
// Capture config for the worktree sweep (read once at startup).
let worktree_sweep_config = project_cfg.unwrap_or_default();
// Worktree orphan sweep: every 20 minutes by default.
let worktree_sweep_every: u64 = 1200;
let pending_count = timer_store.list().len();
crate::slog!("[tick] Unified tick loop started; {pending_count} pending timer(s)");
@@ -132,7 +139,8 @@ pub(crate) fn spawn_tick_loop(
interval.tick().await;
tick_count = tick_count.wrapping_add(1);
// Timer: fire due timers every second.
// Time-based: timers encode absolute due timestamps; only a
// wall-clock comparison can determine when one is due.
if let Some(ref r) = root {
let result = service::timer::tick_once(&timer_store, &agents, r).await;
if let Err(msg) = result {
@@ -140,8 +148,9 @@ pub(crate) fn spawn_tick_loop(
}
}
// Watchdog: detect orphaned Running agents every 30 ticks.
// Also reap stale Running merge_jobs from previous server instances.
// Time-based: the watchdog detects silence (no heartbeat within a
// timeout window). A `TransitionFired` subscriber cannot observe the
// absence of events, so this must remain on a periodic tick.
if tick_count.is_multiple_of(30) {
let found = agents.run_watchdog_pass(root.as_deref());
if found > 0 {
@@ -154,22 +163,6 @@ pub(crate) fn spawn_tick_loop(
}
agents.reap_stale_merge_jobs();
}
// Sweep: promote done→archived every sweep_interval_secs ticks.
if tick_count.is_multiple_of(sweep_every) {
io::watcher::sweep_done_to_archived(done_retention);
}
// Worktree orphan sweep: remove worktrees for done/archived/absent stories.
if tick_count.is_multiple_of(worktree_sweep_every)
&& let Some(ref r) = root
{
let removed =
crate::worktree::sweep_orphaned_worktrees(r, &worktree_sweep_config).await;
if removed > 0 {
crate::slog!("[worktree-sweep] Removed {removed} orphaned worktree(s).");
}
}
}
});
}