huskies: merge 1009

This commit is contained in:
dave
2026-05-13 22:50:13 +00:00
parent a5cd3a2152
commit 4e007bb770
56 changed files with 453 additions and 384 deletions
+30 -12
View File
@@ -78,39 +78,52 @@ mod tests {
#[test]
#[allow(clippy::string_slice)] // stale_holder is an ASCII-only literal longer than 12 bytes, so [..12] is in range and always falls on a char boundary
fn stale_claim_displaced_and_logged() {
use crate::crdt_state::{
init_for_test, our_node_id, read_item, write_claim, write_item_str,
};
use crate::crdt_state::{init_for_test, our_node_id, read_item, write_claim, write_item};
use crate::pipeline_state::{AgentClaim, AgentName, Stage};
use chrono::TimeZone;
init_for_test();
let story_id = "718_test_stale_displacement";
let stale_holder = "staledeadbeef0000000000000000000000000000";
// Place claimed_at well beyond the TTL so the claim is unambiguously stale.
let stale_time = chrono::Utc::now().timestamp() as f64 - CLAIM_TIMEOUT_SECS - 300.0;
let stale_time = chrono::Utc::now().timestamp() as u64 - CLAIM_TIMEOUT_SECS as u64 - 300;
// Seed the story with a stale claim from a foreign node.
write_item_str(
write_item(
story_id,
"2_current",
&Stage::Coding {
claim: Some(AgentClaim {
agent: AgentName(stale_holder.to_string()),
claimed_at: chrono::Utc
.timestamp_opt(stale_time as i64, 0)
.single()
.unwrap(),
}),
},
Some("Stale Claim Displacement Test"),
None,
None,
None,
Some(stale_holder),
Some(stale_time),
None,
);
// Confirm the stale claim is in place.
let before = read_item(story_id).expect("item should exist");
let before_claim = match before.stage() {
Stage::Coding { claim } => claim.as_ref(),
Stage::Merge { claim, .. } => claim.as_ref(),
_ => None,
};
assert_eq!(
before.claim().map(|c| c.node.as_str()),
before_claim.map(|c| c.agent.0.as_str()),
Some(stale_holder),
"pre-condition: item should be claimed by the stale holder"
);
let age = chrono::Utc::now().timestamp() as f64
- before.claim().map(|c| c.at as f64).unwrap_or(0.0);
- before_claim
.map(|c| c.claimed_at.timestamp() as f64)
.unwrap_or(0.0);
assert!(
age >= CLAIM_TIMEOUT_SECS,
"pre-condition: claim age ({age}s) must exceed TTL ({CLAIM_TIMEOUT_SECS}s)"
@@ -136,13 +149,18 @@ mod tests {
// Verify the new claim belongs to this node, not the stale holder.
let our_id = our_node_id().expect("node id should be available after init_for_test");
let after = read_item(story_id).expect("item should still exist");
let after_claim = match after.stage() {
Stage::Coding { claim } => claim.as_ref(),
Stage::Merge { claim, .. } => claim.as_ref(),
_ => None,
};
assert_eq!(
after.claim().map(|c| c.node.as_str()),
after_claim.map(|c| c.agent.0.as_str()),
Some(our_id.as_str()),
"new claim should have displaced the stale holder"
);
assert_ne!(
after.claim().map(|c| c.node.as_str()),
after_claim.map(|c| c.agent.0.as_str()),
Some(stale_holder),
"stale holder must no longer own the claim"
);
+22 -10
View File
@@ -44,7 +44,7 @@ pub(super) async fn scan_and_claim(
// Only claim stories in execution stages (Coding, Qa, Merge).
if !matches!(
item.stage(),
crate::pipeline_state::Stage::Coding
crate::pipeline_state::Stage::Coding { .. }
| crate::pipeline_state::Stage::Qa
| crate::pipeline_state::Stage::Merge { .. }
) {
@@ -65,19 +65,25 @@ pub(super) async fn scan_and_claim(
continue;
}
let item_claim = match item.stage() {
crate::pipeline_state::Stage::Coding { claim } => claim.as_ref(),
crate::pipeline_state::Stage::Merge { claim, .. } => claim.as_ref(),
_ => None,
};
// If already claimed by us, skip.
if item.claim().is_some_and(|c| c.node == our_node) {
if item_claim.is_some_and(|c| c.agent.0 == our_node) {
continue;
}
// If claimed by another node, respect the claim while it is fresh.
// Once the TTL expires the claim is considered stale regardless of
// whether the holder appears alive — displacement is purely TTL-driven.
if let Some(claim) = item.claim()
&& claim.node != our_node
if let Some(claim) = item_claim
&& claim.agent.0 != our_node
{
let now = chrono::Utc::now().timestamp() as u64;
let age = now.saturating_sub(claim.at) as f64;
let age = now.saturating_sub(claim.claimed_at.timestamp() as u64) as f64;
if age < CLAIM_TIMEOUT_SECS {
// Claim is still fresh — respect it.
continue;
@@ -87,7 +93,7 @@ pub(super) async fn scan_and_claim(
"[agent-mode] Displacing stale claim on '{}' held by {:.12}… \
(age {}s > TTL {}s)",
item.story_id(),
claim.node,
claim.agent.0,
age as u64,
CLAIM_TIMEOUT_SECS as u64,
);
@@ -179,7 +185,7 @@ pub(super) fn reclaim_timed_out_work(_project_root: &Path) {
for item in &items {
if !matches!(
item.stage(),
crate::pipeline_state::Stage::Coding
crate::pipeline_state::Stage::Coding { .. }
| crate::pipeline_state::Stage::Qa
| crate::pipeline_state::Stage::Merge { .. }
) {
@@ -189,13 +195,19 @@ pub(super) fn reclaim_timed_out_work(_project_root: &Path) {
// Release the claim if the TTL has expired — regardless of whether the
// holder is still alive. A node actively working should refresh its
// claim before the TTL window closes.
if let Some(claim) = item.claim() {
let age = now as u64 - claim.at.min(now as u64);
let reclaim_claim = match item.stage() {
crate::pipeline_state::Stage::Coding { claim } => claim.as_ref(),
crate::pipeline_state::Stage::Merge { claim, .. } => claim.as_ref(),
_ => None,
};
if let Some(claim) = reclaim_claim {
let claim_ts = claim.claimed_at.timestamp() as u64;
let age = now as u64 - claim_ts.min(now as u64);
if age as f64 >= CLAIM_TIMEOUT_SECS {
slog!(
"[agent-mode] Releasing stale claim on '{}' held by {:.12}… (age {}s)",
item.story_id(),
claim.node,
claim.agent.0,
age,
);
crdt_state::release_claim(item.story_id());