huskies: merge 1147 story One-active-gateway invariant via pidfile+flock — prevent double-gateway during restarts

This commit is contained in:
dave
2026-05-19 18:29:19 +00:00
parent 918f18c200
commit be7bdf8304
4 changed files with 136 additions and 1 deletions
+121
View File
@@ -0,0 +1,121 @@
//! Gateway pidfile — exclusive flock on `$HOME/.huskies/gateway.pid`.
//!
//! A gateway process holds the lock for its lifetime. A second gateway that
//! tries to start while one is already running fails immediately with a
//! human-readable error naming the existing process. A stale pidfile left by
//! a dead process is reclaimed automatically: the kernel releases flocks when
//! the file descriptor is closed, which happens when the process dies.
use std::fs::{File, OpenOptions};
use std::path::{Path, PathBuf};
// ── Guard ─────────────────────────────────────────────────────────────────────
/// Held for the lifetime of the gateway process. Dropping it releases the flock.
///
/// The guard owns the open pidfile handle on which the lock was taken. It
/// exposes no methods: keeping the value alive is what keeps the lock held,
/// and dropping it closes the handle.
#[derive(Debug)]
pub struct PidfileGuard {
// Open handle to the pidfile. Never read after construction; the leading
// underscore marks it as intentionally unused while still keeping it alive.
_file: File,
}
// ── Path resolution ───────────────────────────────────────────────────────────
/// Build the default pidfile location, `<home>/.huskies/gateway.pid`.
///
/// The home directory comes from `homedir::my_home()`. The `.huskies`
/// directory (and any missing parents) is created as a side effect so the
/// caller can open the pidfile straight away.
///
/// # Errors
///
/// Returns a human-readable message when the home directory lookup fails,
/// when the lookup yields no home directory, or when the `.huskies`
/// directory cannot be created.
fn default_pidfile_path() -> Result<PathBuf, String> {
    let home_dir = match homedir::my_home() {
        Ok(Some(found)) => found,
        Ok(None) => return Err("HOME is not set".to_string()),
        Err(e) => return Err(format!("cannot determine home directory: {e}")),
    };

    let huskies_dir = home_dir.join(".huskies");
    if let Err(e) = std::fs::create_dir_all(&huskies_dir) {
        return Err(format!("cannot create {}: {e}", huskies_dir.display()));
    }

    Ok(huskies_dir.join("gateway.pid"))
}
// ── Public API ────────────────────────────────────────────────────────────────
/// Take the gateway lock on the default pidfile, `$HOME/.huskies/gateway.pid`.
///
/// This resolves the default path (creating `$HOME/.huskies` if necessary)
/// and delegates to [`acquire_gateway_pidfile_at`]. The returned
/// [`PidfileGuard`] keeps the exclusive flock for as long as it stays alive.
///
/// # Errors
///
/// Returns `Err("another gateway is at pid N")` when a live gateway already
/// holds the lock, or another descriptive `Err(…)` when the path cannot be
/// resolved or an unexpected I/O failure occurs.
pub fn acquire_gateway_pidfile() -> Result<PidfileGuard, String> {
    default_pidfile_path().and_then(|pidfile| acquire_gateway_pidfile_at(&pidfile))
}
/// Acquire the gateway pidfile at an explicit path.
///
/// Separated from [`acquire_gateway_pidfile`] so that tests can supply a
/// temporary directory instead of touching `$HOME/.huskies`.
///
/// The file is opened read/write, created if missing, and deliberately *not*
/// truncated on open so that a losing contender never wipes the PID written
/// by the current holder. On Unix an exclusive, non-blocking `flock` is then
/// attempted; only once the lock is held is the file emptied and the current
/// process id written as decimal text. On non-Unix targets no lock is taken
/// and the PID is simply written.
///
/// # Errors
///
/// * `"cannot open pidfile …"` — the file could not be opened or created.
/// * `"another gateway is at pid N"` — the lock is held through another open
///   handle. `N` is parsed from the pidfile contents and is `0` when those
///   contents cannot be read or parsed (for example, the holder has locked
///   the file but not yet written its PID).
/// * `"flock failed: …"` — `flock` failed for any other reason.
/// * `"cannot truncate pidfile: …"`, `"cannot seek pidfile: …"`,
///   `"cannot write pidfile: …"` — recording our own PID failed.
pub fn acquire_gateway_pidfile_at(path: &Path) -> Result<PidfileGuard, String> {
    use std::io::{Seek, SeekFrom, Write};

    let mut file = OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)
        .truncate(false)
        .open(path)
        .map_err(|e| format!("cannot open pidfile {}: {e}", path.display()))?;

    #[cfg(unix)]
    {
        use std::io::Read;
        use std::os::unix::io::AsRawFd;

        // SAFETY: `file` is open for the whole call, so the raw descriptor is
        // valid; `flock` takes only the descriptor and an integer flag set and
        // does not touch any memory owned by Rust.
        let ret = unsafe { libc::flock(file.as_raw_fd(), libc::LOCK_EX | libc::LOCK_NB) };
        if ret != 0 {
            let err = std::io::Error::last_os_error();
            // EWOULDBLOCK is the documented contention error for LOCK_NB;
            // EACCES is accepted as well as a defensive measure.
            let contended = err.kind() == std::io::ErrorKind::WouldBlock
                || err.raw_os_error() == Some(libc::EACCES);
            if !contended {
                return Err(format!("flock failed: {err}"));
            }

            // Another live handle holds the lock. Read the holder's PID
            // through the handle we just failed to lock rather than by
            // re-opening `path`, so the PID reported always comes from the
            // very file the lock attempt was made on, even if the path was
            // unlinked or replaced in the meantime.
            let mut contents = String::new();
            let pid = match file.read_to_string(&mut contents) {
                Ok(_) => contents.trim().parse::<u32>().unwrap_or(0),
                Err(_) => 0,
            };
            return Err(format!("another gateway is at pid {pid}"));
        }
    }

    // Write our PID (truncate first so no stale digits remain).
    file.set_len(0)
        .map_err(|e| format!("cannot truncate pidfile: {e}"))?;
    file.seek(SeekFrom::Start(0))
        .map_err(|e| format!("cannot seek pidfile: {e}"))?;
    write!(file, "{}", std::process::id()).map_err(|e| format!("cannot write pidfile: {e}"))?;

    Ok(PidfileGuard { _file: file })
}
// ── Tests ─────────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;

    /// AC 2 & 3: while one guard is alive a second acquire on the same path is
    /// rejected with a message naming the holder's PID; once that guard is
    /// dropped a fresh acquire succeeds (dead-PID reclaim).
    #[cfg(unix)]
    #[test]
    fn second_gateway_fails_with_pid_message_then_reclaims() {
        // Both acquires happen in this process, so the reported holder PID
        // must be our own.
        let my_pid = std::process::id();

        let scratch = tempfile::tempdir().unwrap();
        let pidfile = scratch.path().join("gateway.pid");

        let holder = acquire_gateway_pidfile_at(&pidfile).expect("first acquire should succeed");

        let err = acquire_gateway_pidfile_at(&pidfile)
            .expect_err("second acquire should fail while first is held");
        assert!(
            err.contains("another gateway is at pid"),
            "error should contain the prefix, got: {err}"
        );
        assert!(
            err.contains(&my_pid.to_string()),
            "error should contain our PID {my_pid}, got: {err}"
        );

        // Dropping the holder frees the flock, standing in for the death of
        // the first gateway.
        drop(holder);

        // With the lock free again, the next acquire must succeed.
        acquire_gateway_pidfile_at(&pidfile).expect("acquire after release should succeed");
    }
}