huskies: merge 1147 story One-active-gateway invariant via pidfile+flock — prevent double-gateway during restarts
This commit is contained in:
@@ -0,0 +1,121 @@
|
||||
//! Gateway pidfile — exclusive flock on `$HOME/.huskies/gateway.pid`.
|
||||
//!
|
||||
//! A gateway process holds the lock for its lifetime. A second gateway that
|
||||
//! tries to start while one is already running fails immediately with a
|
||||
//! human-readable error naming the existing process. A stale pidfile left by
|
||||
//! a dead process is reclaimed automatically: the kernel releases flocks when
|
||||
//! the file descriptor is closed, which happens when the process dies.
|
||||
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
// ── Guard ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Held for the lifetime of the gateway process. Dropping it releases the flock.
///
/// The guard owns the open pidfile handle and exposes no methods: its only job
/// is to keep that handle open. Bind it to a named variable that lives as long
/// as the gateway should hold the lock — a guard that is dropped right away
/// gives the lock up right away.
#[derive(Debug)]
pub struct PidfileGuard {
    // Never read; kept only so the underlying file handle stays open until the
    // guard is dropped.
    _file: File,
}
|
||||
|
||||
// ── Path resolution ───────────────────────────────────────────────────────────
|
||||
|
||||
/// Resolve `$HOME/.huskies/gateway.pid`, creating the directory if needed.
|
||||
fn default_pidfile_path() -> Result<PathBuf, String> {
|
||||
let home = homedir::my_home()
|
||||
.map_err(|e| format!("cannot determine home directory: {e}"))?
|
||||
.ok_or_else(|| "HOME is not set".to_string())?;
|
||||
let dir = home.join(".huskies");
|
||||
std::fs::create_dir_all(&dir).map_err(|e| format!("cannot create {}: {e}", dir.display()))?;
|
||||
Ok(dir.join("gateway.pid"))
|
||||
}
|
||||
|
||||
// ── Public API ────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Acquire the gateway pidfile at `$HOME/.huskies/gateway.pid`.
|
||||
///
|
||||
/// Returns a [`PidfileGuard`] that holds the exclusive flock for as long as it
|
||||
/// is in scope. Returns `Err("another gateway is at pid N")` when a live
|
||||
/// gateway already holds the lock, or `Err(…)` for unexpected I/O failures.
|
||||
pub fn acquire_gateway_pidfile() -> Result<PidfileGuard, String> {
|
||||
let path = default_pidfile_path()?;
|
||||
acquire_gateway_pidfile_at(&path)
|
||||
}
|
||||
|
||||
/// Acquire the gateway pidfile at an explicit path.
|
||||
///
|
||||
/// Separated from [`acquire_gateway_pidfile`] so that tests can supply a
|
||||
/// temporary directory instead of touching `$HOME/.huskies`.
|
||||
pub fn acquire_gateway_pidfile_at(path: &Path) -> Result<PidfileGuard, String> {
|
||||
let mut file = OpenOptions::new()
|
||||
.read(true)
|
||||
.write(true)
|
||||
.create(true)
|
||||
.truncate(false)
|
||||
.open(path)
|
||||
.map_err(|e| format!("cannot open pidfile {}: {e}", path.display()))?;
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::io::AsRawFd;
|
||||
let ret = unsafe { libc::flock(file.as_raw_fd(), libc::LOCK_EX | libc::LOCK_NB) };
|
||||
if ret != 0 {
|
||||
let err = std::io::Error::last_os_error();
|
||||
if err.kind() == std::io::ErrorKind::WouldBlock
|
||||
|| err.raw_os_error() == Some(libc::EACCES)
|
||||
{
|
||||
// Another live process holds the lock — read its PID for the error message.
|
||||
let pid_str = std::fs::read_to_string(path).unwrap_or_default();
|
||||
let pid = pid_str.trim().parse::<u32>().unwrap_or(0);
|
||||
return Err(format!("another gateway is at pid {pid}"));
|
||||
}
|
||||
return Err(format!("flock failed: {err}"));
|
||||
}
|
||||
}
|
||||
|
||||
// Write our PID (truncate first so no stale digits remain).
|
||||
use std::io::{Seek, SeekFrom, Write};
|
||||
file.set_len(0)
|
||||
.map_err(|e| format!("cannot truncate pidfile: {e}"))?;
|
||||
file.seek(SeekFrom::Start(0))
|
||||
.map_err(|e| format!("cannot seek pidfile: {e}"))?;
|
||||
write!(file, "{}", std::process::id()).map_err(|e| format!("cannot write pidfile: {e}"))?;
|
||||
|
||||
Ok(PidfileGuard { _file: file })
|
||||
}
|
||||
|
||||
// ── Tests ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// AC 2 & 3: while one guard is alive a second acquire is rejected with an
    /// error naming the holder's PID; once that guard is dropped the pidfile
    /// can be acquired again (dead-PID reclaim).
    #[cfg(unix)]
    #[test]
    fn second_gateway_fails_with_pid_message_then_reclaims() {
        let my_pid = std::process::id();
        let scratch_dir = tempfile::tempdir().unwrap();
        let pidfile = scratch_dir.path().join("gateway.pid");

        let holder = acquire_gateway_pidfile_at(&pidfile).expect("first acquire should succeed");

        // Both acquires run in this process, so the PID recorded by the holder
        // is our own.
        let second_attempt = acquire_gateway_pidfile_at(&pidfile);
        let err = second_attempt.expect_err("second acquire should fail while first is held");

        assert!(
            err.contains("another gateway is at pid"),
            "error should contain the prefix, got: {err}"
        );
        let pid_text = my_pid.to_string();
        assert!(
            err.contains(&pid_text),
            "error should contain our PID {my_pid}, got: {err}"
        );

        // Dropping the holder frees the flock, standing in for a gateway that died.
        drop(holder);

        // With the lock free again, a fresh acquire must go through.
        acquire_gateway_pidfile_at(&pidfile).expect("acquire after release should succeed");
    }
}
|
||||
Reference in New Issue
Block a user