huskies: merge 1072
This commit is contained in:
+107
-1
@@ -29,7 +29,7 @@ pub mod shadow_write;
|
|||||||
|
|
||||||
pub use content_store::{ContentKey, all_content_ids, delete_content, read_content, write_content};
|
pub use content_store::{ContentKey, all_content_ids, delete_content, read_content, write_content};
|
||||||
pub use ops::{ItemMeta, delete_item, move_item_stage, next_item_number, write_item_with_content};
|
pub use ops::{ItemMeta, delete_item, move_item_stage, next_item_number, write_item_with_content};
|
||||||
pub use shadow_write::{get_shared_pool, init};
|
pub use shadow_write::{check_schema_drift, get_shared_pool, init};
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub use content_store::ensure_content_store;
|
pub use content_store::ensure_content_store;
|
||||||
@@ -395,6 +395,112 @@ mod tests {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Regression: root cause of the 2026-05-14 21:07 production outage.
|
||||||
|
///
|
||||||
|
/// A headless agent on a feature branch (whose binary includes a new
|
||||||
|
/// sqlx migration) must NEVER apply that migration to the production
|
||||||
|
/// pipeline.db. Verify that opening an agent-local DB and running
|
||||||
|
/// migrations on it leaves the production DB's `_sqlx_migrations` table
|
||||||
|
/// unchanged.
|
||||||
|
///
|
||||||
|
/// The enforcement mechanism is in `init_subsystems(is_agent=true)`, which
|
||||||
|
/// redirects to a temp path. This test validates the SQLite isolation
|
||||||
|
/// property: migrations applied to one file are confined to that file.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn agent_db_isolation_does_not_affect_production_db() {
|
||||||
|
let tmp = tempfile::tempdir().unwrap();
|
||||||
|
let prod_db_path = tmp.path().join("production.db");
|
||||||
|
let agent_db_path = tmp.path().join("agent_temp.db");
|
||||||
|
|
||||||
|
// Set up the production DB — apply the current compiled-in migrations.
|
||||||
|
let prod_opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||||
|
.filename(&prod_db_path)
|
||||||
|
.create_if_missing(true);
|
||||||
|
let prod_pool = sqlx::SqlitePool::connect_with(prod_opts).await.unwrap();
|
||||||
|
sqlx::migrate!("./migrations")
|
||||||
|
.run(&prod_pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Record the migration versions present in the production DB.
|
||||||
|
let before: Vec<(i64,)> =
|
||||||
|
sqlx::query_as("SELECT version FROM _sqlx_migrations ORDER BY version")
|
||||||
|
.fetch_all(&prod_pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Simulate the agent opening its own isolated DB and running migrations.
|
||||||
|
let agent_opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||||
|
.filename(&agent_db_path)
|
||||||
|
.create_if_missing(true);
|
||||||
|
let agent_pool = sqlx::SqlitePool::connect_with(agent_opts).await.unwrap();
|
||||||
|
sqlx::migrate!("./migrations")
|
||||||
|
.run(&agent_pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Production DB must be completely unaffected by the agent's migration run.
|
||||||
|
let after: Vec<(i64,)> =
|
||||||
|
sqlx::query_as("SELECT version FROM _sqlx_migrations ORDER BY version")
|
||||||
|
.fetch_all(&prod_pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
before, after,
|
||||||
|
"agent opening its own DB must not alter the production DB migration table"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Verify that `check_schema_drift` returns an empty list when all
|
||||||
|
/// migrations in the database are recognised by this binary.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn check_schema_drift_empty_when_all_known() {
|
||||||
|
let tmp = tempfile::tempdir().unwrap();
|
||||||
|
let db_path = tmp.path().join("drift_test.db");
|
||||||
|
let opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||||
|
.filename(&db_path)
|
||||||
|
.create_if_missing(true);
|
||||||
|
let pool = sqlx::SqlitePool::connect_with(opts).await.unwrap();
|
||||||
|
sqlx::migrate!("./migrations").run(&pool).await.unwrap();
|
||||||
|
|
||||||
|
let drift = super::shadow_write::check_schema_drift(&pool).await;
|
||||||
|
assert!(
|
||||||
|
drift.is_empty(),
|
||||||
|
"no drift expected when DB matches the compiled-in migration set"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Verify that `check_schema_drift` identifies a manually-inserted
|
||||||
|
/// migration row that is not part of the compiled-in set.
|
||||||
|
#[tokio::test]
|
||||||
|
async fn check_schema_drift_detects_unknown_migration() {
|
||||||
|
let tmp = tempfile::tempdir().unwrap();
|
||||||
|
let db_path = tmp.path().join("drift_future.db");
|
||||||
|
let opts = sqlx::sqlite::SqliteConnectOptions::new()
|
||||||
|
.filename(&db_path)
|
||||||
|
.create_if_missing(true);
|
||||||
|
let pool = sqlx::SqlitePool::connect_with(opts).await.unwrap();
|
||||||
|
sqlx::migrate!("./migrations").run(&pool).await.unwrap();
|
||||||
|
|
||||||
|
// Inject a fake "future" migration that no binary compiled today would know.
|
||||||
|
let fake_checksum: Vec<u8> = vec![0u8; 20];
|
||||||
|
sqlx::query(
|
||||||
|
"INSERT INTO _sqlx_migrations \
|
||||||
|
(version, description, installed_on, success, checksum, execution_time) \
|
||||||
|
VALUES (99999999999999, 'future_migration', '2099-01-01T00:00:00Z', 1, ?1, 0)",
|
||||||
|
)
|
||||||
|
.bind(&fake_checksum)
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let drift = super::shadow_write::check_schema_drift(&pool).await;
|
||||||
|
assert_eq!(drift.len(), 1, "exactly one unknown migration expected");
|
||||||
|
assert_eq!(drift[0].version, 99999999999999_i64);
|
||||||
|
assert_eq!(drift[0].description, "future_migration");
|
||||||
|
}
|
||||||
|
|
||||||
/// Story 864: passing `ItemMeta::default()` against a content blob that
|
/// Story 864: passing `ItemMeta::default()` against a content blob that
|
||||||
/// LOOKS like front-matter must NOT silently extract metadata into the
|
/// LOOKS like front-matter must NOT silently extract metadata into the
|
||||||
/// CRDT. The whole point of removing the implicit YAML round-trip is
|
/// CRDT. The whole point of removing the implicit YAML round-trip is
|
||||||
|
|||||||
@@ -11,10 +11,23 @@ use crate::slog;
|
|||||||
use sqlx::SqlitePool;
|
use sqlx::SqlitePool;
|
||||||
use sqlx::sqlite::SqliteConnectOptions;
|
use sqlx::sqlite::SqliteConnectOptions;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
use std::collections::HashSet;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::sync::OnceLock;
|
use std::sync::OnceLock;
|
||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
|
|
||||||
|
/// One migration row in the live database that is not in the compiled-in set.
///
/// Returned by [`check_schema_drift`] for each unknown migration.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so values can be logged,
/// copied, and compared directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UnknownMigration {
    /// sqlx migration version number (derived from the filename timestamp).
    pub version: i64,
    /// Human-readable description from the migration filename.
    pub description: String,
    /// When the migration was applied, as stored in `_sqlx_migrations.installed_on`.
    pub installed_on: String,
}
|
||||||
|
|
||||||
/// The process-global SQLite pool, set once by [`init`].
|
/// The process-global SQLite pool, set once by [`init`].
|
||||||
///
|
///
|
||||||
/// Other modules call [`get_shared_pool`] to access the pool without needing
|
/// Other modules call [`get_shared_pool`] to access the pool without needing
|
||||||
@@ -133,3 +146,31 @@ pub async fn init(db_path: &Path) -> Result<(), sqlx::Error> {
|
|||||||
let _ = PIPELINE_DB.set(PipelineDb { tx });
|
let _ = PIPELINE_DB.set(PipelineDb { tx });
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Compare the live `_sqlx_migrations` table against the compiled-in migration
|
||||||
|
/// set and return any rows whose version is not known to this binary.
|
||||||
|
///
|
||||||
|
/// A non-empty result means the database was previously opened by a newer
|
||||||
|
/// binary that applied additional migrations. The server must refuse to start
|
||||||
|
/// in that state because the schema may contain tables or columns that this
|
||||||
|
/// binary does not understand.
|
||||||
|
pub async fn check_schema_drift(pool: &SqlitePool) -> Vec<UnknownMigration> {
|
||||||
|
let migrator = sqlx::migrate!("./migrations");
|
||||||
|
let known: HashSet<i64> = migrator.migrations.iter().map(|m| m.version).collect();
|
||||||
|
|
||||||
|
let rows: Vec<(i64, String, String)> = sqlx::query_as(
|
||||||
|
"SELECT version, description, installed_on FROM _sqlx_migrations ORDER BY version",
|
||||||
|
)
|
||||||
|
.fetch_all(pool)
|
||||||
|
.await
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
rows.into_iter()
|
||||||
|
.filter(|(v, _, _)| !known.contains(v))
|
||||||
|
.map(|(version, description, installed_on)| UnknownMigration {
|
||||||
|
version,
|
||||||
|
description,
|
||||||
|
installed_on,
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|||||||
+1
-1
@@ -149,7 +149,7 @@ async fn main() -> Result<(), std::io::Error> {
|
|||||||
startup::project::open_project_root(is_init, explicit_path, &cwd, &app_state, &store, port)
|
startup::project::open_project_root(is_init, explicit_path, &cwd, &app_state, &store, port)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
startup::project::init_subsystems(&app_state, &cwd).await;
|
startup::project::init_subsystems(&app_state, &cwd, is_agent).await;
|
||||||
|
|
||||||
let crdt_join_token = cli
|
let crdt_join_token = cli
|
||||||
.join_token
|
.join_token
|
||||||
|
|||||||
@@ -217,7 +217,13 @@ async fn migrate_json_stores_to_sqlite(huskies_dir: &Path) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Set up the server log file, node identity keypair, pipeline DB, and CRDT state.
|
/// Set up the server log file, node identity keypair, pipeline DB, and CRDT state.
|
||||||
pub(crate) async fn init_subsystems(app_state: &Arc<SessionState>, cwd: &Path) {
|
///
|
||||||
|
/// When `is_agent` is `true` the pipeline database is opened at an isolated
|
||||||
|
/// temporary path (or at `HUSKIES_DB_PATH` if that env-var is set) so that the
|
||||||
|
/// headless build agent never touches the production `.huskies/pipeline.db`.
|
||||||
|
/// This prevents feature-branch migrations from being applied to the shared
|
||||||
|
/// database and bricking the next server restart.
|
||||||
|
pub(crate) async fn init_subsystems(app_state: &Arc<SessionState>, cwd: &Path, is_agent: bool) {
|
||||||
// Enable persistent server log file now that the project root is known.
|
// Enable persistent server log file now that the project root is known.
|
||||||
if let Some(ref root) = *app_state.project_root.lock().unwrap() {
|
if let Some(ref root) = *app_state.project_root.lock().unwrap() {
|
||||||
let log_dir = root.join(".huskies").join("logs");
|
let log_dir = root.join(".huskies").join("logs");
|
||||||
@@ -242,20 +248,91 @@ pub(crate) async fn init_subsystems(app_state: &Arc<SessionState>, cwd: &Path) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialise the SQLite pipeline shadow-write database and CRDT state layer.
|
// Resolve the pipeline DB path.
|
||||||
// Clone the path out before the await so we don't hold the MutexGuard across
|
//
|
||||||
// an await point.
|
// Priority order:
|
||||||
let pipeline_db_path = app_state
|
// 1. HUSKIES_DB_PATH env var (operator override, any mode)
|
||||||
|
// 2. Agent mode: process-local temp file so the production DB is never touched
|
||||||
|
// 3. Default: {project_root}/.huskies/pipeline.db
|
||||||
|
let pipeline_db_path: Option<PathBuf> = if let Ok(env_path) = std::env::var("HUSKIES_DB_PATH") {
|
||||||
|
let p = PathBuf::from(&env_path);
|
||||||
|
crate::slog!("[db] HUSKIES_DB_PATH override: {}", p.display());
|
||||||
|
Some(p)
|
||||||
|
} else if is_agent {
|
||||||
|
// Headless agent: use an isolated temp DB so that any migrations compiled
|
||||||
|
// into this binary (e.g. from a feature branch) are never applied to the
|
||||||
|
// production database. The temp file is process-unique and harmless to
|
||||||
|
// leave behind after the agent exits.
|
||||||
|
let pid = std::process::id();
|
||||||
|
let temp_path = std::env::temp_dir().join(format!("huskies-agent-{pid}.db"));
|
||||||
|
crate::slog!(
|
||||||
|
"[db] Agent mode: using isolated DB at {} (not touching production pipeline.db)",
|
||||||
|
temp_path.display()
|
||||||
|
);
|
||||||
|
Some(temp_path)
|
||||||
|
} else {
|
||||||
|
// Server mode: use the project-local production database.
|
||||||
|
app_state
|
||||||
.project_root
|
.project_root
|
||||||
.lock()
|
.lock()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(|root| root.join(".huskies").join("pipeline.db"));
|
.map(|root| root.join(".huskies").join("pipeline.db"))
|
||||||
|
};
|
||||||
|
|
||||||
if let Some(ref db_path) = pipeline_db_path {
|
if let Some(ref db_path) = pipeline_db_path {
|
||||||
if let Err(e) = db::init(db_path).await {
|
if let Err(e) = db::init(db_path).await {
|
||||||
crate::slog!("[db] Failed to initialise pipeline.db: {e}");
|
crate::slog!("[db] Failed to initialise pipeline.db: {e}");
|
||||||
} else {
|
} else {
|
||||||
|
// ── Migration drift self-check (server mode only) ─────────────────────
|
||||||
|
//
|
||||||
|
// In server mode, detect whether the live database contains migrations
|
||||||
|
// that were applied by a newer binary (e.g. a feature-branch agent that
|
||||||
|
// ran before the feature was merged). If so, log each unknown migration
|
||||||
|
// and exit with a clear actionable message. This is the root cause of
|
||||||
|
// the 2026-05-14 21:07 production outage where the server came up but
|
||||||
|
// the CRDT never initialised.
|
||||||
|
if !is_agent && let Some(pool) = db::get_shared_pool() {
|
||||||
|
let drift = db::check_schema_drift(pool).await;
|
||||||
|
if !drift.is_empty() {
|
||||||
|
for m in &drift {
|
||||||
|
crate::slog!(
|
||||||
|
"[db] UNKNOWN migration {} ('{}') applied at {} \
|
||||||
|
is not in the compiled-in set",
|
||||||
|
m.version,
|
||||||
|
m.description,
|
||||||
|
m.installed_on,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
eprintln!();
|
||||||
|
eprintln!(
|
||||||
|
"error: pipeline.db contains {} migration(s) that are not \
|
||||||
|
recognised by this binary:",
|
||||||
|
drift.len()
|
||||||
|
);
|
||||||
|
for m in &drift {
|
||||||
|
eprintln!(
|
||||||
|
" \u{2022} migration {} ('{}') applied at {}",
|
||||||
|
m.version, m.description, m.installed_on
|
||||||
|
);
|
||||||
|
}
|
||||||
|
eprintln!();
|
||||||
|
eprintln!(
|
||||||
|
"This means the database was previously opened by a newer \
|
||||||
|
version of huskies."
|
||||||
|
);
|
||||||
|
eprintln!(
|
||||||
|
"To fix: rebuild huskies from the latest source (the branch \
|
||||||
|
that added these migrations) and restart."
|
||||||
|
);
|
||||||
|
eprintln!(
|
||||||
|
"Do NOT start the old binary against this database — it will \
|
||||||
|
behave incorrectly."
|
||||||
|
);
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// One-shot migration: move any existing JSON store files into SQLite.
|
// One-shot migration: move any existing JSON store files into SQLite.
|
||||||
let huskies_dir = db_path.parent().unwrap_or(db_path);
|
let huskies_dir = db_path.parent().unwrap_or(db_path);
|
||||||
migrate_json_stores_to_sqlite(huskies_dir).await;
|
migrate_json_stores_to_sqlite(huskies_dir).await;
|
||||||
|
|||||||
Reference in New Issue
Block a user