huskies: merge 1138 story In-container huskies self-update — huskies upgrade pulls a fresh binary without docker rebuild

This commit is contained in:
dave
2026-05-18 13:28:53 +00:00
parent d10634c7d6
commit 0ec5c05de8
6 changed files with 590 additions and 2 deletions
+107
View File
@@ -26,6 +26,8 @@ const GATEWAY_TOOLS: &[&str] = &[
// Handled at the gateway so the Matrix bot's perm_rx listener is used
// rather than the container's (which has no interactive session attached).
"prompt_permission",
// Binary self-update: gateway serves its own binary and triggers upgrade on sleds.
"upgrade_sled",
];
/// Gateway tool definitions.
@@ -121,6 +123,23 @@ pub(crate) fn gateway_tool_definitions() -> Vec<Value> {
"properties": {}
}
}),
json!({
"name": "upgrade_sled",
"description": "Trigger a binary self-update on a project sled. The sled downloads the new binary from `source_url` (defaults to this gateway's /api/huskies-binary endpoint), atomically replaces its own executable, drains CRDT persistence so no ops are lost, and re-execs. Without `project`, upgrades the active project.",
"inputSchema": {
"type": "object",
"properties": {
"project": {
"type": "string",
"description": "Name of the project sled to upgrade. Defaults to the currently active project."
},
"source_url": {
"type": "string",
"description": "HTTP URL of the binary to install (e.g. 'http://gateway:3000/api/huskies-binary'). Defaults to this gateway's own binary endpoint."
}
}
}
}),
]
}
@@ -385,6 +404,7 @@ async fn handle_gateway_tool(
"aggregate_pipeline_status" => handle_aggregate_pipeline_status_tool(state, id).await,
"agents.list" => handle_agents_list_tool(id),
"prompt_permission" => handle_prompt_permission_tool(params, state, id).await,
"upgrade_sled" => handle_upgrade_sled_tool(params, state, id).await,
_ => JsonRpcResponse::error(id, -32601, format!("Unknown gateway tool: {tool_name}")),
}
}
@@ -769,6 +789,93 @@ fn handle_agents_list_tool(id: Option<Value>) -> JsonRpcResponse {
)
}
/// Handle the `upgrade_sled` gateway tool.
///
/// Posts `{"source_url": "<url>"}` to the target sled's `/api/upgrade` endpoint
/// and reports the outcome as a JSON-RPC response. The call returns as soon as
/// the sled answers the POST; it does not wait for the upgrade itself to finish.
///
/// # Arguments
/// Read from `params["arguments"]`, falling back to `params` itself:
/// - `project` (optional string): name of the sled to upgrade. When absent, the
///   URL returned by `state.active_url()` is used.
/// - `source_url` (optional string): where the sled should fetch the binary.
///   When absent or blank, falls back to the `HUSKIES_GATEWAY_BINARY_URL`
///   environment variable, then to `http://gateway:<state.port>/api/huskies-binary`.
///
/// # Errors
/// - `-32602` when `project` is given but is unknown or has no URL configured.
/// - `-32603` when the active URL cannot be resolved, the POST fails, or the
///   sled answers with a non-2xx status.
async fn handle_upgrade_sled_tool(
    params: &Value,
    state: &GatewayState,
    id: Option<Value>,
) -> JsonRpcResponse {
    let args = params.get("arguments").unwrap_or(params);

    // Resolve target project URL (explicit project arg or active project).
    let project_name = args.get("project").and_then(|v| v.as_str());
    let sled_url = if let Some(name) = project_name {
        let projects = state.projects.read().await;
        match projects.get(name).and_then(|e| e.url.clone()) {
            Some(u) => u,
            None => {
                return JsonRpcResponse::error(
                    id,
                    -32602,
                    format!("Project '{name}' not found or has no URL configured"),
                );
            }
        }
    } else {
        match state.active_url().await {
            Ok(u) => u,
            Err(e) => return JsonRpcResponse::error(id, -32603, e.to_string()),
        }
    };

    // Build the binary source URL: caller-supplied or this gateway's own endpoint.
    // A blank value is treated as "not supplied": forwarding it would make the
    // sled's background upgrade fail while this tool still reports success.
    let source_url = match args
        .get("source_url")
        .and_then(|v| v.as_str())
        .filter(|s| !s.trim().is_empty())
    {
        Some(url) => url.to_string(),
        None => std::env::var("HUSKIES_GATEWAY_BINARY_URL")
            .unwrap_or_else(|_| format!("http://gateway:{}/api/huskies-binary", state.port)),
    };

    // Strip trailing slashes so a configured "http://sled:3001/" does not
    // produce "http://sled:3001//api/upgrade".
    let upgrade_url = format!("{}/api/upgrade", sled_url.trim_end_matches('/'));
    let body = serde_json::json!({ "source_url": source_url });

    // Name used in the success message. Await the lock rather than `try_read()`,
    // which silently produced an empty name whenever the lock was contended.
    let target_name = match project_name {
        Some(name) => name.to_string(),
        None => state.active_project.read().await.clone(),
    };

    match state.client.post(&upgrade_url).json(&body).send().await {
        // `is_success()` covers every 2xx status, including 202 Accepted.
        Ok(resp) if resp.status().is_success() => JsonRpcResponse::success(
            id,
            json!({
                "content": [{
                    "type": "text",
                    "text": format!(
                        "Upgrade triggered on '{target_name}'. The sled is downloading the new binary from {source_url} and will re-exec momentarily."
                    )
                }]
            }),
        ),
        Ok(resp) => JsonRpcResponse::error(
            id,
            -32603,
            format!(
                "Sled returned HTTP {} for upgrade request to {upgrade_url}",
                resp.status()
            ),
        ),
        Err(e) => JsonRpcResponse::error(
            id,
            -32603,
            format!("Failed to send upgrade request to {upgrade_url}: {e}"),
        ),
    }
}
/// Handle the `pipeline.get` read-RPC — returns per-project item lists in the
/// shape expected by the gateway web UI:
/// `{ "active": "...", "projects": { "name": { "active": [...], "backlog_count": N } } }`.
+70
View File
@@ -104,6 +104,10 @@ pub fn build_routes(
route = route.at("/api/events", get(events::events_handler).data(buf));
}
route = route
.at("/api/upgrade", post(upgrade_trigger_handler))
.at("/api/huskies-binary", get(serve_binary_handler));
if let Some(wa_ctx) = whatsapp_ctx {
route = route.at(
"/webhook/whatsapp",
@@ -209,6 +213,72 @@ pub fn debug_crdt_handler(req: &poem::Request) -> poem::Response {
.body(serde_json::to_string_pretty(&body).unwrap_or_default())
}
/// `POST /api/upgrade` — start a self-update of this sled.
///
/// The JSON body must contain a string field `source_url`, e.g.
/// `{"source_url": "http://gateway:3000/api/huskies-binary"}`. If the field is
/// missing or is not a string, the handler answers `400 Bad Request`.
///
/// Otherwise `crate::upgrade::upgrade_and_reexec` is started on a background
/// Tokio task and `202 Accepted` is returned without waiting for it. A failure
/// of the background task is only written to the log.
#[poem::handler]
pub async fn upgrade_trigger_handler(
    body: poem::web::Json<serde_json::Value>,
    ctx: poem::web::Data<&std::sync::Arc<AppContext>>,
) -> poem::Response {
    // NOTE(review): no authentication or URL validation is visible in this
    // handler — confirm that access to this route is restricted elsewhere.
    let poem::web::Json(payload) = body;
    let source_url = match payload.get("source_url") {
        Some(serde_json::Value::String(url)) => url.clone(),
        _ => {
            return poem::Response::builder()
                .status(StatusCode::BAD_REQUEST)
                .body("Missing required field: source_url");
        }
    };

    // Falls back to the default (empty) value when no project root is available.
    let project_root = ctx.state.get_project_root().unwrap_or_default();

    // Detached task: the response below must go out before the upgrade runs.
    tokio::spawn(async move {
        let outcome = crate::upgrade::upgrade_and_reexec(&source_url, &project_root).await;
        if let Err(e) = outcome {
            crate::slog!("[upgrade] Upgrade failed: {e}");
        }
    });

    let accepted = poem::Response::builder().status(StatusCode::ACCEPTED);
    accepted.body("Upgrade triggered. The sled will re-exec momentarily.")
}
/// `GET /api/huskies-binary` — serve the running binary so peer sleds can download it.
///
/// Resolves the path of the running executable with `std::env::current_exe()`,
/// reads that file completely into memory, and returns the bytes as an
/// `application/octet-stream` attachment named `huskies`.
///
/// Responds with `500 Internal Server Error` and a plain-text message when the
/// executable path cannot be resolved or the file cannot be read.
#[poem::handler]
pub async fn serve_binary_handler() -> poem::Response {
    // Shared builder for both failure paths.
    let server_error = |message: String| {
        poem::Response::builder()
            .status(StatusCode::INTERNAL_SERVER_ERROR)
            .body(message)
    };

    let exe_path = match std::env::current_exe() {
        Ok(path) => path,
        Err(e) => return server_error(format!("Cannot resolve current executable: {e}")),
    };

    let contents = match tokio::fs::read(&exe_path).await {
        Ok(bytes) => bytes,
        Err(e) => {
            return server_error(format!(
                "Cannot read binary at {}: {e}",
                exe_path.display()
            ));
        }
    };

    poem::Response::builder()
        .status(StatusCode::OK)
        .header("Content-Type", "application/octet-stream")
        .header("Content-Disposition", "attachment; filename=\"huskies\"")
        .body(contents)
}
#[cfg(test)]
mod tests {
use super::*;