huskies: merge 899
This commit is contained in:
@@ -203,14 +203,11 @@ pub async fn gateway_mcp_post_handler(
|
||||
}
|
||||
|
||||
/// Proxy a request to the active project and format the response.
|
||||
///
|
||||
/// Prefers the live sled-uplink WebSocket when one is attached (story 899
|
||||
/// AC 2); falls back to the legacy HTTP proxy otherwise.
|
||||
async fn proxy_and_respond(state: &GatewayState, bytes: &[u8], id: Option<Value>) -> Response {
|
||||
let url = match state.active_url().await {
|
||||
Ok(u) => u,
|
||||
Err(e) => {
|
||||
return to_json_response(JsonRpcResponse::error(id, -32603, e.to_string()));
|
||||
}
|
||||
};
|
||||
match gateway::io::proxy_mcp_call(&state.client, &url, bytes).await {
|
||||
match state.proxy_active_mcp(bytes).await {
|
||||
Ok(resp_body) => Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header("Content-Type", "application/json")
|
||||
@@ -316,13 +313,23 @@ fn handle_initialize(id: Option<Value>) -> JsonRpcResponse {
|
||||
}
|
||||
|
||||
/// Fetch tools/list from the active project and merge in gateway tools.
|
||||
///
|
||||
/// Routes via the sled-uplink WS when one is attached (story 899 AC 2);
|
||||
/// falls back to HTTP otherwise.
|
||||
async fn handle_tools_list(
|
||||
state: &GatewayState,
|
||||
id: Option<Value>,
|
||||
) -> Result<JsonRpcResponse, String> {
|
||||
let url = state.active_url().await.map_err(|e| e.to_string())?;
|
||||
|
||||
let resp_json = gateway::io::fetch_tools_list(&state.client, &url).await?;
|
||||
let rpc_body = json!({
|
||||
"jsonrpc": "2.0",
|
||||
"id": 1,
|
||||
"method": "tools/list",
|
||||
"params": {}
|
||||
});
|
||||
let bytes = serde_json::to_vec(&rpc_body).map_err(|e| e.to_string())?;
|
||||
let resp_bytes = state.proxy_active_mcp(&bytes).await?;
|
||||
let resp_json: Value =
|
||||
serde_json::from_slice(&resp_bytes).map_err(|e| format!("invalid tools/list JSON: {e}"))?;
|
||||
|
||||
let mut tools: Vec<Value> = resp_json
|
||||
.get("result")
|
||||
@@ -414,21 +421,36 @@ async fn handle_gateway_status_tool(state: &GatewayState, id: Option<Value>) ->
|
||||
async fn handle_gateway_health_tool(state: &GatewayState, id: Option<Value>) -> JsonRpcResponse {
|
||||
let mut results = BTreeMap::new();
|
||||
|
||||
let project_entries: Vec<(String, String)> = state
|
||||
// Build the project list, preferring the WS-uplink heartbeat as the
|
||||
// source of truth for liveness (story 899 AC 3). HTTP polls are used
|
||||
// only as a fallback when no live sled is connected.
|
||||
let project_names: Vec<(String, Option<String>)> = state
|
||||
.projects
|
||||
.read()
|
||||
.await
|
||||
.iter()
|
||||
.map(|(n, e)| (n.clone(), e.url.clone()))
|
||||
.collect();
|
||||
for (name, url) in &project_entries {
|
||||
let status = match gateway::io::check_project_health(&state.client, url).await {
|
||||
Ok(true) => "healthy".to_string(),
|
||||
Ok(false) => "unhealthy".to_string(),
|
||||
Err(e) => e,
|
||||
let sled_conns = state.sled_connections.read().await;
|
||||
for (name, url_opt) in &project_names {
|
||||
let status = if let Some(conn) = sled_conns.get(name) {
|
||||
if conn.is_alive(crate::service::gateway::HEARTBEAT_MAX_AGE_MS) {
|
||||
"healthy (ws)".to_string()
|
||||
} else {
|
||||
"stale (ws heartbeat overdue)".to_string()
|
||||
}
|
||||
} else if let Some(url) = url_opt {
|
||||
match gateway::io::check_project_health(&state.client, url).await {
|
||||
Ok(true) => "healthy".to_string(),
|
||||
Ok(false) => "unhealthy".to_string(),
|
||||
Err(e) => e,
|
||||
}
|
||||
} else {
|
||||
"no uplink and no url configured".to_string()
|
||||
};
|
||||
results.insert(name.clone(), status);
|
||||
}
|
||||
drop(sled_conns);
|
||||
|
||||
let active = state.active_project.read().await.clone();
|
||||
JsonRpcResponse::success(
|
||||
@@ -512,7 +534,7 @@ async fn handle_aggregate_pipeline_status_tool(
|
||||
.read()
|
||||
.await
|
||||
.iter()
|
||||
.map(|(name, entry)| (name.clone(), entry.url.clone()))
|
||||
.filter_map(|(name, entry)| entry.url.as_ref().map(|u| (name.clone(), u.clone())))
|
||||
.collect();
|
||||
|
||||
let statuses =
|
||||
@@ -656,7 +678,7 @@ async fn handle_pipeline_get(state: &GatewayState, id: Option<Value>) -> JsonRpc
|
||||
.read()
|
||||
.await
|
||||
.iter()
|
||||
.map(|(n, e)| (n.clone(), e.url.clone()))
|
||||
.filter_map(|(n, e)| e.url.as_ref().map(|u| (n.clone(), u.clone())))
|
||||
.collect();
|
||||
|
||||
let results = gateway::io::fetch_all_project_pipeline_items(&project_urls, &state.client).await;
|
||||
|
||||
@@ -155,21 +155,30 @@ struct SledUplinkParams {
|
||||
token: Option<String>,
|
||||
}
|
||||
|
||||
/// `GET /api/sled-uplink` — gateway-side WebSocket endpoint for sled permission uplinks.
|
||||
/// `GET /api/sled-uplink` — gateway-side WebSocket endpoint for sled uplinks.
|
||||
///
|
||||
/// # Authentication
|
||||
///
|
||||
/// The connecting sled must supply a valid shared-secret token via the `token`
|
||||
/// query parameter. Tokens are configured in `[sled_tokens]` in `projects.toml`
|
||||
/// as `sled_id = "secret"` entries.
|
||||
/// query parameter. Tokens are configured either as per-project `auth_token`
|
||||
/// fields under `[projects.<name>]` in `projects.toml` (preferred, story 899)
|
||||
/// or, for backwards compatibility, in the deprecated `[sled_tokens]` table.
|
||||
///
|
||||
/// # Protocol
|
||||
///
|
||||
/// See `sled_uplink.rs` for the wire format ([`UplinkEnvelope`]). The gateway
|
||||
/// accepts `perm_request` messages, injects them into the local permission
|
||||
/// pipeline (via `state.perm_tx`), and sends `perm_response` frames back to the
|
||||
/// sled once the Matrix bot resolves them. Multiple sleds are demuxed by
|
||||
/// connection: each handler owns exactly one sled's request/response flow.
|
||||
/// See `sled_uplink.rs` for the wire format ([`UplinkEnvelope`]).
|
||||
///
|
||||
/// Phase 2 (story 899) expands the protocol from the original perm-only flow
|
||||
/// to a full bidirectional MCP transport:
|
||||
///
|
||||
/// - **sled → gateway:** `identity` (mandatory first frame after upgrade),
|
||||
/// `heartbeat`, `perm_request`, `mcp_response`.
|
||||
/// - **gateway → sled:** `mcp_request`, `perm_response`.
|
||||
///
|
||||
/// On `identity`, the connection is published to
|
||||
/// [`GatewayState::sled_connections`] under the project name, allowing the
|
||||
/// MCP proxy (`mcp.rs::proxy_and_respond`) to route subsequent calls over
|
||||
/// the live WS instead of HTTP. The entry is removed on disconnect.
|
||||
#[handler]
|
||||
pub async fn gateway_sled_uplink_handler(
|
||||
ws: WebSocket,
|
||||
@@ -196,82 +205,211 @@ pub async fn gateway_sled_uplink_handler(
|
||||
|
||||
use poem::IntoResponse as _;
|
||||
let perm_tx = state.perm_tx.clone();
|
||||
let state = Arc::clone(&state);
|
||||
ws.on_upgrade(move |socket| async move {
|
||||
let (mut sink, mut stream) = socket.split();
|
||||
// Aggregator channel: spawned per-request tasks send (req_id, decision) here
|
||||
// so the main loop can write perm_response frames back to the sled.
|
||||
let (agg_tx, mut agg_rx) = tokio::sync::mpsc::unbounded_channel::<(
|
||||
String,
|
||||
crate::http::context::PermissionDecision,
|
||||
)>();
|
||||
run_sled_uplink_session(socket, state, sled_id, perm_tx).await;
|
||||
})
|
||||
.into_response()
|
||||
}
|
||||
|
||||
crate::slog!("[gateway/sled-uplink] Sled '{}' connected", sled_id);
|
||||
/// Run a single connected sled's request/response flow until it disconnects.
|
||||
///
|
||||
/// Performs the identity handshake, registers a [`SledConnection`] in
|
||||
/// [`GatewayState::sled_connections`] under the published project name, and
|
||||
/// pumps messages bidirectionally for the lifetime of the WebSocket.
|
||||
async fn run_sled_uplink_session(
|
||||
socket: poem::web::websocket::WebSocketStream,
|
||||
state: Arc<GatewayState>,
|
||||
token_sled_id: String,
|
||||
perm_tx: tokio::sync::mpsc::UnboundedSender<crate::http::context::PermissionForward>,
|
||||
) {
|
||||
use crate::service::gateway::SledConnection;
|
||||
use crate::sled_uplink::UplinkEnvelope;
|
||||
use std::sync::Arc as StdArc;
|
||||
use std::sync::atomic::AtomicI64;
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
msg = stream.next() => {
|
||||
let text = match msg {
|
||||
Some(Ok(WsMessage::Text(t))) => t,
|
||||
Some(Ok(WsMessage::Close(_))) | None => break,
|
||||
_ => continue,
|
||||
};
|
||||
let Ok(env) = serde_json::from_str::<crate::sled_uplink::UplinkEnvelope>(&text) else {
|
||||
continue;
|
||||
};
|
||||
if env.msg_type == "perm_request" {
|
||||
let req_id = env.req_id.clone();
|
||||
let tool_name = env.payload.get("tool_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
let tool_input = env.payload.get("tool_input")
|
||||
.cloned()
|
||||
.unwrap_or(serde_json::Value::Null);
|
||||
let (response_tx, response_rx) = tokio::sync::oneshot::channel();
|
||||
let fwd = crate::http::context::PermissionForward {
|
||||
request_id: format!("{sled_id}:{req_id}"),
|
||||
tool_name,
|
||||
tool_input,
|
||||
response_tx,
|
||||
};
|
||||
if perm_tx.send(fwd).is_err() {
|
||||
break;
|
||||
}
|
||||
let agg_tx2 = agg_tx.clone();
|
||||
tokio::spawn(async move {
|
||||
let decision = response_rx
|
||||
.await
|
||||
.unwrap_or(crate::http::context::PermissionDecision::Deny);
|
||||
let _ = agg_tx2.send((req_id, decision));
|
||||
});
|
||||
}
|
||||
let (mut sink, mut stream) = socket.split();
|
||||
|
||||
// ── Identity handshake: expect the first frame to be `identity` ──────
|
||||
let identity = match tokio::time::timeout(
|
||||
std::time::Duration::from_secs(10),
|
||||
wait_for_identity(&mut stream),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Some(p)) => p,
|
||||
_ => {
|
||||
crate::slog!(
|
||||
"[gateway/sled-uplink] '{}' missing identity frame; closing",
|
||||
token_sled_id
|
||||
);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Project name in the identity must match the project resolved from the
|
||||
// auth token; otherwise the sled is claiming to be a different project.
|
||||
if identity != token_sled_id {
|
||||
crate::slog!(
|
||||
"[gateway/sled-uplink] identity mismatch (token says '{}', sled claims '{}'); closing",
|
||||
token_sled_id,
|
||||
identity
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// ── Build SledConnection and publish it ─────────────────────────────
|
||||
let (out_tx, mut out_rx) = tokio::sync::mpsc::unbounded_channel::<UplinkEnvelope>();
|
||||
let conn = SledConnection {
|
||||
tx: out_tx,
|
||||
last_heartbeat_ms: StdArc::new(AtomicI64::new(chrono::Utc::now().timestamp_millis())),
|
||||
in_flight: StdArc::new(tokio::sync::Mutex::new(std::collections::HashMap::new())),
|
||||
};
|
||||
state
|
||||
.register_sled_connection(identity.clone(), conn.clone())
|
||||
.await;
|
||||
crate::slog!(
|
||||
"[gateway/sled-uplink] Sled '{}' connected and registered",
|
||||
identity
|
||||
);
|
||||
|
||||
// Aggregator channel for perm responses produced by spawned per-request tasks.
|
||||
let (agg_tx, mut agg_rx) =
|
||||
tokio::sync::mpsc::unbounded_channel::<(String, crate::http::context::PermissionDecision)>(
|
||||
);
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
// Outbound: forward queued envelopes (mcp_request, etc.) to the sled.
|
||||
Some(env) = out_rx.recv() => {
|
||||
let Ok(text) = serde_json::to_string(&env) else { continue };
|
||||
if sink.send(WsMessage::Text(text)).await.is_err() {
|
||||
break;
|
||||
}
|
||||
Some((req_id, decision)) = agg_rx.recv() => {
|
||||
use crate::http::context::PermissionDecision;
|
||||
let (approved, always_allow) = match decision {
|
||||
PermissionDecision::AlwaysAllow => (true, true),
|
||||
PermissionDecision::Approve => (true, false),
|
||||
PermissionDecision::Deny => (false, false),
|
||||
};
|
||||
let resp = crate::sled_uplink::UplinkEnvelope {
|
||||
msg_type: "perm_response".to_string(),
|
||||
req_id,
|
||||
payload: serde_json::json!({
|
||||
"approved": approved,
|
||||
"always_allow": always_allow,
|
||||
}),
|
||||
};
|
||||
let Ok(text) = serde_json::to_string(&resp) else { continue };
|
||||
if sink.send(WsMessage::Text(text)).await.is_err() {
|
||||
break;
|
||||
}
|
||||
// Inbound: messages from the sled.
|
||||
msg = stream.next() => {
|
||||
let text = match msg {
|
||||
Some(Ok(WsMessage::Text(t))) => t,
|
||||
Some(Ok(WsMessage::Close(_))) | None => break,
|
||||
_ => continue,
|
||||
};
|
||||
let Ok(env) = serde_json::from_str::<UplinkEnvelope>(&text) else {
|
||||
continue;
|
||||
};
|
||||
match env.msg_type.as_str() {
|
||||
"heartbeat" => {
|
||||
conn.last_heartbeat_ms.store(
|
||||
chrono::Utc::now().timestamp_millis(),
|
||||
std::sync::atomic::Ordering::Relaxed,
|
||||
);
|
||||
}
|
||||
"mcp_response" => {
|
||||
let tx_opt = conn.in_flight.lock().await.remove(&env.req_id);
|
||||
if let Some(tx) = tx_opt {
|
||||
let _ = tx.send(env.payload);
|
||||
}
|
||||
}
|
||||
"perm_request" => {
|
||||
forward_perm_request(env, &perm_tx, &agg_tx, &identity);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
// Aggregator: per-request permission resolutions get formatted as perm_response.
|
||||
Some((req_id, decision)) = agg_rx.recv() => {
|
||||
use crate::http::context::PermissionDecision;
|
||||
let (approved, always_allow) = match decision {
|
||||
PermissionDecision::AlwaysAllow => (true, true),
|
||||
PermissionDecision::Approve => (true, false),
|
||||
PermissionDecision::Deny => (false, false),
|
||||
};
|
||||
let resp = UplinkEnvelope {
|
||||
msg_type: "perm_response".to_string(),
|
||||
req_id,
|
||||
payload: serde_json::json!({
|
||||
"approved": approved,
|
||||
"always_allow": always_allow,
|
||||
}),
|
||||
};
|
||||
let Ok(text) = serde_json::to_string(&resp) else { continue };
|
||||
if sink.send(WsMessage::Text(text)).await.is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
crate::slog!("[gateway/sled-uplink] Sled '{}' disconnected", sled_id);
|
||||
})
|
||||
.into_response()
|
||||
state.deregister_sled_connection(&identity).await;
|
||||
crate::slog!("[gateway/sled-uplink] Sled '{}' disconnected", identity);
|
||||
}
|
||||
|
||||
/// Read frames until an `identity` envelope is seen; return the project name
|
||||
/// from its payload. Returns `None` if the stream ends or an invalid frame
|
||||
/// arrives where the first frame is expected to be `identity`.
|
||||
async fn wait_for_identity(
|
||||
stream: &mut futures::stream::SplitStream<poem::web::websocket::WebSocketStream>,
|
||||
) -> Option<String> {
|
||||
while let Some(msg) = stream.next().await {
|
||||
let text = match msg {
|
||||
Ok(WsMessage::Text(t)) => t,
|
||||
Ok(WsMessage::Close(_)) | Err(_) => return None,
|
||||
_ => continue,
|
||||
};
|
||||
let Ok(env) = serde_json::from_str::<crate::sled_uplink::UplinkEnvelope>(&text) else {
|
||||
return None;
|
||||
};
|
||||
if env.msg_type != "identity" {
|
||||
return None;
|
||||
}
|
||||
return env
|
||||
.payload
|
||||
.get("project")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(str::to_string);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Convert an inbound `perm_request` envelope into a [`PermissionForward`] and
|
||||
/// inject it into the gateway's permission pipeline. The spawned waiter task
|
||||
/// publishes the resolved decision into the aggregator channel so the WS
|
||||
/// writer can emit a matching `perm_response` frame.
|
||||
fn forward_perm_request(
|
||||
env: crate::sled_uplink::UplinkEnvelope,
|
||||
perm_tx: &tokio::sync::mpsc::UnboundedSender<crate::http::context::PermissionForward>,
|
||||
agg_tx: &tokio::sync::mpsc::UnboundedSender<(String, crate::http::context::PermissionDecision)>,
|
||||
sled_id: &str,
|
||||
) {
|
||||
let req_id = env.req_id.clone();
|
||||
let tool_name = env
|
||||
.payload
|
||||
.get("tool_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
let tool_input = env
|
||||
.payload
|
||||
.get("tool_input")
|
||||
.cloned()
|
||||
.unwrap_or(serde_json::Value::Null);
|
||||
let (response_tx, response_rx) = tokio::sync::oneshot::channel();
|
||||
let fwd = crate::http::context::PermissionForward {
|
||||
request_id: format!("{sled_id}:{req_id}"),
|
||||
tool_name,
|
||||
tool_input,
|
||||
response_tx,
|
||||
};
|
||||
if perm_tx.send(fwd).is_err() {
|
||||
return;
|
||||
}
|
||||
let agg_tx2 = agg_tx.clone();
|
||||
tokio::spawn(async move {
|
||||
let decision = response_rx
|
||||
.await
|
||||
.unwrap_or(crate::http::context::PermissionDecision::Deny);
|
||||
let _ = agg_tx2.send((req_id, decision));
|
||||
});
|
||||
}
|
||||
|
||||
// ── Event-push WebSocket handler ─────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user