huskies: merge 760
This commit is contained in:
@@ -12,9 +12,9 @@ use std::sync::Arc;
|
||||
|
||||
// Re-export public types that callers reference as `crate::gateway::*`.
|
||||
pub use crate::service::gateway::{
|
||||
GatewayConfig, GatewayState as GatewayStateType, JoinedAgent, ProjectEntry,
|
||||
fetch_all_project_pipeline_statuses, format_aggregate_status_compact,
|
||||
spawn_gateway_notification_poller,
|
||||
GatewayConfig, GatewayState as GatewayStateType, GatewayStatusEvent, JoinedAgent, ProjectEntry,
|
||||
broadcast_status_event, fetch_all_project_pipeline_statuses, format_aggregate_status_compact,
|
||||
spawn_gateway_notification_poller, subscribe_status_events,
|
||||
};
|
||||
|
||||
/// Build the complete gateway route tree.
|
||||
@@ -70,6 +70,10 @@ pub fn build_gateway_route(state_arc: Arc<GatewayState>) -> impl poem::Endpoint
|
||||
"/gateway/agents/:id/heartbeat",
|
||||
poem::post(gateway_heartbeat_handler),
|
||||
)
|
||||
.at(
|
||||
"/gateway/events/push",
|
||||
poem::get(gateway_event_push_handler),
|
||||
)
|
||||
// Serve the embedded React frontend so the gateway has a UI.
|
||||
.at(
|
||||
"/assets/*path",
|
||||
|
||||
@@ -4,9 +4,12 @@
|
||||
//! the response. No inline business logic, no `reqwest`, no filesystem access.
|
||||
|
||||
use crate::service::gateway::{self, GatewayState};
|
||||
use futures::StreamExt;
|
||||
use poem::handler;
|
||||
use poem::http::StatusCode;
|
||||
use poem::web::Path as PoemPath;
|
||||
use poem::web::Query;
|
||||
use poem::web::websocket::{Message as WsMessage, WebSocket};
|
||||
use poem::web::{Data, Json};
|
||||
use poem::{Body, Request, Response};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -631,6 +634,119 @@ pub async fn gateway_heartbeat_handler(
|
||||
}
|
||||
}
|
||||
|
||||
// ── Event-push WebSocket handler ────────────────────────────────────────────
|
||||
|
||||
/// Query parameters accepted on the `/gateway/events/push` WebSocket upgrade.
|
||||
#[derive(Deserialize)]
|
||||
struct EventPushQueryParams {
|
||||
/// One-time join token generated by `POST /gateway/tokens`.
|
||||
token: Option<String>,
|
||||
/// The project name this node represents (e.g. `"huskies"`).
|
||||
project: Option<String>,
|
||||
}
|
||||
|
||||
/// `GET /gateway/events/push` — WebSocket endpoint for project nodes to push
|
||||
/// [`StatusEvent`] frames to the gateway.
|
||||
///
|
||||
/// # Authentication
|
||||
///
|
||||
/// The connecting node must supply a valid one-time join token via the `token`
|
||||
/// query parameter, obtained from `POST /gateway/tokens`. The token is
|
||||
/// consumed on the first successful upgrade — the connection itself is then
|
||||
/// kept open indefinitely.
|
||||
///
|
||||
/// # Protocol
|
||||
///
|
||||
/// Each message from the project node must be a JSON-encoded
|
||||
/// [`crate::service::events::StoredEvent`]. The gateway fan-outs the event
|
||||
/// (tagged with the project name) to all current local subscribers.
|
||||
///
|
||||
/// The server does not send data back; clients should treat any close frame
|
||||
/// as a signal to reconnect with exponential back-off (see docs/gateway-protocol.html).
|
||||
///
|
||||
/// # Reconnect-with-backoff
|
||||
///
|
||||
/// Project nodes MUST reconnect on disconnect. Recommended policy:
|
||||
///
|
||||
/// - Initial retry delay: **1 s**
|
||||
/// - Back-off multiplier: **2×** per attempt
|
||||
/// - Max delay cap: **60 s**
|
||||
/// - Jitter: add ±10 % to the delay to avoid thundering herds
|
||||
#[handler]
|
||||
pub async fn gateway_event_push_handler(
|
||||
ws: WebSocket,
|
||||
state: Data<&Arc<GatewayState>>,
|
||||
Query(params): Query<EventPushQueryParams>,
|
||||
) -> poem::Response {
|
||||
// ── Authentication (pre-upgrade) ─────────────────────────────────────
|
||||
let token = match params.token {
|
||||
Some(t) if !t.is_empty() => t,
|
||||
_ => {
|
||||
return poem::Response::builder()
|
||||
.status(StatusCode::UNAUTHORIZED)
|
||||
.body("token query parameter required");
|
||||
}
|
||||
};
|
||||
|
||||
let project = match params.project {
|
||||
Some(p) if !p.is_empty() => p,
|
||||
_ => {
|
||||
return poem::Response::builder()
|
||||
.status(StatusCode::BAD_REQUEST)
|
||||
.body("project query parameter required");
|
||||
}
|
||||
};
|
||||
|
||||
// Validate and consume the one-time token.
|
||||
{
|
||||
let mut tokens = state.pending_tokens.write().await;
|
||||
if !tokens.contains_key(&token) {
|
||||
return poem::Response::builder()
|
||||
.status(StatusCode::UNAUTHORIZED)
|
||||
.body("invalid or already-used join token");
|
||||
}
|
||||
tokens.remove(&token);
|
||||
}
|
||||
|
||||
// ── WebSocket upgrade ────────────────────────────────────────────────
|
||||
use poem::IntoResponse as _;
|
||||
let state = Arc::clone(&state);
|
||||
ws.on_upgrade(move |socket| async move {
|
||||
let (_, mut stream) = socket.split();
|
||||
|
||||
crate::slog!(
|
||||
"[gateway] Project node '{}' connected to event-push endpoint",
|
||||
project
|
||||
);
|
||||
|
||||
while let Some(msg) = stream.next().await {
|
||||
let text = match msg {
|
||||
Ok(WsMessage::Text(t)) => t,
|
||||
Ok(WsMessage::Close(_)) | Err(_) => break,
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
match serde_json::from_str::<crate::service::events::StoredEvent>(&text) {
|
||||
Ok(event) => {
|
||||
gateway::broadcast_status_event(&state, project.clone(), event);
|
||||
}
|
||||
Err(e) => {
|
||||
crate::slog!(
|
||||
"[gateway] event-push: invalid frame from '{}': {e}",
|
||||
project
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
crate::slog!(
|
||||
"[gateway] Project node '{}' disconnected from event-push endpoint",
|
||||
project
|
||||
);
|
||||
})
|
||||
.into_response()
|
||||
}
|
||||
|
||||
// ── Health handler ──────────────────────────────────────────────────────────
|
||||
|
||||
/// HTTP GET `/health` handler for the gateway.
|
||||
|
||||
@@ -26,6 +26,21 @@ use std::sync::Arc;
|
||||
use tokio::sync::Mutex as TokioMutex;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
// ── Status event broadcaster ────────────────────────────────────────────────
|
||||
|
||||
/// Number of events the gateway status broadcast channel is created with
/// room for (passed to `tokio::sync::broadcast::channel`).
const EVENT_CHANNEL_CAPACITY: usize = 64;
|
||||
|
||||
/// A status event pushed by a project node and fanned out to all local
|
||||
/// subscribers (e.g. the Web UI, notification forwarders).
|
||||
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
|
||||
pub struct GatewayStatusEvent {
|
||||
/// The project name that emitted this event.
|
||||
pub project: String,
|
||||
/// The pipeline event payload.
|
||||
pub event: crate::service::events::StoredEvent,
|
||||
}
|
||||
|
||||
// ── Error type ──────────────────────────────────────────────────────────────
|
||||
|
||||
/// Typed errors returned by `service::gateway` functions.
|
||||
@@ -93,6 +108,10 @@ pub struct GatewayState {
|
||||
pub port: u16,
|
||||
/// Abort handle for the running Matrix bot task (if any).
|
||||
pub bot_handle: Arc<TokioMutex<Option<tokio::task::AbortHandle>>>,
|
||||
/// Broadcast sender for [`GatewayStatusEvent`]s pushed by project nodes.
|
||||
///
|
||||
/// Call `event_tx.subscribe()` to obtain a receiver for outbound fan-out.
|
||||
pub event_tx: tokio::sync::broadcast::Sender<GatewayStatusEvent>,
|
||||
}
|
||||
|
||||
impl GatewayState {
|
||||
@@ -107,6 +126,7 @@ impl GatewayState {
|
||||
) -> Result<Self, String> {
|
||||
let first = config::validate_config(&gateway_config)?;
|
||||
let agents = io::load_agents(&config_dir);
|
||||
let (event_tx, _) = tokio::sync::broadcast::channel(EVENT_CHANNEL_CAPACITY);
|
||||
Ok(Self {
|
||||
projects: Arc::new(RwLock::new(gateway_config.projects)),
|
||||
active_project: Arc::new(RwLock::new(first)),
|
||||
@@ -116,6 +136,7 @@ impl GatewayState {
|
||||
config_dir,
|
||||
port,
|
||||
bot_handle: Arc::new(TokioMutex::new(None)),
|
||||
event_tx,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -380,6 +401,32 @@ pub async fn health_check_all(state: &GatewayState) -> (bool, BTreeMap<String, &
|
||||
(all_healthy, statuses)
|
||||
}
|
||||
|
||||
/// Broadcast a status event received from a project node to all local subscribers.
|
||||
///
|
||||
/// Returns the number of active receivers that received the event.
|
||||
/// A return value of zero means no subscribers are currently connected.
|
||||
pub fn broadcast_status_event(
|
||||
state: &GatewayState,
|
||||
project: String,
|
||||
event: crate::service::events::StoredEvent,
|
||||
) -> usize {
|
||||
let msg = GatewayStatusEvent { project, event };
|
||||
state.event_tx.send(msg).unwrap_or(0)
|
||||
}
|
||||
|
||||
/// Subscribe to the gateway's status event stream.
|
||||
///
|
||||
/// Returns a broadcast receiver that will yield [`GatewayStatusEvent`]s as
|
||||
/// project nodes push them. If the receiver falls behind (more than
|
||||
/// [`EVENT_CHANNEL_CAPACITY`] events are queued), it will receive a
|
||||
/// [`tokio::sync::broadcast::error::RecvError::Lagged`] error; callers
|
||||
/// should discard lagged events and continue.
|
||||
pub fn subscribe_status_events(
|
||||
state: &GatewayState,
|
||||
) -> tokio::sync::broadcast::Receiver<GatewayStatusEvent> {
|
||||
state.event_tx.subscribe()
|
||||
}
|
||||
|
||||
/// Save bot config and restart the bot.
|
||||
pub async fn save_bot_config_and_restart(state: &GatewayState, content: &str) -> Result<(), Error> {
|
||||
io::write_bot_config(&state.config_dir, content).map_err(Error::Config)?;
|
||||
|
||||
Reference in New Issue
Block a user