Files
huskies/.huskies/specs/tech/fly_multitenant_poc.sh
T
2026-05-14 18:32:37 +00:00

102 lines
3.7 KiB
Bash
Executable File

#!/usr/bin/env bash
# fly_multitenant_poc.sh — Proof of concept for Spike 811.
#
# Demonstrates the Fly.io Machines API calls that the huskies gateway
# will eventually make to provision and tear down a per-tenant project
# machine. Run against a real Fly org with FLY_API_TOKEN set, or read it
# as a commented sketch — the calls are the contract.
#
# This is NOT production code. Production will issue these requests
# from Rust (see server::service::cloud::fly) with retries, structured
# errors, and CRDT writes to record machine_id/volume_id. The shell
# script exists so the spec is verifiable end-to-end.
#
# Required env:
#   FLY_API_TOKEN - org-scoped Fly token
#   FLY_APP       - name of the huskies-projects Fly app (must exist)
#   TENANT_ID     - identifier used to tag and name the volume and machine
# Optional env:
#   REGION        - Fly region code, e.g. "iad" (default: iad)
# Required tools: curl, jq
# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# Check external tools before touching the API. Without this, a missing jq
# is only discovered after the first curl call has already created a volume.
for tool in curl jq; do
  command -v "${tool}" >/dev/null || {
    printf 'error: required tool not found on PATH: %s\n' "${tool}" >&2
    exit 1
  }
done

# Required inputs: abort with a clear message when unset or empty.
: "${FLY_API_TOKEN:?FLY_API_TOKEN must be set}"
: "${FLY_APP:?FLY_APP must be set}"
: "${TENANT_ID:?TENANT_ID must be set}"

# Optional input: region falls back to "iad".
REGION="${REGION:-iad}"

# Image every tenant machine is created from, and the Machines API base URL.
IMAGE="registry.fly.io/huskies-projects:latest"
API="https://api.machines.dev/v1"

# Headers shared by every request, kept as an array so each -H/value pair
# survives expansion as separate, correctly quoted words.
# NOTE(review): the bearer token is passed on curl's command line, so it is
# visible in process listings while a request runs; acceptable for a PoC.
AUTH=(-H "Authorization: Bearer ${FLY_API_TOKEN}" -H "Content-Type: application/json")

# None of these are reassigned later in the script.
readonly REGION IMAGE API AUTH
echo "==> 1. Create a 1 GiB persistent volume for tenant ${TENANT_ID}"

# Build the request body with jq so TENANT_ID and REGION are JSON-escaped
# instead of being spliced verbatim into a template.
VOLUME_REQUEST=$(jq -cn \
  --arg name "huskies_${TENANT_ID}" \
  --arg region "${REGION}" \
  '{"name": $name, "region": $region, "size_gb": 1}')

# --fail-with-body makes curl exit non-zero on HTTP >= 400 while still
# returning the API's error payload, so a rejected request stops the script
# here instead of continuing with an id of "null".
if ! VOLUME_JSON=$(curl -sS --fail-with-body -X POST \
  "${API}/apps/${FLY_APP}/volumes" "${AUTH[@]}" --data @- <<<"${VOLUME_REQUEST}"); then
  printf 'error: volume creation failed: %s\n' "${VOLUME_JSON}" >&2
  exit 1
fi

# jq -e exits non-zero when .id is null or missing (or the body is not JSON).
if ! VOLUME_ID=$(jq -er '.id' <<<"${VOLUME_JSON}"); then
  printf 'error: volume response carries no id: %s\n' "${VOLUME_JSON}" >&2
  exit 1
fi
echo " volume_id = ${VOLUME_ID}"
echo "==> 2. Create a machine attached to the volume, with auto-suspend"

# Build the machine config with jq so every interpolated value is
# JSON-escaped. The service uses the Machines API field names `autostop` /
# `autostart`; `auto_stop_machines` / `auto_start_machines` are the fly.toml
# spellings of the same settings.
MACHINE_REQUEST=$(jq -cn \
  --arg name "huskies-${TENANT_ID}" \
  --arg region "${REGION}" \
  --arg image "${IMAGE}" \
  --arg tenant "${TENANT_ID}" \
  --arg volume "${VOLUME_ID}" \
  '{
    "name": $name,
    "region": $region,
    "config": {
      "image": $image,
      "env": {
        "TENANT_ID": $tenant,
        "HUSKIES_PORT": "3001",
        "PRIMARY_REGION": $region
      },
      "guest": { "cpu_kind": "shared", "cpus": 2, "memory_mb": 2048 },
      "mounts": [ { "volume": $volume, "path": "/data" } ],
      "services": [ {
        "ports": [
          { "port": 443, "handlers": ["tls", "http"] },
          { "port": 80, "handlers": ["http"] }
        ],
        "protocol": "tcp",
        "internal_port": 3001,
        "autostop": "suspend",
        "autostart": true,
        "min_machines_running": 0
      } ],
      "metadata": { "tenant": $tenant, "managed_by": "huskies-gw" },
      "restart": { "policy": "on-failure", "max_retries": 5 }
    }
  }')

# --fail-with-body: exit non-zero on HTTP >= 400 but keep the error payload.
if ! MACHINE_JSON=$(curl -sS --fail-with-body -X POST \
  "${API}/apps/${FLY_APP}/machines" "${AUTH[@]}" --data @- <<<"${MACHINE_REQUEST}"); then
  printf 'error: machine creation failed: %s\n' "${MACHINE_JSON}" >&2
  # Best effort: the volume from step 1 has no machine to attach to, so
  # remove it rather than leave it orphaned.
  curl -sS --fail-with-body -X DELETE \
    "${API}/apps/${FLY_APP}/volumes/${VOLUME_ID}" "${AUTH[@]}" >/dev/null \
    || printf 'warning: could not delete orphaned volume %s\n' "${VOLUME_ID}" >&2
  exit 1
fi

# jq -e exits non-zero when .id is null or missing (or the body is not JSON).
if ! MACHINE_ID=$(jq -er '.id' <<<"${MACHINE_JSON}"); then
  printf 'error: machine response carries no id: %s\n' "${MACHINE_JSON}" >&2
  exit 1
fi
# private_ip is informational only, so a missing value just prints "null".
PRIVATE_IP=$(jq -r '.private_ip' <<<"${MACHINE_JSON}")
echo " machine_id = ${MACHINE_ID}"
echo " private_ip = ${PRIVATE_IP}"
echo "==> 3. Wait for the machine to reach 'started' (long-poll, 60s timeout)"

# --fail-with-body: a timeout or other HTTP error makes curl exit non-zero
# while still returning the API's error payload for the message below.
if ! WAIT_JSON=$(curl -sS --fail-with-body \
  "${API}/apps/${FLY_APP}/machines/${MACHINE_ID}/wait?state=started&timeout=60" \
  "${AUTH[@]}"); then
  printf 'error: machine %s did not reach started: %s\n' "${MACHINE_ID}" "${WAIT_JSON}" >&2
  printf 'note: machine %s and volume %s still exist and need manual cleanup\n' \
    "${MACHINE_ID}" "${VOLUME_ID}" >&2
  exit 1
fi

# jq raises an error for string + boolean, so stringify .ok before
# concatenating; tostring is valid whatever JSON type the field has.
jq -r '" state = " + (.ok | tostring)' <<<"${WAIT_JSON}"
echo " machine reachable at ${MACHINE_ID}.vm.${FLY_APP}.internal:3001"
# ----- At this point the gateway would record (tenant, machine_id, volume_id)
# ----- into the CRDT and start proxying traffic. We pause here.
sleep 2

#######################################
# Send a Machines API request whose response body matters only on failure.
# Globals:   API, FLY_APP, AUTH (read)
# Arguments: $1 - HTTP method
#            $2 - path below ${API}/apps/${FLY_APP} (leading slash included)
#            $3... - extra curl arguments (e.g. --data '<json>')
# Outputs:   nothing on success; the API error body on stderr on failure
# Returns:   0 on HTTP success, 1 otherwise
#######################################
fly_request_quiet() {
  local method=$1 path=$2 body
  shift 2
  # --fail-with-body: exit non-zero on HTTP >= 400 but keep the error payload,
  # so a refused request is reported instead of silently discarded.
  if ! body=$(curl -sS --fail-with-body -X "${method}" \
    "${API}/apps/${FLY_APP}${path}" "${AUTH[@]}" "$@"); then
    printf 'error: %s %s failed: %s\n' "${method}" "${path}" "${body}" >&2
    return 1
  fi
}

echo "==> 4. Graceful stop (lets sled flush; idle-suspend uses the same path)"
# NOTE(review): the stop request may return before the machine has fully
# stopped; the forced destroy below could then cut the graceful window
# short. Confirm, and consider waiting for the stopped state in between.
fly_request_quiet POST "/machines/${MACHINE_ID}/stop" \
  --data '{"signal":"SIGTERM","timeout":"30s"}'

echo "==> 5. Destroy the machine"
fly_request_quiet DELETE "/machines/${MACHINE_ID}?force=true"
echo " machine destroyed"

echo "==> 6. Reclaim the volume (only when the tenant deletes the project)"
fly_request_quiet DELETE "/volumes/${VOLUME_ID}"
echo " volume reclaimed"

echo "==> done."