mirror of
https://codeberg.org/Toasterson/solstice-ci.git
synced 2026-04-10 13:20:41 +00:00
Move runner logs to debug level and enable runner binary serving via orchestrator
This commit includes: - Adjusted runner logs from `info` to `debug` for reduced deployment log verbosity while retaining visibility in CI. - Added functionality to serve runner binaries directly from the orchestrator via HTTP. - Introduced new `RUNNER_DIR` configuration to specify the binary directory, with default paths and URL composition. - Updated HTTP routing to include runner file serving with validation and logging. - Improved AMQP body logging with a utility for better error debugging. - Updated task scripts for runner cross-building and serving, consolidating configurations and removing redundant files.
This commit is contained in:
parent
7ea24af24f
commit
97599eb48d
10 changed files with 172 additions and 147 deletions
|
|
@ -2,9 +2,11 @@
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
# Cross-build the workflow-runner for Linux and Illumos targets.
|
# Cross-build the workflow-runner for Linux and Illumos targets.
|
||||||
# Requires: cross (https://github.com/cross-rs/cross)
|
# Requires: cross (https://github.com/cross-rs/cross)
|
||||||
# Outputs:
|
# Builds and stages runner binaries under target/runners for the orchestrator HTTP server.
|
||||||
# - target/x86_64-unknown-linux-gnu/release/solstice-runner
|
# Outputs (staged for serving by orchestrator):
|
||||||
# - target/x86_64-unknown-illumos/release/solstice-runner
|
# - target/runners/solstice-runner-linux
|
||||||
|
# - target/runners/solstice-runner-illumos
|
||||||
|
# - target/runners/solstice-runner (symlink to -linux by default)
|
||||||
|
|
||||||
ROOT_DIR=$(cd "$(dirname "$0")/../../.." && pwd)
|
ROOT_DIR=$(cd "$(dirname "$0")/../../.." && pwd)
|
||||||
cd "$ROOT_DIR"
|
cd "$ROOT_DIR"
|
||||||
|
|
@ -19,6 +21,29 @@ cross build -p workflow-runner --target x86_64-unknown-linux-gnu --release
|
||||||
# Build Illumos runner
|
# Build Illumos runner
|
||||||
cross build -p workflow-runner --target x86_64-unknown-illumos --release
|
cross build -p workflow-runner --target x86_64-unknown-illumos --release
|
||||||
|
|
||||||
|
LIN_BIN="${ROOT_DIR}/target/x86_64-unknown-linux-gnu/release/solstice-runner"
|
||||||
|
ILL_BIN="${ROOT_DIR}/target/x86_64-unknown-illumos/release/solstice-runner"
|
||||||
|
SERVE_DIR="${ROOT_DIR}/target/runners"
|
||||||
|
mkdir -p "$SERVE_DIR"
|
||||||
|
|
||||||
|
# Stage with orchestrator-expected filenames
|
||||||
|
if [[ -f "$LIN_BIN" ]]; then
|
||||||
|
cp -f "$LIN_BIN" "$SERVE_DIR/solstice-runner-linux"
|
||||||
|
chmod +x "$SERVE_DIR/solstice-runner-linux" || true
|
||||||
|
fi
|
||||||
|
if [[ -f "$ILL_BIN" ]]; then
|
||||||
|
cp -f "$ILL_BIN" "$SERVE_DIR/solstice-runner-illumos"
|
||||||
|
chmod +x "$SERVE_DIR/solstice-runner-illumos" || true
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Provide a generic solstice-runner as a convenience (symlink to linux)
|
||||||
|
if [[ -f "$SERVE_DIR/solstice-runner-linux" ]]; then
|
||||||
|
ln -sf "solstice-runner-linux" "$SERVE_DIR/solstice-runner"
|
||||||
|
fi
|
||||||
|
|
||||||
echo "Built runner binaries:" >&2
|
echo "Built runner binaries:" >&2
|
||||||
ls -l "${ROOT_DIR}/target/x86_64-unknown-linux-gnu/release/solstice-runner" 2>/dev/null || true
|
ls -l "$LIN_BIN" 2>/dev/null || true
|
||||||
ls -l "${ROOT_DIR}/target/x86_64-unknown-illumos/release/solstice-runner" 2>/dev/null || true
|
ls -l "$ILL_BIN" 2>/dev/null || true
|
||||||
|
|
||||||
|
echo "Staged for orchestrator serving under $SERVE_DIR:" >&2
|
||||||
|
ls -l "$SERVE_DIR" 2>/dev/null || true
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
# Run the Solstice Orchestrator with sensible local defaults
|
# Run the Solstice Orchestrator with sensible local defaults
|
||||||
export RUST_LOG=${RUST_LOG:-info}
|
# Default to info globally, but enable orchestrator debug to surface runner logs in CI
|
||||||
|
export RUST_LOG=${RUST_LOG:-info,orchestrator=debug}
|
||||||
export ORCH_CONFIG=${ORCH_CONFIG:-examples/orchestrator-image-map.yaml}
|
export ORCH_CONFIG=${ORCH_CONFIG:-examples/orchestrator-image-map.yaml}
|
||||||
export AMQP_URL=${AMQP_URL:-amqp://127.0.0.1:5672/%2f}
|
export AMQP_URL=${AMQP_URL:-amqp://127.0.0.1:5672/%2f}
|
||||||
export AMQP_EXCHANGE=${AMQP_EXCHANGE:-solstice.jobs}
|
export AMQP_EXCHANGE=${AMQP_EXCHANGE:-solstice.jobs}
|
||||||
|
|
@ -20,25 +21,16 @@ fi
|
||||||
# Contact address for gRPC log streaming from guests (used in cloud-init)
|
# Contact address for gRPC log streaming from guests (used in cloud-init)
|
||||||
export ORCH_CONTACT_ADDR=${ORCH_CONTACT_ADDR:-$HOST_IP:50051}
|
export ORCH_CONTACT_ADDR=${ORCH_CONTACT_ADDR:-$HOST_IP:50051}
|
||||||
|
|
||||||
# Auto-compose runner URLs if not provided, to match ci:vm-build behavior
|
# Serve runner binaries directly from the orchestrator unless overridden
|
||||||
# You must run a runner server in another terminal first:
|
# Set RUNNER_DIR to a directory containing files:
|
||||||
# mise run run:runner-serve-multi (serves on 8090/8091 by default)
|
# - solstice-runner (generic)
|
||||||
# or: mise run run:runner-serve (serves on 8089 by default)
|
# - solstice-runner-linux (Linux-specific)
|
||||||
if [[ -z "${SOLSTICE_RUNNER_URLS:-}" && -z "${SOLSTICE_RUNNER_URL:-}" ]]; then
|
# - solstice-runner-illumos (Illumos-specific)
|
||||||
# Multi-OS defaults
|
export RUNNER_DIR=${RUNNER_DIR:-target/runners}
|
||||||
SOL_RUNNER_PORT_LINUX=${SOL_RUNNER_PORT_LINUX:-8090}
|
# If user didn't provide external URLs, the orchestrator will compose URLs like
|
||||||
SOL_RUNNER_PORT_ILLUMOS=${SOL_RUNNER_PORT_ILLUMOS:-8091}
|
# http://$HOST_IP:${HTTP_PORT:-8081}/runners/solstice-runner[-linux|-illumos]
|
||||||
LINUX_URL="http://$HOST_IP:$SOL_RUNNER_PORT_LINUX/solstice-runner-linux"
|
# automatically when RUNNER_DIR is set.
|
||||||
ILLUMOS_URL="http://$HOST_IP:$SOL_RUNNER_PORT_ILLUMOS/solstice-runner-illumos"
|
# To force external URLs instead, set SOLSTICE_RUNNER_URL(S) explicitly before running this task.
|
||||||
export SOLSTICE_RUNNER_URLS="$LINUX_URL $ILLUMOS_URL"
|
|
||||||
# Also set single-runner URL fallback if someone uses run:runner-serve
|
|
||||||
SOL_RUNNER_PORT=${SOL_RUNNER_PORT:-8089}
|
|
||||||
export SOLSTICE_RUNNER_URL=${SOLSTICE_RUNNER_URL:-"http://$HOST_IP:$SOL_RUNNER_PORT/solstice-runner"}
|
|
||||||
echo "Using default runner URLs:" >&2
|
|
||||||
echo " SOLSTICE_RUNNER_URLS=$SOLSTICE_RUNNER_URLS" >&2
|
|
||||||
echo " SOLSTICE_RUNNER_URL=$SOLSTICE_RUNNER_URL" >&2
|
|
||||||
echo "Override by exporting SOLSTICE_RUNNER_URLS or SOLSTICE_RUNNER_URL before running this task." >&2
|
|
||||||
fi
|
|
||||||
|
|
||||||
# For Linux + libvirt users, customize via LIBVIRT_URI and LIBVIRT_NETWORK
|
# For Linux + libvirt users, customize via LIBVIRT_URI and LIBVIRT_NETWORK
|
||||||
exec cargo run -p orchestrator --features libvirt -- \
|
exec cargo run -p orchestrator --features libvirt -- \
|
||||||
|
|
|
||||||
|
|
@ -1,44 +0,0 @@
|
||||||
#!/usr/bin/env bash
|
|
||||||
set -euo pipefail
|
|
||||||
# Serve the built workflow-runner binary over HTTP for local VMs to download.
|
|
||||||
# This is intended for local development only.
|
|
||||||
#
|
|
||||||
# Env:
|
|
||||||
# SOL_RUNNER_PORT - port to bind (default: 8089)
|
|
||||||
# SOL_RUNNER_BIND - bind address (default: 0.0.0.0)
|
|
||||||
# SOL_RUNNER_BINARY - path to runner binary (default: target/debug/solstice-runner)
|
|
||||||
#
|
|
||||||
# The file will be exposed at http://HOST:PORT/solstice-runner
|
|
||||||
|
|
||||||
ROOT_DIR=$(cd "$(dirname "$0")/../../.." && pwd)
|
|
||||||
cd "$ROOT_DIR"
|
|
||||||
|
|
||||||
command -v cargo >/dev/null 2>&1 || { echo "cargo is required" >&2; exit 127; }
|
|
||||||
PYTHON=${PYTHON:-python3}
|
|
||||||
if ! command -v "$PYTHON" >/dev/null 2>&1; then
|
|
||||||
echo "python3 is required to run a simple HTTP server" >&2
|
|
||||||
exit 127
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Build runner if not present
|
|
||||||
BINARY_DEFAULT="$ROOT_DIR/target/debug/solstice-runner"
|
|
||||||
export SOL_RUNNER_BINARY=${SOL_RUNNER_BINARY:-$BINARY_DEFAULT}
|
|
||||||
if [[ ! -x "$SOL_RUNNER_BINARY" ]]; then
|
|
||||||
cargo build -p workflow-runner >/dev/null
|
|
||||||
if [[ ! -x "$SOL_RUNNER_BINARY" ]]; then
|
|
||||||
echo "runner binary not found at $SOL_RUNNER_BINARY after build" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Prepare serve dir under target
|
|
||||||
SERVE_DIR="$ROOT_DIR/target/runner-serve"
|
|
||||||
mkdir -p "$SERVE_DIR"
|
|
||||||
cp -f "$SOL_RUNNER_BINARY" "$SERVE_DIR/solstice-runner"
|
|
||||||
chmod +x "$SERVE_DIR/solstice-runner" || true
|
|
||||||
|
|
||||||
PORT=${SOL_RUNNER_PORT:-8089}
|
|
||||||
BIND=${SOL_RUNNER_BIND:-0.0.0.0}
|
|
||||||
|
|
||||||
echo "Serving solstice-runner from $SERVE_DIR on http://$BIND:$PORT (Ctrl-C to stop)" >&2
|
|
||||||
exec "$PYTHON" -m http.server "$PORT" --bind "$BIND" --directory "$SERVE_DIR"
|
|
||||||
|
|
@ -1,53 +0,0 @@
|
||||||
#!/usr/bin/env bash
|
|
||||||
set -euo pipefail
|
|
||||||
# Serve cross-built workflow-runner binaries for Linux and Illumos on two ports.
|
|
||||||
# Intended for local development only.
|
|
||||||
# Env:
|
|
||||||
# SOL_RUNNER_BIND - bind address (default: 0.0.0.0)
|
|
||||||
# SOL_RUNNER_PORT_LINUX - port for Linux runner (default: 8090)
|
|
||||||
# SOL_RUNNER_PORT_ILLUMOS - port for Illumos runner (default: 8091)
|
|
||||||
# PYTHON - python interpreter (default: python3)
|
|
||||||
#
|
|
||||||
# Exposes:
|
|
||||||
# http://HOST:PORT/solstice-runner-linux
|
|
||||||
# http://HOST:PORT/solstice-runner-illumos
|
|
||||||
|
|
||||||
ROOT_DIR=$(cd "$(dirname "$0")/../../.." && pwd)
|
|
||||||
cd "$ROOT_DIR"
|
|
||||||
|
|
||||||
PYTHON=${PYTHON:-python3}
|
|
||||||
command -v "$PYTHON" >/dev/null 2>&1 || { echo "python3 is required" >&2; exit 127; }
|
|
||||||
|
|
||||||
# Ensure cross-built artifacts exist
|
|
||||||
if [[ ! -x "$ROOT_DIR/target/x86_64-unknown-linux-gnu/release/solstice-runner" || ! -x "$ROOT_DIR/target/x86_64-unknown-illumos/release/solstice-runner" ]]; then
|
|
||||||
echo "Cross-built runner binaries not found; building with cross..." >&2
|
|
||||||
"$ROOT_DIR/.mise/tasks/build/runner-cross"
|
|
||||||
fi
|
|
||||||
|
|
||||||
SERVE_DIR="$ROOT_DIR/target/runner-serve-multi"
|
|
||||||
rm -rf "$SERVE_DIR"
|
|
||||||
mkdir -p "$SERVE_DIR"
|
|
||||||
cp -f "$ROOT_DIR/target/x86_64-unknown-linux-gnu/release/solstice-runner" "$SERVE_DIR/solstice-runner-linux"
|
|
||||||
cp -f "$ROOT_DIR/target/x86_64-unknown-illumos/release/solstice-runner" "$SERVE_DIR/solstice-runner-illumos"
|
|
||||||
chmod +x "$SERVE_DIR/solstice-runner-linux" "$SERVE_DIR/solstice-runner-illumos" || true
|
|
||||||
|
|
||||||
BIND=${SOL_RUNNER_BIND:-0.0.0.0}
|
|
||||||
PORT_LIN=${SOL_RUNNER_PORT_LINUX:-8090}
|
|
||||||
PORT_ILL=${SOL_RUNNER_PORT_ILLUMOS:-8091}
|
|
||||||
|
|
||||||
echo "Serving from $SERVE_DIR" >&2
|
|
||||||
|
|
||||||
set +e
|
|
||||||
"$PYTHON" -m http.server "$PORT_LIN" --bind "$BIND" --directory "$SERVE_DIR" &
|
|
||||||
PID_LIN=$!
|
|
||||||
"$PYTHON" -m http.server "$PORT_ILL" --bind "$BIND" --directory "$SERVE_DIR" &
|
|
||||||
PID_ILL=$!
|
|
||||||
set -e
|
|
||||||
|
|
||||||
trap 'kill $PID_LIN $PID_ILL 2>/dev/null || true' INT TERM EXIT
|
|
||||||
|
|
||||||
echo "Linux runner: http://$BIND:$PORT_LIN/solstice-runner-linux" >&2
|
|
||||||
echo "Illumos runner: http://$BIND:$PORT_ILL/solstice-runner-illumos" >&2
|
|
||||||
|
|
||||||
# Wait on background servers
|
|
||||||
wait
|
|
||||||
9
TODO.txt
9
TODO.txt
|
|
@ -1,9 +1,6 @@
|
||||||
- Make RabbitMQ Messages Print nicely
|
|
||||||
- move runner logs to debug level so they can be logged in the CI job but don't spam the deployed version
|
|
||||||
- Make Orchestrator serve the runner binaries so no external server is needed
|
|
||||||
- Make orchestrator detect the address it will be reachable by checking the libvirt config or on illumos use it's external IP
|
- Make orchestrator detect the address it will be reachable by checking the libvirt config or on illumos use it's external IP
|
||||||
- Make VM reachable IP of the orchestrator configurable in case the setup on illumos gets more complicated (via config file)
|
- Make VM reachable IP of the orchestrator configurable in case the setup on illumos gets more complicated (via config file)
|
||||||
- Make the forge-integration task use fnox secrets
|
- Make the forge-integration task use fnox secrets
|
||||||
|
|
||||||
|
|
||||||
Testing repo commit status
|
|
||||||
|
|
@ -16,6 +16,41 @@ use tracing::{error, info, instrument, warn};
|
||||||
|
|
||||||
use crate::messages::{JobRequest, JobResult};
|
use crate::messages::{JobRequest, JobResult};
|
||||||
|
|
||||||
|
/// Pretty-print an AMQP message body for logs.
|
||||||
|
/// - If valid UTF-8 JSON, pretty-format it.
|
||||||
|
/// - If valid UTF-8 text, return as-is.
|
||||||
|
/// - Otherwise, return a hex preview with ASCII sidecar.
|
||||||
|
pub fn pretty_amqp_body(data: &[u8]) -> String {
|
||||||
|
// Try JSON first when it looks like JSON
|
||||||
|
let looks_json = data.iter().skip_while(|b| b.is_ascii_whitespace()).next().map(|b| *b == b'{' || *b == b'[').unwrap_or(false);
|
||||||
|
if looks_json {
|
||||||
|
if let Ok(v) = serde_json::from_slice::<serde_json::Value>(data) {
|
||||||
|
if let Ok(s) = serde_json::to_string_pretty(&v) {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// UTF-8 fallback
|
||||||
|
if let Ok(s) = std::str::from_utf8(data) {
|
||||||
|
return s.to_string();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hex + ASCII fallback (preview up to 256 bytes)
|
||||||
|
let max = 256.min(data.len());
|
||||||
|
let mut hex = String::with_capacity(max * 2);
|
||||||
|
let mut ascii = String::with_capacity(max);
|
||||||
|
for &b in &data[..max] {
|
||||||
|
hex.push_str(&format!("{:02x}", b));
|
||||||
|
ascii.push(if b.is_ascii_graphic() || b == b' ' { b as char } else { '.' });
|
||||||
|
}
|
||||||
|
if data.len() > max {
|
||||||
|
format!("<{} bytes> hex:{}… ascii:{}…", data.len(), hex, ascii)
|
||||||
|
} else {
|
||||||
|
format!("<{} bytes> hex:{} ascii:{}", data.len(), hex, ascii)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct MqConfig {
|
pub struct MqConfig {
|
||||||
pub url: String,
|
pub url: String,
|
||||||
|
|
@ -276,7 +311,7 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!(error = %e, "failed to deserialize JobRequest; dead-lettering");
|
warn!(error = %e, body = %pretty_amqp_body(&d.data), "failed to deserialize JobRequest; dead-lettering");
|
||||||
channel
|
channel
|
||||||
.basic_nack(tag, BasicNackOptions { requeue: false, multiple: false })
|
.basic_nack(tag, BasicNackOptions { requeue: false, multiple: false })
|
||||||
.await
|
.await
|
||||||
|
|
|
||||||
|
|
@ -341,7 +341,7 @@ async fn consume_job_results(state: Arc<AppState>) -> Result<()> {
|
||||||
.into_diagnostic()?;
|
.into_diagnostic()?;
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!(error = %e, "failed to parse JobResult; acking");
|
warn!(error = %e, body = %common::mq::pretty_amqp_body(&d.data), "failed to parse JobResult; acking");
|
||||||
channel
|
channel
|
||||||
.basic_ack(tag, lapin::options::BasicAckOptions { multiple: false })
|
.basic_ack(tag, lapin::options::BasicAckOptions { multiple: false })
|
||||||
.await
|
.await
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ use futures_util::StreamExt;
|
||||||
use miette::{IntoDiagnostic as _, Result};
|
use miette::{IntoDiagnostic as _, Result};
|
||||||
use std::net::SocketAddr;
|
use std::net::SocketAddr;
|
||||||
use tonic::{Request, Response, Status};
|
use tonic::{Request, Response, Status};
|
||||||
use tracing::{error, info, warn};
|
use tracing::{debug, error, info, warn};
|
||||||
|
|
||||||
use common::runner::v1::{
|
use common::runner::v1::{
|
||||||
Ack, LogItem,
|
Ack, LogItem,
|
||||||
|
|
@ -60,7 +60,7 @@ impl Runner for RunnerSvc {
|
||||||
let mut got_end: bool = false;
|
let mut got_end: bool = false;
|
||||||
let mut seq: i64 = 0;
|
let mut seq: i64 = 0;
|
||||||
|
|
||||||
info!("runner log stream opened");
|
debug!("runner log stream opened");
|
||||||
while let Some(item) = stream
|
while let Some(item) = stream
|
||||||
.next()
|
.next()
|
||||||
.await
|
.await
|
||||||
|
|
@ -71,7 +71,7 @@ impl Runner for RunnerSvc {
|
||||||
if req_id.is_none() {
|
if req_id.is_none() {
|
||||||
match uuid::Uuid::parse_str(&item.request_id) {
|
match uuid::Uuid::parse_str(&item.request_id) {
|
||||||
Ok(u) => {
|
Ok(u) => {
|
||||||
info!(request_id = %u, "runner log stream identified");
|
debug!(request_id = %u, "runner log stream identified");
|
||||||
req_id = Some(u)
|
req_id = Some(u)
|
||||||
}
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
|
|
@ -84,10 +84,10 @@ impl Runner for RunnerSvc {
|
||||||
common::runner::v1::log_item::Event::Log(chunk) => {
|
common::runner::v1::log_item::Event::Log(chunk) => {
|
||||||
if chunk.stderr {
|
if chunk.stderr {
|
||||||
lines_stderr += 1;
|
lines_stderr += 1;
|
||||||
info!(request_id = %item.request_id, line = %chunk.line, "runner:stderr");
|
debug!(request_id = %item.request_id, line = %chunk.line, "runner:stderr");
|
||||||
} else {
|
} else {
|
||||||
lines_stdout += 1;
|
lines_stdout += 1;
|
||||||
info!(request_id = %item.request_id, line = %chunk.line, "runner:stdout");
|
debug!(request_id = %item.request_id, line = %chunk.line, "runner:stdout");
|
||||||
}
|
}
|
||||||
// Best effort: persist the line if we have a parsed UUID
|
// Best effort: persist the line if we have a parsed UUID
|
||||||
if let Some(id) = req_id {
|
if let Some(id) = req_id {
|
||||||
|
|
@ -103,13 +103,13 @@ impl Runner for RunnerSvc {
|
||||||
repo_url = Some(end.repo_url);
|
repo_url = Some(end.repo_url);
|
||||||
commit_sha = Some(end.commit_sha);
|
commit_sha = Some(end.commit_sha);
|
||||||
got_end = true;
|
got_end = true;
|
||||||
info!(exit_code, success, "runner log stream received End");
|
debug!(exit_code, success, "runner log stream received End");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
info!(lines_stdout, lines_stderr, got_end, "runner log stream closed");
|
debug!(lines_stdout, lines_stderr, got_end, "runner log stream closed");
|
||||||
// Publish final status if we have enough context
|
// Publish final status if we have enough context
|
||||||
if let (Some(id), Some(repo), Some(sha)) =
|
if let (Some(id), Some(repo), Some(sha)) =
|
||||||
(req_id.as_ref(), repo_url.as_ref(), commit_sha.as_ref())
|
(req_id.as_ref(), repo_url.as_ref(), commit_sha.as_ref())
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,9 @@
|
||||||
use axum::{extract::Path, http::StatusCode, response::{IntoResponse, Response}, routing::get, Router};
|
use axum::{extract::Path, http::StatusCode, response::{IntoResponse, Response}, routing::get, Router};
|
||||||
use std::net::SocketAddr;
|
use std::net::SocketAddr;
|
||||||
|
use std::path::PathBuf;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tracing::{info, warn};
|
use tokio::fs;
|
||||||
|
use tracing::{debug, info, warn};
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use crate::persist::Persist;
|
use crate::persist::Persist;
|
||||||
|
|
@ -9,12 +11,14 @@ use crate::persist::Persist;
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct HttpState {
|
pub struct HttpState {
|
||||||
persist: Arc<Persist>,
|
persist: Arc<Persist>,
|
||||||
|
runner_dir: Option<PathBuf>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn build_router(persist: Arc<Persist>) -> Router {
|
pub fn build_router(persist: Arc<Persist>, runner_dir: Option<PathBuf>) -> Router {
|
||||||
let state = HttpState { persist };
|
let state = HttpState { persist, runner_dir };
|
||||||
Router::new()
|
Router::new()
|
||||||
.route("/jobs/{request_id}/logs", get(get_logs))
|
.route("/jobs/{request_id}/logs", get(get_logs))
|
||||||
|
.route("/runners/{name}", get(get_runner))
|
||||||
.with_state(state)
|
.with_state(state)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -43,8 +47,43 @@ async fn get_logs(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn serve(addr: SocketAddr, persist: Arc<Persist>, shutdown: impl std::future::Future<Output = ()>) {
|
async fn get_runner(
|
||||||
let app = build_router(persist);
|
Path(name): Path<String>,
|
||||||
|
axum::extract::State(state): axum::extract::State<HttpState>,
|
||||||
|
) -> Response {
|
||||||
|
let Some(dir) = state.runner_dir.as_ref() else {
|
||||||
|
return (StatusCode::SERVICE_UNAVAILABLE, "runner serving disabled").into_response();
|
||||||
|
};
|
||||||
|
// Basic validation: prevent path traversal; allow only simple file names
|
||||||
|
if name.contains('/') || name.contains('\\') || name.starts_with('.') {
|
||||||
|
return StatusCode::BAD_REQUEST.into_response();
|
||||||
|
}
|
||||||
|
let path = dir.join(&name);
|
||||||
|
if !path.starts_with(dir) {
|
||||||
|
return StatusCode::BAD_REQUEST.into_response();
|
||||||
|
}
|
||||||
|
match fs::read(&path).await {
|
||||||
|
Ok(bytes) => {
|
||||||
|
debug!(path = %path.display(), size = bytes.len(), "serving runner binary");
|
||||||
|
(
|
||||||
|
StatusCode::OK,
|
||||||
|
[
|
||||||
|
(axum::http::header::CONTENT_TYPE, "application/octet-stream"),
|
||||||
|
(axum::http::header::CONTENT_DISPOSITION, &format!("attachment; filename=\"{}\"", name)),
|
||||||
|
],
|
||||||
|
bytes,
|
||||||
|
)
|
||||||
|
.into_response()
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
debug!(error = %e, path = %path.display(), "runner file not found");
|
||||||
|
StatusCode::NOT_FOUND.into_response()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn serve(addr: SocketAddr, persist: Arc<Persist>, runner_dir: Option<PathBuf>, shutdown: impl std::future::Future<Output = ()>) {
|
||||||
|
let app = build_router(persist, runner_dir);
|
||||||
info!(%addr, "http server starting");
|
info!(%addr, "http server starting");
|
||||||
let listener = tokio::net::TcpListener::bind(addr).await.expect("bind http");
|
let listener = tokio::net::TcpListener::bind(addr).await.expect("bind http");
|
||||||
let server = axum::serve(listener, app);
|
let server = axum::serve(listener, app);
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ use hypervisor::{JobContext, RouterHypervisor, VmSpec};
|
||||||
use scheduler::{SchedItem, Scheduler};
|
use scheduler::{SchedItem, Scheduler};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio::sync::Notify;
|
use tokio::sync::Notify;
|
||||||
|
use std::net::SocketAddr as _;
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
#[command(
|
#[command(
|
||||||
|
|
@ -29,6 +30,10 @@ struct Opts {
|
||||||
#[arg(long, env = "ORCH_CONFIG")]
|
#[arg(long, env = "ORCH_CONFIG")]
|
||||||
config: Option<PathBuf>,
|
config: Option<PathBuf>,
|
||||||
|
|
||||||
|
/// Directory to serve runner binaries from (filenames are requested via /runners/{name})
|
||||||
|
#[arg(long, env = "RUNNER_DIR")]
|
||||||
|
runner_dir: Option<PathBuf>,
|
||||||
|
|
||||||
/// Global max concurrency for VM provisioning/execution
|
/// Global max concurrency for VM provisioning/execution
|
||||||
#[arg(long, env = "MAX_CONCURRENCY", default_value_t = 2)]
|
#[arg(long, env = "MAX_CONCURRENCY", default_value_t = 2)]
|
||||||
max_concurrency: usize,
|
max_concurrency: usize,
|
||||||
|
|
@ -144,6 +149,25 @@ async fn main() -> Result<()> {
|
||||||
let orch_contact =
|
let orch_contact =
|
||||||
std::env::var("ORCH_CONTACT_ADDR").unwrap_or_else(|_| opts.grpc_addr.clone());
|
std::env::var("ORCH_CONTACT_ADDR").unwrap_or_else(|_| opts.grpc_addr.clone());
|
||||||
|
|
||||||
|
// Compose default runner URLs served by this orchestrator (if runner_dir configured)
|
||||||
|
let (runner_url_default, runner_urls_default) = if opts.runner_dir.is_some() {
|
||||||
|
// Derive host from ORCH_CONTACT_ADDR (host:port) and port from HTTP_ADDR
|
||||||
|
let http_host = orch_contact.split(':').next().unwrap_or("127.0.0.1");
|
||||||
|
let http_port = opts
|
||||||
|
.http_addr
|
||||||
|
.rsplit(':')
|
||||||
|
.next()
|
||||||
|
.unwrap_or("8081");
|
||||||
|
let base = format!("http://{}:{}/runners", http_host, http_port);
|
||||||
|
let linux_url = format!("{}/{}", base, "solstice-runner-linux");
|
||||||
|
let illumos_url = format!("{}/{}", base, "solstice-runner-illumos");
|
||||||
|
let single_url = format!("{}/{}", base, "solstice-runner");
|
||||||
|
info!(linux = %linux_url, illumos = %illumos_url, "serving runner binaries via orchestrator HTTP");
|
||||||
|
(single_url, format!("{} {}", linux_url, illumos_url))
|
||||||
|
} else {
|
||||||
|
(String::new(), String::new())
|
||||||
|
};
|
||||||
|
|
||||||
// Scheduler
|
// Scheduler
|
||||||
let sched = Scheduler::new(
|
let sched = Scheduler::new(
|
||||||
router,
|
router,
|
||||||
|
|
@ -161,11 +185,17 @@ async fn main() -> Result<()> {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Determine runner URLs to inject into cloud-init
|
||||||
|
let runner_url_env = std::env::var("SOLSTICE_RUNNER_URL").unwrap_or_else(|_| runner_url_default.clone());
|
||||||
|
let runner_urls_env = std::env::var("SOLSTICE_RUNNER_URLS").unwrap_or_else(|_| runner_urls_default.clone());
|
||||||
|
|
||||||
// Consumer: enqueue and ack-on-accept
|
// Consumer: enqueue and ack-on-accept
|
||||||
let cfg_clone = cfg.clone();
|
let cfg_clone = cfg.clone();
|
||||||
let mq_cfg_clone = mq_cfg.clone();
|
let mq_cfg_clone = mq_cfg.clone();
|
||||||
let tx_for_consumer = sched_tx.clone();
|
let tx_for_consumer = sched_tx.clone();
|
||||||
let persist_for_consumer = persist.clone();
|
let persist_for_consumer = persist.clone();
|
||||||
|
let runner_url_for_consumer = runner_url_env.clone();
|
||||||
|
let runner_urls_for_consumer = runner_urls_env.clone();
|
||||||
// Start consumer that can be shut down cooperatively on ctrl-c
|
// Start consumer that can be shut down cooperatively on ctrl-c
|
||||||
let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>();
|
let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>();
|
||||||
let consumer_task = tokio::spawn(async move {
|
let consumer_task = tokio::spawn(async move {
|
||||||
|
|
@ -176,6 +206,8 @@ async fn main() -> Result<()> {
|
||||||
let cfg = cfg_clone.clone();
|
let cfg = cfg_clone.clone();
|
||||||
let persist = persist_for_consumer.clone();
|
let persist = persist_for_consumer.clone();
|
||||||
let orch_contact_val = orch_contact.clone();
|
let orch_contact_val = orch_contact.clone();
|
||||||
|
let runner_url_in = runner_url_for_consumer.clone();
|
||||||
|
let runner_urls_in = runner_urls_for_consumer.clone();
|
||||||
async move {
|
async move {
|
||||||
let label_resolved = cfg.resolve_label(job.runs_on.as_deref()).unwrap_or(&cfg.default_label).to_string();
|
let label_resolved = cfg.resolve_label(job.runs_on.as_deref()).unwrap_or(&cfg.default_label).to_string();
|
||||||
let image = match cfg.image_for(&label_resolved) {
|
let image = match cfg.image_for(&label_resolved) {
|
||||||
|
|
@ -201,7 +233,7 @@ async fn main() -> Result<()> {
|
||||||
disk_gb,
|
disk_gb,
|
||||||
network: None, // libvirt network handled in backend
|
network: None, // libvirt network handled in backend
|
||||||
nocloud: image.nocloud,
|
nocloud: image.nocloud,
|
||||||
user_data: Some(make_cloud_init_userdata(&job.repo_url, &job.commit_sha, job.request_id, &orch_contact_val)),
|
user_data: Some(make_cloud_init_userdata(&job.repo_url, &job.commit_sha, job.request_id, &orch_contact_val, &runner_url_in, &runner_urls_in)),
|
||||||
};
|
};
|
||||||
if !spec.nocloud {
|
if !spec.nocloud {
|
||||||
warn!(label = %label_resolved, "image is not marked nocloud=true; cloud-init may not work");
|
warn!(label = %label_resolved, "image is not marked nocloud=true; cloud-init may not work");
|
||||||
|
|
@ -221,9 +253,10 @@ async fn main() -> Result<()> {
|
||||||
// Start HTTP server
|
// Start HTTP server
|
||||||
let http_addr: std::net::SocketAddr = opts.http_addr.parse().into_diagnostic()?;
|
let http_addr: std::net::SocketAddr = opts.http_addr.parse().into_diagnostic()?;
|
||||||
let persist_for_http = persist.clone();
|
let persist_for_http = persist.clone();
|
||||||
|
let runner_dir_for_http = opts.runner_dir.clone();
|
||||||
let (http_shutdown_tx, http_shutdown_rx) = tokio::sync::oneshot::channel::<()>();
|
let (http_shutdown_tx, http_shutdown_rx) = tokio::sync::oneshot::channel::<()>();
|
||||||
let http_task = tokio::spawn(async move {
|
let http_task = tokio::spawn(async move {
|
||||||
crate::http::serve(http_addr, persist_for_http, async move {
|
crate::http::serve(http_addr, persist_for_http, runner_dir_for_http, async move {
|
||||||
let _ = http_shutdown_rx.await;
|
let _ = http_shutdown_rx.await;
|
||||||
}).await;
|
}).await;
|
||||||
});
|
});
|
||||||
|
|
@ -278,10 +311,9 @@ fn make_cloud_init_userdata(
|
||||||
commit_sha: &str,
|
commit_sha: &str,
|
||||||
request_id: uuid::Uuid,
|
request_id: uuid::Uuid,
|
||||||
orch_addr: &str,
|
orch_addr: &str,
|
||||||
|
runner_url: &str,
|
||||||
|
runner_urls: &str,
|
||||||
) -> Vec<u8> {
|
) -> Vec<u8> {
|
||||||
// Allow local dev to inject one or more runner URLs that the VM can fetch.
|
|
||||||
let runner_url = std::env::var("SOLSTICE_RUNNER_URL").unwrap_or_default();
|
|
||||||
let runner_urls = std::env::var("SOLSTICE_RUNNER_URLS").unwrap_or_default();
|
|
||||||
let s = format!(
|
let s = format!(
|
||||||
r#"#cloud-config
|
r#"#cloud-config
|
||||||
write_files:
|
write_files:
|
||||||
|
|
@ -396,6 +428,8 @@ mod tests {
|
||||||
"deadbeef",
|
"deadbeef",
|
||||||
req_id,
|
req_id,
|
||||||
"127.0.0.1:50051",
|
"127.0.0.1:50051",
|
||||||
|
"http://127.0.0.1:8081/runners/solstice-runner",
|
||||||
|
"http://127.0.0.1:8081/runners/solstice-runner-linux http://127.0.0.1:8081/runners/solstice-runner-illumos",
|
||||||
);
|
);
|
||||||
let s = String::from_utf8(data).unwrap();
|
let s = String::from_utf8(data).unwrap();
|
||||||
assert!(s.contains("#cloud-config"));
|
assert!(s.contains("#cloud-config"));
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue