Add optional TLS support and SMF service integration

Enable the API server to optionally serve HTTPS (disabled by default).
When --tls is passed without explicit cert/key paths, a self-signed CA
and server certificate are auto-generated via rcgen and persisted to
disk for reuse across restarts. The internal ApiClient learns to trust
the self-signed CA so controller/agent components work seamlessly over
TLS.

Also adds SIGTERM signal handling (alongside SIGINT) and graceful
shutdown via CancellationToken for both `serve` and `agent` modes,
plus an SMF manifest and method script so reddwarf can run as
svc:/system/reddwarf:default on illumos.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Till Wegmueller 2026-02-14 18:45:20 +01:00
parent 4bfcc39a69
commit cb6ca8cd3c
No known key found for this signature in database
11 changed files with 846 additions and 46 deletions

185
Cargo.lock generated
View file

@ -85,6 +85,15 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "arc-swap"
version = "1.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5"
dependencies = [
"rustversion",
]
[[package]]
name = "async-compression"
version = "0.4.37"
@ -120,6 +129,28 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "aws-lc-rs"
version = "1.15.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b7b6141e96a8c160799cc2d5adecd5cbbe5054cb8c7c4af53da0f83bb7ad256"
dependencies = [
"aws-lc-sys",
"zeroize",
]
[[package]]
name = "aws-lc-sys"
version = "0.37.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b092fe214090261288111db7a2b2c2118e5a7f30dc2569f1732c4069a6840549"
dependencies = [
"cc",
"cmake",
"dunce",
"fs_extra",
]
[[package]]
name = "axum"
version = "0.8.8"
@ -187,6 +218,28 @@ dependencies = [
"syn",
]
[[package]]
name = "axum-server"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1ab4a3ec9ea8a657c72d99a03a824af695bd0fb5ec639ccbd9cd3543b41a5f9"
dependencies = [
"arc-swap",
"bytes",
"fs-err",
"http",
"http-body",
"hyper",
"hyper-util",
"pin-project-lite",
"rustls",
"rustls-pemfile",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tower-service",
]
[[package]]
name = "backtrace"
version = "0.3.76"
@ -257,6 +310,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583"
dependencies = [
"find-msvc-tools",
"jobserver",
"libc",
"shlex",
]
@ -320,6 +375,15 @@ version = "0.7.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32"
[[package]]
name = "cmake"
version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d"
dependencies = [
"cc",
]
[[package]]
name = "colorchoice"
version = "1.0.4"
@ -393,6 +457,15 @@ version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea"
[[package]]
name = "deranged"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4"
dependencies = [
"powerfmt",
]
[[package]]
name = "digest"
version = "0.10.7"
@ -414,6 +487,12 @@ dependencies = [
"syn",
]
[[package]]
name = "dunce"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
[[package]]
name = "encoding_rs"
version = "0.8.35"
@ -491,6 +570,22 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "fs-err"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73fde052dbfc920003cfd2c8e2c6e6d4cc7c1091538c3a24226cec0665ab08c0"
dependencies = [
"autocfg",
"tokio",
]
[[package]]
name = "fs_extra"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "futures-channel"
version = "0.3.31"
@ -920,6 +1015,16 @@ version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
[[package]]
name = "jobserver"
version = "0.1.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
dependencies = [
"getrandom 0.3.4",
"libc",
]
[[package]]
name = "js-sys"
version = "0.3.85"
@ -1108,6 +1213,12 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "num-conv"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050"
[[package]]
name = "num-traits"
version = "0.2.19"
@ -1220,6 +1331,16 @@ dependencies = [
"windows-link",
]
[[package]]
name = "pem"
version = "3.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be"
dependencies = [
"base64",
"serde_core",
]
[[package]]
name = "percent-encoding"
version = "2.3.2"
@ -1253,6 +1374,12 @@ dependencies = [
"zerovec",
]
[[package]]
name = "powerfmt"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "ppv-lite86"
version = "0.2.21"
@ -1315,6 +1442,19 @@ dependencies = [
"getrandom 0.3.4",
]
[[package]]
name = "rcgen"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75e669e5202259b5314d1ea5397316ad400819437857b90861765f24c4cf80a2"
dependencies = [
"pem",
"ring",
"rustls-pki-types",
"time",
"yasna",
]
[[package]]
name = "redb"
version = "2.6.3"
@ -1347,19 +1487,24 @@ name = "reddwarf-apiserver"
version = "0.1.0"
dependencies = [
"axum",
"axum-server",
"futures-util",
"hyper",
"json-patch",
"miette",
"rcgen",
"reddwarf-core",
"reddwarf-storage",
"reddwarf-versioning",
"rustls",
"rustls-pemfile",
"serde",
"serde_json",
"serde_yaml",
"tempfile",
"tokio",
"tokio-stream",
"tokio-util",
"tower",
"tower-http",
"tracing",
@ -1562,6 +1707,8 @@ version = "0.23.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b"
dependencies = [
"aws-lc-rs",
"log",
"once_cell",
"rustls-pki-types",
"rustls-webpki",
@ -1569,6 +1716,15 @@ dependencies = [
"zeroize",
]
[[package]]
name = "rustls-pemfile"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50"
dependencies = [
"rustls-pki-types",
]
[[package]]
name = "rustls-pki-types"
version = "1.14.0"
@ -1584,6 +1740,7 @@ version = "0.103.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53"
dependencies = [
"aws-lc-rs",
"ring",
"rustls-pki-types",
"untrusted",
@ -1965,6 +2122,25 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "time"
version = "0.3.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c"
dependencies = [
"deranged",
"num-conv",
"powerfmt",
"serde_core",
"time-core",
]
[[package]]
name = "time-core"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca"
[[package]]
name = "tinystr"
version = "0.8.2"
@ -2647,6 +2823,15 @@ version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
[[package]]
name = "yasna"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e17bb3549cc1321ae1296b9cdc2698e2b6cb1992adfa19a8c72e5b7a738f44cd"
dependencies = [
"time",
]
[[package]]
name = "yoke"
version = "0.8.1"

View file

@ -78,6 +78,7 @@ clap = { version = "4.5", features = ["derive", "env"] }
rcgen = "0.13"
rustls = "0.23"
rustls-pemfile = "2.0"
axum-server = { version = "0.7", features = ["tls-rustls"] }
# Testing
tempfile = "3.0"

View file

@ -26,6 +26,11 @@ uuid = { workspace = true }
tokio-stream = { workspace = true }
futures-util = { workspace = true }
json-patch = "3.0"
rcgen = { workspace = true }
rustls = { workspace = true }
rustls-pemfile = { workspace = true }
axum-server = { workspace = true }
tokio-util = { workspace = true }
[dev-dependencies]
tempfile = { workspace = true }

View file

@ -13,6 +13,7 @@ pub mod handlers;
pub mod response;
pub mod server;
pub mod state;
pub mod tls;
pub mod validation;
pub mod watch;
@ -21,3 +22,4 @@ pub use error::{ApiError, Result};
pub use event_bus::ResourceEvent;
pub use server::{ApiServer, Config};
pub use state::AppState;
pub use tls::{TlsMaterial, TlsMode};

View file

@ -1,10 +1,12 @@
use crate::handlers::*;
use crate::tls::{self, TlsMaterial, TlsMode};
use crate::AppState;
use axum::routing::get;
use axum::Router;
use std::net::SocketAddr;
use std::sync::Arc;
use tokio::net::TcpListener;
use tokio_util::sync::CancellationToken;
use tower_http::trace::TraceLayer;
use tracing::info;
@ -13,12 +15,15 @@ use tracing::info;
pub struct Config {
/// Address to listen on
pub listen_addr: SocketAddr,
/// TLS configuration
pub tls_mode: TlsMode,
}
impl Default for Config {
fn default() -> Self {
Self {
listen_addr: "127.0.0.1:6443".parse().unwrap(),
tls_mode: TlsMode::Disabled,
}
}
}
@ -35,6 +40,15 @@ impl ApiServer {
Self { config, state }
}
/// Load (or generate) the TLS material described by this server's
/// configured `tls_mode`.
///
/// Yields `Ok(None)` when TLS is disabled. Callers that start internal
/// clients can invoke this ahead of `run()` and hand the returned CA PEM
/// to those clients so they trust a self-signed server certificate.
pub fn resolve_tls_material(&self) -> miette::Result<Option<TlsMaterial>> {
    let mode = &self.config.tls_mode;
    tls::resolve_tls(mode)
}
/// Build the router
fn build_router(&self) -> Router {
Router::new()
@ -94,15 +108,56 @@ impl ApiServer {
.with_state(self.state.clone())
}
/// Run the server
pub async fn run(self) -> Result<(), std::io::Error> {
/// Run the server, shutting down gracefully when `token` is cancelled.
pub async fn run(self, token: CancellationToken) -> Result<(), std::io::Error> {
let app = self.build_router();
info!("Starting API server on {}", self.config.listen_addr);
let tls_material = self
.resolve_tls_material()
.map_err(|e| std::io::Error::other(format!("TLS setup failed: {e}")))?;
let listener = TcpListener::bind(self.config.listen_addr).await?;
match tls_material {
None => {
info!(
"Starting API server on {} (plain HTTP)",
self.config.listen_addr
);
let listener = TcpListener::bind(self.config.listen_addr).await?;
axum::serve(listener, app)
.with_graceful_shutdown(async move {
token.cancelled().await;
})
.await
}
Some(material) => {
info!(
"Starting API server on {} (HTTPS)",
self.config.listen_addr
);
let rustls_config = axum_server::tls_rustls::RustlsConfig::from_pem(
material.cert_pem,
material.key_pem,
)
.await
.map_err(|e| {
std::io::Error::other(format!("failed to build RustlsConfig: {e}"))
})?;
axum::serve(listener, app).await
let handle = axum_server::Handle::new();
let shutdown_handle = handle.clone();
tokio::spawn(async move {
token.cancelled().await;
shutdown_handle
.graceful_shutdown(Some(std::time::Duration::from_secs(10)));
});
axum_server::bind_rustls(self.config.listen_addr, rustls_config)
.handle(handle)
.serve(app.into_make_service())
.await
}
}
}
}
@ -132,6 +187,7 @@ mod tests {
fn test_default_config() {
let config = Config::default();
assert_eq!(config.listen_addr.to_string(), "127.0.0.1:6443");
assert!(matches!(config.tls_mode, TlsMode::Disabled));
}
#[test]

View file

@ -0,0 +1,252 @@
use miette::{Context, IntoDiagnostic};
use rcgen::{BasicConstraints, CertificateParams, ExtendedKeyUsagePurpose, IsCa, KeyPair};
use std::path::{Path, PathBuf};
use tracing::info;
/// How TLS should be configured for the API server.
///
/// Passed to `resolve_tls`, which turns the chosen mode into concrete
/// PEM bytes (or `None` for `Disabled`).
#[derive(Debug, Clone)]
pub enum TlsMode {
/// No TLS — plain HTTP.
Disabled,
/// Auto-generate a self-signed CA + server certificate.
/// Certs are persisted under `data_dir` and reused on restart.
///
/// `data_dir` is the directory that holds `ca.pem`, `server.pem` and
/// `server-key.pem`. `san_entries` are the subject alternative names
/// handed to rcgen when the server certificate is created; they are only
/// consulted when new certificates have to be generated.
AutoGenerate {
data_dir: PathBuf,
san_entries: Vec<String>,
},
/// Use explicitly provided PEM certificate and key files.
///
/// No CA certificate is returned for this mode (`ca_pem` is `None`).
Provided {
cert_path: PathBuf,
key_path: PathBuf,
},
}
/// Resolved TLS key material ready for use by the server.
///
/// All fields hold raw PEM bytes exactly as read from disk or as
/// serialized right after generation.
#[derive(Debug, Clone)]
pub struct TlsMaterial {
// PEM of the server certificate.
pub cert_pem: Vec<u8>,
// PEM of the server private key.
pub key_pem: Vec<u8>,
// PEM of the self-signed CA that issued the server certificate.
// `Some` in auto-generate mode, `None` when cert/key were user-provided.
pub ca_pem: Option<Vec<u8>>,
}
/// Turn a [`TlsMode`] into concrete PEM material.
///
/// * `Disabled` yields `Ok(None)`.
/// * `AutoGenerate` loads `ca.pem`, `server.pem` and `server-key.pem` from
///   `data_dir` when all three exist; otherwise a fresh CA and server
///   certificate are generated and written there.
/// * `Provided` reads the certificate and key from the given paths and
///   returns no CA.
///
/// # Errors
///
/// Fails when a file cannot be read or when certificate generation fails.
pub fn resolve_tls(mode: &TlsMode) -> miette::Result<Option<TlsMaterial>> {
    let material = match mode {
        TlsMode::Disabled => return Ok(None),
        TlsMode::Provided {
            cert_path,
            key_path,
        } => {
            let cert_pem = read_pem(cert_path, "TLS cert")?;
            let key_pem = read_pem(key_path, "TLS key")?;
            TlsMaterial {
                cert_pem,
                key_pem,
                ca_pem: None,
            }
        }
        TlsMode::AutoGenerate {
            data_dir,
            san_entries,
        } => {
            let ca_file = data_dir.join("ca.pem");
            let cert_file = data_dir.join("server.pem");
            let key_file = data_dir.join("server-key.pem");

            // Reuse persisted material only when the full set is present.
            let all_present = [&ca_file, &cert_file, &key_file]
                .iter()
                .all(|file| file.exists());

            if all_present {
                info!("Loading existing TLS certificates from {}", data_dir.display());
                let ca_pem = read_pem(&ca_file, "CA cert")?;
                let cert_pem = read_pem(&cert_file, "server cert")?;
                let key_pem = read_pem(&key_file, "server key")?;
                TlsMaterial {
                    cert_pem,
                    key_pem,
                    ca_pem: Some(ca_pem),
                }
            } else {
                info!(
                    "Auto-generating self-signed TLS certificates in {}",
                    data_dir.display()
                );
                generate_self_signed(data_dir, san_entries)?
            }
        }
    };

    Ok(Some(material))
}

/// Read a PEM file, labelling any I/O error with `what` and the path.
fn read_pem(path: &Path, what: &str) -> miette::Result<Vec<u8>> {
    std::fs::read(path)
        .into_diagnostic()
        .wrap_err_with(|| format!("failed to read {what} at {}", path.display()))
}
/// Generate a self-signed CA and a server certificate signed by it, writing
/// the PEM files to `data_dir`.
///
/// Three files are written: `ca.pem`, `server.pem` and `server-key.pem`.
/// The CA private key is used only to sign the server certificate and is
/// never written to disk. The server private key file is created with
/// owner-only permissions (mode 0600) on Unix so other local users cannot
/// read it.
///
/// `san_entries` become the subject alternative names of the server
/// certificate.
///
/// # Errors
///
/// Fails when the directory cannot be created, when key or certificate
/// generation fails, or when any of the files cannot be written.
fn generate_self_signed(data_dir: &Path, san_entries: &[String]) -> miette::Result<TlsMaterial> {
    std::fs::create_dir_all(data_dir)
        .into_diagnostic()
        .wrap_err_with(|| format!("failed to create TLS directory {}", data_dir.display()))?;

    // --- CA ---
    let ca_key = KeyPair::generate()
        .into_diagnostic()
        .wrap_err("failed to generate CA key pair")?;
    let mut ca_params = CertificateParams::new(vec!["Reddwarf CA".to_string()])
        .into_diagnostic()
        .wrap_err("failed to create CA certificate params")?;
    ca_params.is_ca = IsCa::Ca(BasicConstraints::Unconstrained);
    let ca_cert = ca_params
        .self_signed(&ca_key)
        .into_diagnostic()
        .wrap_err("failed to self-sign CA certificate")?;

    // --- Server cert ---
    let server_key = KeyPair::generate()
        .into_diagnostic()
        .wrap_err("failed to generate server key pair")?;
    let mut server_params = CertificateParams::new(san_entries.to_vec())
        .into_diagnostic()
        .wrap_err("failed to create server certificate params")?;
    server_params.extended_key_usages = vec![ExtendedKeyUsagePurpose::ServerAuth];
    let server_cert = server_params
        .signed_by(&server_key, &ca_cert, &ca_key)
        .into_diagnostic()
        .wrap_err("failed to sign server certificate with CA")?;

    // --- Serialize ---
    let ca_pem = ca_cert.pem();
    let cert_pem = server_cert.pem();
    let key_pem = server_key.serialize_pem();

    // --- Write files ---
    let ca_path = data_dir.join("ca.pem");
    let cert_path = data_dir.join("server.pem");
    let key_path = data_dir.join("server-key.pem");

    std::fs::write(&ca_path, &ca_pem)
        .into_diagnostic()
        .wrap_err_with(|| format!("failed to write CA cert to {}", ca_path.display()))?;
    std::fs::write(&cert_path, &cert_pem)
        .into_diagnostic()
        .wrap_err_with(|| format!("failed to write server cert to {}", cert_path.display()))?;
    // The private key must not inherit the default (umask-derived) mode.
    write_private_key(&key_path, key_pem.as_bytes())
        .into_diagnostic()
        .wrap_err_with(|| format!("failed to write server key to {}", key_path.display()))?;

    info!(
        "TLS certificates written to {} (ca.pem, server.pem, server-key.pem)",
        data_dir.display()
    );

    Ok(TlsMaterial {
        cert_pem: cert_pem.into_bytes(),
        key_pem: key_pem.into_bytes(),
        ca_pem: Some(ca_pem.into_bytes()),
    })
}

/// Write secret `contents` to `path`, readable and writable by the owner only
/// on Unix. On other platforms the file is written with default permissions.
fn write_private_key(path: &Path, contents: &[u8]) -> std::io::Result<()> {
    use std::io::Write;

    let mut options = std::fs::OpenOptions::new();
    options.write(true).create(true).truncate(true);
    #[cfg(unix)]
    {
        use std::os::unix::fs::OpenOptionsExt;
        options.mode(0o600);
    }

    let mut file = options.open(path)?;

    // `mode` only applies when the file is newly created; tighten the
    // permissions of a pre-existing file before any key bytes are written.
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        file.set_permissions(std::fs::Permissions::from_mode(0o600))?;
    }

    file.write_all(contents)
}
// Unit tests for `resolve_tls`, one per `TlsMode` variant plus the
// reuse and missing-file paths. All filesystem work happens in temp dirs.
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
// Auto-generate mode returns non-empty material and writes all three files.
#[test]
fn test_auto_generate_creates_certs() {
let dir = tempdir().unwrap();
// The "tls" subdirectory does not exist yet; generation must create it.
let tls_dir = dir.path().join("tls");
let mode = TlsMode::AutoGenerate {
data_dir: tls_dir.clone(),
san_entries: vec!["localhost".to_string(), "127.0.0.1".to_string()],
};
let material = resolve_tls(&mode).unwrap().expect("should produce material");
assert!(!material.cert_pem.is_empty());
assert!(!material.key_pem.is_empty());
assert!(material.ca_pem.is_some());
// Verify files were written
assert!(tls_dir.join("ca.pem").exists());
assert!(tls_dir.join("server.pem").exists());
assert!(tls_dir.join("server-key.pem").exists());
}
// A second resolve against the same directory returns identical bytes,
// i.e. the persisted files are loaded instead of being regenerated.
#[test]
fn test_auto_generate_reuses_existing() {
let dir = tempdir().unwrap();
let tls_dir = dir.path().join("tls");
let mode = TlsMode::AutoGenerate {
data_dir: tls_dir.clone(),
san_entries: vec!["localhost".to_string()],
};
// First call generates
let first = resolve_tls(&mode).unwrap().unwrap();
// Second call loads the same files
let second = resolve_tls(&mode).unwrap().unwrap();
assert_eq!(first.cert_pem, second.cert_pem);
assert_eq!(first.key_pem, second.key_pem);
assert_eq!(first.ca_pem, second.ca_pem);
}
// Provided mode reads cert and key from explicit paths and returns no CA.
#[test]
fn test_provided_loads_files() {
let dir = tempdir().unwrap();
let tls_dir = dir.path().join("tls");
// Generate certs first so we have valid PEM to load
let gen_mode = TlsMode::AutoGenerate {
data_dir: tls_dir.clone(),
san_entries: vec!["localhost".to_string()],
};
resolve_tls(&gen_mode).unwrap();
let mode = TlsMode::Provided {
cert_path: tls_dir.join("server.pem"),
key_path: tls_dir.join("server-key.pem"),
};
let material = resolve_tls(&mode).unwrap().expect("should produce material");
assert!(!material.cert_pem.is_empty());
assert!(!material.key_pem.is_empty());
assert!(material.ca_pem.is_none());
}
// Provided mode surfaces an error when the files cannot be read.
#[test]
fn test_provided_missing_file_errors() {
let mode = TlsMode::Provided {
cert_path: PathBuf::from("/nonexistent/cert.pem"),
key_path: PathBuf::from("/nonexistent/key.pem"),
};
let result = resolve_tls(&mode);
assert!(result.is_err());
}
// Disabled mode produces no material at all.
#[test]
fn test_disabled_returns_none() {
let result = resolve_tls(&TlsMode::Disabled).unwrap();
assert!(result.is_none());
}
}

View file

@ -20,12 +20,54 @@ pub struct WatchEvent<T> {
impl ApiClient {
pub fn new(base_url: &str) -> Self {
Self::with_ca_cert(base_url, None)
}
/// Create a client that optionally trusts an additional CA certificate.
///
/// When connecting to a server with a self-signed certificate, pass the
/// CA PEM bytes here so the client will accept it.
pub fn with_ca_cert(base_url: &str, ca_pem: Option<&[u8]>) -> Self {
let mut builder = Client::builder();
if let Some(pem) = ca_pem {
if let Ok(cert) = reqwest::Certificate::from_pem(pem) {
builder = builder.add_root_certificate(cert);
}
}
Self {
base_url: base_url.trim_end_matches('/').to_string(),
client: Client::new(),
client: builder.build().unwrap_or_else(|_| Client::new()),
}
}
/// Issue a GET for `path` (appended to the client's base URL) and decode
/// the response body as arbitrary JSON.
///
/// # Errors
///
/// Returns an internal error when the request cannot be sent, when the
/// server answers with a non-success status (the response body is included
/// in the message), or when the body is not valid JSON.
pub async fn get_json(&self, path: &str) -> Result<serde_json::Value> {
    let url = format!("{}{}", self.base_url, path);
    debug!("GET {}", url);

    let request = self.client.get(&url);
    let response = match request.send().await {
        Ok(response) => response,
        Err(e) => {
            return Err(RuntimeError::internal_error(format!(
                "HTTP request failed: {}",
                e
            )))
        }
    };

    let status = response.status();
    if status.is_success() {
        response
            .json::<serde_json::Value>()
            .await
            .map_err(|e| RuntimeError::internal_error(format!("Failed to parse response: {}", e)))
    } else {
        // Best effort: an unreadable error body is reported as empty.
        let body = response.text().await.unwrap_or_default();
        Err(RuntimeError::internal_error(format!(
            "GET {} failed with status {}: {}",
            path, status, body
        )))
    }
}
/// GET /api/v1/namespaces/{namespace}/pods/{name}
pub async fn get_pod(&self, namespace: &str, name: &str) -> Result<Pod> {
let url = format!(
@ -189,3 +231,27 @@ impl ApiClient {
&self.base_url
}
}
// Construction tests for `ApiClient`; none of them perform network I/O.
#[cfg(test)]
mod tests {
use super::*;
// `new` keeps the base URL as given.
#[test]
fn test_new_builds_client() {
let client = ApiClient::new("http://127.0.0.1:6443");
assert_eq!(client.base_url(), "http://127.0.0.1:6443");
}
// Passing no CA certificate still yields a usable client.
#[test]
fn test_with_ca_cert_none() {
let client = ApiClient::with_ca_cert("https://127.0.0.1:6443", None);
assert_eq!(client.base_url(), "https://127.0.0.1:6443");
}
#[test]
fn test_with_ca_cert_invalid_pem_falls_back() {
// Invalid PEM should not panic — just builds a client without the cert
let client = ApiClient::with_ca_cert("https://127.0.0.1:6443", Some(b"not-a-pem"));
assert_eq!(client.base_url(), "https://127.0.0.1:6443");
}
}

View file

@ -132,19 +132,8 @@ impl PodController {
async fn reconcile_all(&self) -> Result<()> {
debug!("Running pod controller reconcile cycle");
// List all pods via the API
let url = format!("{}/api/v1/pods", self.api_client.base_url());
let resp = reqwest::get(&url)
.await
.map_err(|e| RuntimeError::internal_error(format!("Failed to list pods: {}", e)))?;
if !resp.status().is_success() {
return Err(RuntimeError::internal_error("Failed to list pods"));
}
let body: serde_json::Value = resp.json().await.map_err(|e| {
RuntimeError::internal_error(format!("Failed to parse pod list: {}", e))
})?;
// List all pods via the API client (respects TLS configuration)
let body = self.api_client.get_json("/api/v1/pods").await?;
let items = body["items"].as_array().cloned().unwrap_or_default();

View file

@ -1,5 +1,5 @@
use clap::{Parser, Subcommand};
use reddwarf_apiserver::{ApiError, ApiServer, AppState, Config as ApiConfig};
use reddwarf_apiserver::{ApiError, ApiServer, AppState, Config as ApiConfig, TlsMode};
use reddwarf_core::Namespace;
use reddwarf_runtime::{
ApiClient, Ipam, MockRuntime, MockStorageEngine, NodeAgent, NodeAgentConfig, PodController,
@ -9,6 +9,7 @@ use reddwarf_scheduler::scheduler::SchedulerConfig;
use reddwarf_scheduler::Scheduler;
use reddwarf_storage::RedbBackend;
use reddwarf_versioning::VersionStore;
use std::path::PathBuf;
use std::sync::Arc;
use tokio_util::sync::CancellationToken;
use tracing::{error, info};
@ -20,6 +21,23 @@ struct Cli {
command: Commands,
}
/// Shared TLS arguments for both `serve` and `agent` subcommands.
// Flattened into each subcommand with `#[command(flatten)]`. The `///`
// comments on the fields below double as the generated help text, so keep
// them user-facing.
#[derive(clap::Args, Clone, Debug)]
struct TlsArgs {
/// Enable TLS (HTTPS). When set without --tls-cert/--tls-key, a
/// self-signed CA + server certificate is auto-generated.
#[arg(long, default_value_t = false)]
tls: bool,
/// Path to a PEM-encoded TLS certificate (requires --tls)
// `requires = "tls"` makes clap reject this flag unless --tls is present.
// That cert and key are supplied together is checked later, in
// `tls_mode_from_args`.
#[arg(long, requires = "tls")]
tls_cert: Option<String>,
/// Path to a PEM-encoded TLS private key (requires --tls)
// Same `requires` rule as --tls-cert.
#[arg(long, requires = "tls")]
tls_key: Option<String>,
}
#[derive(Subcommand)]
enum Commands {
/// Run the API server only
@ -30,6 +48,8 @@ enum Commands {
/// Path to the redb database file
#[arg(long, default_value = "./reddwarf.redb")]
data_dir: String,
#[command(flatten)]
tls_args: TlsArgs,
},
/// Run as a full node agent (API server + scheduler + controller + heartbeat)
Agent {
@ -63,6 +83,8 @@ enum Commands {
/// Etherstub name for pod networking
#[arg(long, default_value = "reddwarf0")]
etherstub_name: String,
#[command(flatten)]
tls_args: TlsArgs,
},
}
@ -79,7 +101,11 @@ async fn main() -> miette::Result<()> {
let cli = Cli::parse();
match cli.command {
Commands::Serve { bind, data_dir } => run_serve(&bind, &data_dir).await,
Commands::Serve {
bind,
data_dir,
tls_args,
} => run_serve(&bind, &data_dir, &tls_args).await,
Commands::Agent {
node_name,
bind,
@ -91,6 +117,7 @@ async fn main() -> miette::Result<()> {
zonepath_prefix,
pod_cidr,
etherstub_name,
tls_args,
} => {
run_agent(
&node_name,
@ -103,31 +130,86 @@ async fn main() -> miette::Result<()> {
zonepath_prefix.as_deref(),
&pod_cidr,
&etherstub_name,
&tls_args,
)
.await
}
}
}
/// Suspend until the process receives SIGINT (ctrl-c) or SIGTERM and report
/// the name of the signal that arrived first.
///
/// # Panics
///
/// Panics if the SIGTERM handler cannot be installed.
async fn shutdown_signal() -> &'static str {
    let terminate = tokio::signal::unix::SignalKind::terminate();
    let mut sigterm =
        tokio::signal::unix::signal(terminate).expect("failed to install SIGTERM handler");

    let received = tokio::select! {
        _ = tokio::signal::ctrl_c() => "SIGINT",
        _ = sigterm.recv() => "SIGTERM",
    };
    received
}
/// Translate the `--tls*` command-line flags into the server's `TlsMode`.
///
/// * `--tls` absent: `Disabled`.
/// * `--tls` with both a certificate and a key: `Provided`.
/// * `--tls` with neither: `AutoGenerate`, storing certificates in a `tls`
///   directory next to the database file named by `data_dir`, with
///   `localhost` and `127.0.0.1` as subject alternative names.
/// * `--tls` with only one of the two paths: an error.
fn tls_mode_from_args(args: &TlsArgs, data_dir: &str) -> miette::Result<TlsMode> {
    if !args.tls {
        return Ok(TlsMode::Disabled);
    }

    let cert = args.tls_cert.as_deref();
    let key = args.tls_key.as_deref();

    if let (Some(cert), Some(key)) = (cert, key) {
        return Ok(TlsMode::Provided {
            cert_path: PathBuf::from(cert),
            key_path: PathBuf::from(key),
        });
    }

    // Exactly one of the two paths was supplied.
    if cert.is_some() || key.is_some() {
        return Err(miette::miette!(
            help = "Provide both --tls-cert and --tls-key, or omit both to auto-generate.",
            "When using --tls, you must supply both --tls-cert and --tls-key together"
        ));
    }

    // `data_dir` names the database file; certificates live beside it.
    let db_path = PathBuf::from(data_dir);
    let tls_dir = match db_path.parent() {
        Some(parent) => parent.join("tls"),
        None => std::path::Path::new(".").join("tls"),
    };
    let san_entries = ["localhost", "127.0.0.1"]
        .iter()
        .map(|name| String::from(*name))
        .collect();

    Ok(TlsMode::AutoGenerate {
        data_dir: tls_dir,
        san_entries,
    })
}
/// Run only the API server
async fn run_serve(bind: &str, data_dir: &str) -> miette::Result<()> {
async fn run_serve(bind: &str, data_dir: &str, tls_args: &TlsArgs) -> miette::Result<()> {
info!("Starting reddwarf API server");
let state = create_app_state(data_dir)?;
bootstrap_default_namespace(&state).await?;
let tls_mode = tls_mode_from_args(tls_args, data_dir)?;
let config = ApiConfig {
listen_addr: bind
.parse()
.map_err(|e| miette::miette!("Invalid bind address '{}': {}", bind, e))?,
tls_mode,
};
let token = CancellationToken::new();
let server = ApiServer::new(config, state);
server
.run()
.await
.map_err(|e| miette::miette!("API server error: {}", e))?;
let server_token = token.clone();
let server_handle = tokio::spawn(async move {
if let Err(e) = server.run(server_token).await {
error!("API server error: {}", e);
}
});
let sig = shutdown_signal().await;
info!("Received {}, shutting down gracefully...", sig);
token.cancel();
let _ = tokio::time::timeout(std::time::Duration::from_secs(5), server_handle).await;
info!("Shutdown complete");
Ok(())
}
@ -145,6 +227,7 @@ async fn run_agent(
zonepath_prefix: Option<&str>,
pod_cidr: &str,
etherstub_name: &str,
tls_args: &TlsArgs,
) -> miette::Result<()> {
info!("Starting reddwarf agent for node '{}'", node_name);
@ -174,25 +257,30 @@ async fn run_agent(
.await
.map_err(|e| miette::miette!("Failed to initialize storage: {}", e))?;
// Determine the API URL for internal components to connect to
let api_url = format!("http://127.0.0.1:{}", listen_addr.port());
// Build TLS mode
let tls_mode = tls_mode_from_args(tls_args, data_dir)?;
let tls_enabled = !matches!(tls_mode, TlsMode::Disabled);
// Determine the API URL for internal components
let scheme = if tls_enabled { "https" } else { "http" };
let api_url = format!("{scheme}://127.0.0.1:{}", listen_addr.port());
let token = CancellationToken::new();
// 1. Spawn API server
let api_config = ApiConfig { listen_addr };
// 1. Build API server and resolve TLS material *before* spawning
let api_config = ApiConfig {
listen_addr,
tls_mode,
};
let api_server = ApiServer::new(api_config, state.clone());
let tls_material = api_server.resolve_tls_material()?;
let ca_pem = tls_material.as_ref().and_then(|m| m.ca_pem.clone());
let api_token = token.clone();
let api_handle = tokio::spawn(async move {
tokio::select! {
result = api_server.run() => {
if let Err(e) = result {
error!("API server error: {}", e);
}
}
_ = api_token.cancelled() => {
info!("API server shutting down");
}
if let Err(e) = api_server.run(api_token).await {
error!("API server error: {}", e);
}
});
@ -222,7 +310,7 @@ async fn run_agent(
})?;
// 5. Spawn pod controller
let api_client = Arc::new(ApiClient::new(&api_url));
let api_client = Arc::new(ApiClient::with_ca_cert(&api_url, ca_pem.as_deref()));
let controller_config = PodControllerConfig {
node_name: node_name.to_string(),
api_url: api_url.clone(),
@ -261,12 +349,9 @@ async fn run_agent(
bind, node_name, pod_cidr
);
// Wait for shutdown signal
tokio::signal::ctrl_c()
.await
.map_err(|e| miette::miette!("Failed to listen for ctrl-c: {}", e))?;
info!("Shutting down gracefully...");
// Wait for shutdown signal (SIGINT or SIGTERM)
let sig = shutdown_signal().await;
info!("Received {}, shutting down gracefully...", sig);
token.cancel();
// Wait for all tasks to finish with a timeout

94
smf/manifest.xml Normal file
View file

@ -0,0 +1,94 @@
<?xml version="1.0"?>
<!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1">
<!--
SMF service manifest for reddwarf — a Kubernetes-compatible control plane
for illumos zones.
Import: svccfg import /opt/reddwarf/lib/svc/manifest/reddwarf.xml
Enable: svcadm enable svc:/system/reddwarf:default
Logs: svcs -L svc:/system/reddwarf:default
-->
<service_bundle type="manifest" name="reddwarf">
<service name="system/reddwarf" type="service" version="1">
<!-- The default instance is created disabled; an administrator enables it. -->
<create_default_instance enabled="false" />
<!-- Long-running daemon -->
<single_instance />
<!-- Local filesystems and the loopback interface are required. -->
<dependency name="filesystem"
grouping="require_all"
restart_on="error"
type="service">
<service_fmri value="svc:/system/filesystem/local" />
</dependency>
<dependency name="loopback"
grouping="require_all"
restart_on="error"
type="service">
<service_fmri value="svc:/network/loopback" />
</dependency>
<!-- Physical networking is declared with the optional_all grouping. -->
<dependency name="physical-network"
grouping="optional_all"
restart_on="error"
type="service">
<service_fmri value="svc:/network/physical" />
</dependency>
<!-- The start method is the shell script shipped as smf/method.sh; it runs as root. -->
<exec_method type="method"
name="start"
exec="/opt/reddwarf/lib/svc/method/reddwarf start"
timeout_seconds="60">
<method_context>
<method_credential user="root" group="root" />
</method_context>
</exec_method>
<!-- Stop sends SIGTERM; the daemon handles it with a graceful shutdown. -->
<exec_method type="method"
name="stop"
exec=":kill -TERM"
timeout_seconds="30" />
<!-- Refresh is a no-op; changed properties take effect on the next start. -->
<exec_method type="method"
name="refresh"
exec=":true"
timeout_seconds="10" />
<!--
NOTE(review): duration "child" is understood to make svc.startd treat the
start method process itself as the service, so the start method has to
stay in the foreground. Confirm against svc.startd(8).
-->
<property_group name="startd" type="framework">
<propval name="duration" type="astring" value="child" />
<propval name="ignore_error" type="astring" value="core,signal" />
</property_group>
<!--
Configuration read by the start method via svcprop. An empty node_name
makes the method script fall back to the system hostname. data_dir is the
path of the redb database file, not a directory.
-->
<property_group name="application" type="application">
<propval name="node_name" type="astring" value="" />
<propval name="listen_addr" type="astring" value="0.0.0.0:6443" />
<propval name="data_dir" type="astring" value="/var/lib/reddwarf/reddwarf.redb" />
<propval name="storage_pool" type="astring" value="rpool" />
<propval name="pod_cidr" type="astring" value="10.88.0.0/16" />
<propval name="etherstub_name" type="astring" value="reddwarf0" />
<propval name="tls_enabled" type="boolean" value="false" />
<propval name="tls_cert" type="astring" value="" />
<propval name="tls_key" type="astring" value="" />
</property_group>
<stability value="Evolving" />
<template>
<common_name>
<loctext xml:lang="C">Reddwarf Kubernetes Control Plane</loctext>
</common_name>
<description>
<loctext xml:lang="C">
Reddwarf node agent — runs the API server, scheduler,
pod controller, and node heartbeat as a single daemon
managing illumos zones as Kubernetes pods.
</loctext>
</description>
</template>
</service>
</service_bundle>

65
smf/method.sh Executable file
View file

@ -0,0 +1,65 @@
#!/usr/bin/bash
#
# SMF method script for svc:/system/reddwarf:default
#
# Reads configuration from the "application" property group of the
# service instance and launches the reddwarf agent daemon.
#
# The manifest sets startd/duration=child, so the start method must not
# return while the service is running: the daemon is exec'ed in the
# foreground and replaces this shell.
#

. /lib/svc/share/smf_include.sh

REDDWARF_BIN="/opt/reddwarf/bin/reddwarf"

# getprop NAME
# Print the value of application/NAME for this service instance.
# Prints nothing when the property is unset or empty.
getprop() {
    local value
    value=$(svcprop -p "application/$1" "$SMF_FMRI" 2>/dev/null)
    # svcprop renders an empty astring as a literal pair of double quotes.
    if [ "$value" = '""' ]; then
        value=""
    fi
    printf '%s' "$value"
}

case "$1" in
start)
    node_name=$(getprop node_name)
    listen_addr=$(getprop listen_addr)
    data_dir=$(getprop data_dir)
    storage_pool=$(getprop storage_pool)
    pod_cidr=$(getprop pod_cidr)
    etherstub_name=$(getprop etherstub_name)
    tls_enabled=$(getprop tls_enabled)
    tls_cert=$(getprop tls_cert)
    tls_key=$(getprop tls_key)

    # Default node_name to hostname if not set
    if [ -z "$node_name" ]; then
        node_name=$(hostname)
    fi

    # Ensure data directory parent exists
    data_parent=$(dirname "$data_dir")
    mkdir -p "$data_parent"

    # Build the command line as an array so that property values are passed
    # as single arguments, without word splitting or glob expansion.
    cmd=(
        "$REDDWARF_BIN" agent
        --node-name "$node_name"
        --bind "$listen_addr"
        --data-dir "$data_dir"
        --storage-pool "$storage_pool"
        --pod-cidr "$pod_cidr"
        --etherstub-name "$etherstub_name"
    )

    if [ "$tls_enabled" = "true" ]; then
        cmd+=(--tls)
        if [ -n "$tls_cert" ] && [ -n "$tls_key" ]; then
            cmd+=(--tls-cert "$tls_cert" --tls-key "$tls_key")
        elif [ -n "$tls_cert" ] || [ -n "$tls_key" ]; then
            # A lone cert or key is a configuration mistake; do not silently
            # fall back to auto-generated certificates.
            echo "application/tls_cert and application/tls_key must be set together" >&2
            exit $SMF_EXIT_ERR_CONFIG
        fi
    fi

    # Launch the daemon in the foreground, replacing this shell.
    exec "${cmd[@]}"
    ;;
*)
    echo "Usage: $0 { start }"
    exit $SMF_EXIT_ERR_FATAL
    ;;
esac