Add explicit libvirt configuration support; remove environment variable reliance; bump version to 0.1.9

- Introduce `libvirt_uri` and `libvirt_network` in configuration structs, replacing reliance on environment variables.
- Update all `virsh`-related logic to use explicit parameters for libvirt connection and network settings.
- Align codebase with new guidelines rejecting runtime environment variable mutations.
- Document breaking changes in `.junie/guidelines.md`.
- Increment orchestrator version to 0.1.9.

Signed-off-by: Till Wegmueller <toasterson@gmail.com>
This commit is contained in:
Till Wegmueller 2025-11-17 22:40:50 +01:00
parent f1d161655f
commit 931e5ac81a
No known key found for this signature in database
4 changed files with 41 additions and 25 deletions

View file

@ -10,6 +10,20 @@ This document records project-specific build, test, and development conventions
## 1. Build and Configuration
BREAKING POLICY UPDATE — 2025-11-17
We do not set environment variables from Rust code anywhere in this repository. Processes may read environment variables exactly once at startup to build their in-memory configuration, and thereafter must pass values through explicit configuration structs and function parameters. Subprocesses must receive required configuration via flags/args or inherited environment coming from the supervisor (systemd, container runtime), not via mutations performed by our code.
Implications:
- Remove/avoid any calls to std::env::set_var, Command::env, or ad-hoc HOME/XDG_* rewrites in code. If a child process (e.g., virsh) needs a URI, pass it explicitly via its CLI flags (e.g., virsh -c <URI>) or ensure the supervisors EnvironmentFile provides it.
- Read environment only to construct config types (e.g., AppConfig, Opts via clap). Do not propagate config by mutating process env.
- Pass configuration through owned structs (e.g., ExecConfig) and function parameters. Do not fall back to environment once config is built.
- This change is intentionally not backward compatible with any earlier behavior that relied on code setting env vars at runtime.
Operational guidance:
- Systemd units and container definitions remain the source of truth for environment. Ensure EnvironmentFile entries are correct. For local runs, pass flags or export vars before invoking binaries.
- For libvirt tools like virsh, we pass the URI using -c and do not manipulate HOME/XDG_CACHE_HOME.
- Use the stable toolchain unless explicitly noted. The workspace uses the 2024 edition; keep `rustup` and `cargo` updated.
- Top-level build:
- Build everything: `cargo build --workspace`

View file

@ -1,6 +1,6 @@
[package]
name = "orchestrator"
version = "0.1.8"
version = "0.1.9"
edition = "2024"
build = "build.rs"

View file

@ -11,7 +11,7 @@ use std::os::unix::fs::PermissionsExt;
use clap::Parser;
use miette::{IntoDiagnostic as _, Result};
use tracing::{info, warn};
use tracing::{info, warn, debug};
use crate::persist::{JobState, Persist};
use crate::error::OrchestratorError;
@ -112,6 +112,7 @@ struct Opts {
#[tokio::main(flavor = "multi_thread")]
async fn main() -> Result<()> {
// Load internal config (preloads KDL -> env, then reads env)
let app_cfg = common::AppConfig::load("orchestrator")?;
let _t = common::init_tracing("solstice-orchestrator")?;
@ -160,6 +161,8 @@ async fn main() -> Result<()> {
runner_illumos_path: opts.runner_illumos_path.clone(),
ssh_connect_timeout_secs: opts.ssh_connect_timeout_secs,
boot_wait_secs: opts.boot_wait_secs,
libvirt_uri: opts.libvirt_uri.clone(),
libvirt_network: opts.libvirt_network.clone(),
};
let sched = Scheduler::new(

View file

@ -40,6 +40,8 @@ pub struct ExecConfig {
pub runner_illumos_path: String,
pub ssh_connect_timeout_secs: u64,
pub boot_wait_secs: u64,
pub libvirt_uri: String,
pub libvirt_network: String,
}
impl<H: Hypervisor + 'static> Scheduler<H> {
@ -164,7 +166,14 @@ impl<H: Hypervisor + 'static> Scheduler<H> {
#[cfg(all(target_os = "linux", feature = "libvirt"))]
{
let exec_cfg = exec_shared_inner.clone();
match discover_guest_ip_virsh(&h.id, Duration::from_secs(exec_cfg.ssh_connect_timeout_secs.min(300))).await {
let uri = exec_cfg.libvirt_uri.clone();
let net_opt = if exec_cfg.libvirt_network.trim().is_empty() { None } else { Some(exec_cfg.libvirt_network.clone()) };
match discover_guest_ip_virsh(
&h.id,
Duration::from_secs(exec_cfg.ssh_connect_timeout_secs.min(300)),
&uri,
net_opt.as_deref(),
).await {
Some(ip) => {
let ip_owned = ip.clone();
let user = item.ctx.ssh_user.clone().unwrap_or_else(|| "ubuntu".to_string());
@ -338,7 +347,7 @@ async fn snapshot_console_to_joblog(persist: Arc<Persist>, request_id: Uuid, con
}
#[cfg(all(target_os = "linux", feature = "libvirt"))]
async fn discover_guest_ip_virsh(domain: &str, timeout: Duration) -> Option<String> {
async fn discover_guest_ip_virsh(domain: &str, timeout: Duration, libvirt_uri: &str, libvirt_network: Option<&str>) -> Option<String> {
use tokio::{task, time::{sleep, Instant, Duration}};
use std::process::Command;
use tracing::{debug, warn};
@ -381,31 +390,18 @@ async fn discover_guest_ip_virsh(domain: &str, timeout: Duration) -> Option<Stri
stderr: String,
}
async fn run_cmd(args: &[&str]) -> Attempt {
// Determine libvirt URI and cache/home dirs (best effort)
let uri = std::env::var("LIBVIRT_URI")
.ok()
.or_else(|| std::env::var("LIBVIRT_DEFAULT_URI").ok())
.unwrap_or_else(|| "qemu:///system".to_string());
let cache_base = std::env::var("XDG_CACHE_HOME").unwrap_or_else(|_| "/var/lib/solstice-ci/.cache".to_string());
let home_dir = std::env::var("HOME").unwrap_or_else(|_| "/var/lib/solstice-ci".to_string());
async fn run_cmd(args: &[&str], uri: &str) -> Attempt {
let args_vec = {
let mut v = Vec::with_capacity(args.len() + 2);
v.push("-c".to_string());
v.push(uri.clone());
v.push(uri.to_string());
v.extend(args.iter().map(|s| s.to_string()));
v
};
let cmd_desc = format!("virsh {}", args_vec.join(" "));
match task::spawn_blocking(move || {
// Ensure cache dir exists to avoid virsh trying to write under /nonexistent
let _ = std::fs::create_dir_all(format!("{}/libvirt", cache_base));
let mut cmd = Command::new("virsh");
cmd.args(&args_vec)
.env("LIBVIRT_DEFAULT_URI", &uri)
.env("XDG_CACHE_HOME", &cache_base)
.env("HOME", &home_dir);
cmd.args(&args_vec);
cmd.output()
}).await {
Ok(Ok(out)) => {
@ -416,7 +412,6 @@ async fn discover_guest_ip_virsh(domain: &str, timeout: Duration) -> Option<Stri
Attempt { cmd: cmd_desc, ok, status, stdout, stderr }
}
other => {
// spawn or io error
Attempt { cmd: cmd_desc, ok: false, status: None, stdout: String::new(), stderr: format!("spawn error: {:?}", other) }
}
}
@ -430,7 +425,7 @@ async fn discover_guest_ip_virsh(domain: &str, timeout: Duration) -> Option<Stri
for source in [Some("agent"), Some("lease"), None] {
let mut args = vec!["domifaddr", domain];
if let Some(src) = source { args.push("--source"); args.push(src); }
let att = run_cmd(&args).await;
let att = run_cmd(&args, libvirt_uri).await;
debug!(domain=%domain, method=%format!("domifaddr/{:?}", source), ok=att.ok, status=?att.status, stdout=%att.stdout, stderr=%att.stderr, cmd=%att.cmd, "virsh attempt");
if att.ok {
if let Some(ip) = parse_ipv4_from_text(&att.stdout) { debug!(domain=%domain, method=%format!("domifaddr/{:?}", source), ip=%ip, "discovered IP"); return Some(ip); }
@ -441,7 +436,7 @@ async fn discover_guest_ip_virsh(domain: &str, timeout: Duration) -> Option<Stri
// 2) Try domiflist to get MAC and possibly network name
let mut mac: Option<String> = None;
let mut net_name: Option<String> = None;
let att_domiflist = run_cmd(&["domiflist", domain]).await;
let att_domiflist = run_cmd(&["domiflist", domain], libvirt_uri).await;
debug!(domain=%domain, method="domiflist", ok=att_domiflist.ok, status=?att_domiflist.status, stdout=%att_domiflist.stdout, stderr=%att_domiflist.stderr, cmd=%att_domiflist.cmd, "virsh attempt");
if att_domiflist.ok {
for line in att_domiflist.stdout.lines().skip(2) { // skip header lines
@ -457,7 +452,7 @@ async fn discover_guest_ip_virsh(domain: &str, timeout: Duration) -> Option<Stri
last_attempts.push(att_domiflist);
// Fallback to env if domiflist didn't give a Source network
if net_name.is_none() {
if let Ok(env_net) = std::env::var("LIBVIRT_NETWORK") { if !env_net.is_empty() { net_name = Some(env_net); } }
if let Some(n) = libvirt_network { if !n.is_empty() { net_name = Some(n.to_string()); } }
}
// 2a) Parse leases file if we have network and MAC
if let (Some(net), Some(mac_s)) = (net_name.clone(), mac.clone()) {
@ -482,7 +477,7 @@ async fn discover_guest_ip_virsh(domain: &str, timeout: Duration) -> Option<Stri
}
// 2b) Try virsh net-dhcp-leases <network>
if let Some(net) = net_name.clone() {
let att_leases = run_cmd(&["net-dhcp-leases", &net]).await;
let att_leases = run_cmd(&["net-dhcp-leases", &net], libvirt_uri).await;
debug!(domain=%domain, method="net-dhcp-leases", ok=att_leases.ok, status=?att_leases.status, stdout=%att_leases.stdout, stderr=%att_leases.stderr, cmd=%att_leases.cmd, "virsh attempt");
if att_leases.ok {
if let Some(ref mac_s) = mac {
@ -803,6 +798,8 @@ mod tests {
runner_illumos_path: "/tmp/runner-illumos".into(),
ssh_connect_timeout_secs: 30,
boot_wait_secs: 0,
libvirt_uri: "qemu:///system".into(),
libvirt_network: "default".into(),
};
let sched = Scheduler::new(hv, 2, &caps, persist, Duration::from_millis(10), Arc::new(common::MqConfig::default()), exec);
let tx = sched.sender();
@ -848,6 +845,8 @@ mod tests {
runner_illumos_path: "/tmp/runner-illumos".into(),
ssh_connect_timeout_secs: 30,
boot_wait_secs: 0,
libvirt_uri: "qemu:///system".into(),
libvirt_network: "default".into(),
};
let sched = Scheduler::new(hv, 4, &caps, persist, Duration::from_millis(10), Arc::new(common::MqConfig::default()), exec);
let tx = sched.sender();