diff --git a/.junie/guidelines.md b/.junie/guidelines.md index 7f46fc3..253291b 100644 --- a/.junie/guidelines.md +++ b/.junie/guidelines.md @@ -10,6 +10,20 @@ This document records project-specific build, test, and development conventions ## 1. Build and Configuration +BREAKING POLICY UPDATE — 2025-11-17 + +We do not set environment variables from Rust code anywhere in this repository. Processes may read environment variables exactly once at startup to build their in-memory configuration, and thereafter must pass values through explicit configuration structs and function parameters. Subprocesses must receive required configuration via flags/args or inherited environment coming from the supervisor (systemd, container runtime), not via mutations performed by our code. + +Implications: +- Remove/avoid any calls to std::env::set_var, Command::env, or ad-hoc HOME/XDG_* rewrites in code. If a child process (e.g., virsh) needs a URI, pass it explicitly via its CLI flags (e.g., virsh -c <uri>) or ensure the supervisor’s EnvironmentFile provides it. +- Read environment only to construct config types (e.g., AppConfig, Opts via clap). Do not propagate config by mutating process env. +- Pass configuration through owned structs (e.g., ExecConfig) and function parameters. Do not fall back to environment once config is built. +- This change is intentionally not backward compatible with any earlier behavior that relied on code setting env vars at runtime. + +Operational guidance: +- Systemd units and container definitions remain the source of truth for environment. Ensure EnvironmentFile entries are correct. For local runs, pass flags or export vars before invoking binaries. +- For libvirt tools like virsh, we pass the URI using -c and do not manipulate HOME/XDG_CACHE_HOME. + - Use the stable toolchain unless explicitly noted. The workspace uses the 2024 edition; keep `rustup` and `cargo` updated.
- Top-level build: - Build everything: `cargo build --workspace` diff --git a/crates/orchestrator/Cargo.toml b/crates/orchestrator/Cargo.toml index 74d294f..c9dea9a 100644 --- a/crates/orchestrator/Cargo.toml +++ b/crates/orchestrator/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "orchestrator" -version = "0.1.8" +version = "0.1.9" edition = "2024" build = "build.rs" diff --git a/crates/orchestrator/src/main.rs b/crates/orchestrator/src/main.rs index f9ad46d..2ef686c 100644 --- a/crates/orchestrator/src/main.rs +++ b/crates/orchestrator/src/main.rs @@ -11,7 +11,7 @@ use std::os::unix::fs::PermissionsExt; use clap::Parser; use miette::{IntoDiagnostic as _, Result}; -use tracing::{info, warn}; +use tracing::{info, warn, debug}; use crate::persist::{JobState, Persist}; use crate::error::OrchestratorError; @@ -112,6 +112,7 @@ struct Opts { #[tokio::main(flavor = "multi_thread")] async fn main() -> Result<()> { + // Load internal config (preloads KDL -> env, then reads env) let app_cfg = common::AppConfig::load("orchestrator")?; let _t = common::init_tracing("solstice-orchestrator")?; @@ -160,6 +161,8 @@ async fn main() -> Result<()> { runner_illumos_path: opts.runner_illumos_path.clone(), ssh_connect_timeout_secs: opts.ssh_connect_timeout_secs, boot_wait_secs: opts.boot_wait_secs, + libvirt_uri: opts.libvirt_uri.clone(), + libvirt_network: opts.libvirt_network.clone(), }; let sched = Scheduler::new( diff --git a/crates/orchestrator/src/scheduler.rs b/crates/orchestrator/src/scheduler.rs index 42193ca..fe9a7a1 100644 --- a/crates/orchestrator/src/scheduler.rs +++ b/crates/orchestrator/src/scheduler.rs @@ -40,6 +40,8 @@ pub struct ExecConfig { pub runner_illumos_path: String, pub ssh_connect_timeout_secs: u64, pub boot_wait_secs: u64, + pub libvirt_uri: String, + pub libvirt_network: String, } impl Scheduler { @@ -164,7 +166,14 @@ impl Scheduler { #[cfg(all(target_os = "linux", feature = "libvirt"))] { let exec_cfg = exec_shared_inner.clone(); - match 
discover_guest_ip_virsh(&h.id, Duration::from_secs(exec_cfg.ssh_connect_timeout_secs.min(300))).await { + let uri = exec_cfg.libvirt_uri.clone(); + let net_opt = if exec_cfg.libvirt_network.trim().is_empty() { None } else { Some(exec_cfg.libvirt_network.clone()) }; + match discover_guest_ip_virsh( + &h.id, + Duration::from_secs(exec_cfg.ssh_connect_timeout_secs.min(300)), + &uri, + net_opt.as_deref(), + ).await { Some(ip) => { let ip_owned = ip.clone(); let user = item.ctx.ssh_user.clone().unwrap_or_else(|| "ubuntu".to_string()); @@ -338,7 +347,7 @@ async fn snapshot_console_to_joblog(persist: Arc<Persist>, request_id: Uuid, con } #[cfg(all(target_os = "linux", feature = "libvirt"))] -async fn discover_guest_ip_virsh(domain: &str, timeout: Duration) -> Option<String> { +async fn discover_guest_ip_virsh(domain: &str, timeout: Duration, libvirt_uri: &str, libvirt_network: Option<&str>) -> Option<String> { use tokio::{task, time::{sleep, Instant, Duration}}; use std::process::Command; use tracing::{debug, warn}; @@ -381,31 +390,18 @@ async fn discover_guest_ip_virsh(domain: &str, timeout: Duration) -> Option<String> { -async fn run_cmd(args: &[&str]) -> Attempt { - // Determine libvirt URI and cache/home dirs (best effort) - let uri = std::env::var("LIBVIRT_URI") .ok() .or_else(|| std::env::var("LIBVIRT_DEFAULT_URI").ok()) .unwrap_or_else(|| "qemu:///system".to_string()); - let cache_base = std::env::var("XDG_CACHE_HOME").unwrap_or_else(|_| "/var/lib/solstice-ci/.cache".to_string()); - let home_dir = std::env::var("HOME").unwrap_or_else(|_| "/var/lib/solstice-ci".to_string()); - + async fn run_cmd(args: &[&str], uri: &str) -> Attempt { let args_vec = { let mut v = Vec::with_capacity(args.len() + 2); v.push("-c".to_string()); - v.push(uri.clone()); + v.push(uri.to_string()); v.extend(args.iter().map(|s| s.to_string())); v }; let cmd_desc = format!("virsh {}", args_vec.join(" ")); match task::spawn_blocking(move || { - // Ensure cache dir exists to avoid virsh trying to write under /nonexistent - let _ =
std::fs::create_dir_all(format!("{}/libvirt", cache_base)); let mut cmd = Command::new("virsh"); - cmd.args(&args_vec) .env("LIBVIRT_DEFAULT_URI", &uri) .env("XDG_CACHE_HOME", &cache_base) .env("HOME", &home_dir); + cmd.args(&args_vec); cmd.output() }).await { Ok(Ok(out)) => { @@ -416,7 +412,6 @@ async fn discover_guest_ip_virsh(domain: &str, timeout: Duration) -> Option<String> { - // spawn or io error Attempt { cmd: cmd_desc, ok: false, status: None, stdout: String::new(), stderr: format!("spawn error: {:?}", other) } } } @@ -430,7 +425,7 @@ async fn discover_guest_ip_virsh(domain: &str, timeout: Duration) -> Option<String> { let mut mac: Option<String> = None; let mut net_name: Option<String> = None; - let att_domiflist = run_cmd(&["domiflist", domain]).await; + let att_domiflist = run_cmd(&["domiflist", domain], libvirt_uri).await; debug!(domain=%domain, method="domiflist", ok=att_domiflist.ok, status=?att_domiflist.status, stdout=%att_domiflist.stdout, stderr=%att_domiflist.stderr, cmd=%att_domiflist.cmd, "virsh attempt"); if att_domiflist.ok { for line in att_domiflist.stdout.lines().skip(2) { // skip header lines @@ -457,7 +452,7 @@ async fn discover_guest_ip_virsh(domain: &str, timeout: Duration) -> Option<String> { if let Some(net) = net_name.clone() { - let att_leases = run_cmd(&["net-dhcp-leases", &net]).await; + let att_leases = run_cmd(&["net-dhcp-leases", &net], libvirt_uri).await; debug!(domain=%domain, method="net-dhcp-leases", ok=att_leases.ok, status=?att_leases.status, stdout=%att_leases.stdout, stderr=%att_leases.stderr, cmd=%att_leases.cmd, "virsh attempt"); if att_leases.ok { if let Some(ref mac_s) = mac { @@ -803,6 +798,8 @@ mod tests { runner_illumos_path: "/tmp/runner-illumos".into(), ssh_connect_timeout_secs: 30, boot_wait_secs: 0, + libvirt_uri: "qemu:///system".into(), + libvirt_network: "default".into(), }; let sched = Scheduler::new(hv, 2, &caps, persist, Duration::from_millis(10), Arc::new(common::MqConfig::default()), exec); let tx = sched.sender(); @@ -848,6 +845,8 @@ mod
tests { runner_illumos_path: "/tmp/runner-illumos".into(), ssh_connect_timeout_secs: 30, boot_wait_secs: 0, + libvirt_uri: "qemu:///system".into(), + libvirt_network: "default".into(), }; let sched = Scheduler::new(hv, 4, &caps, persist, Duration::from_millis(10), Arc::new(common::MqConfig::default()), exec); let tx = sched.sender();