solstice-ci/crates/orchestrator/src/hypervisor.rs

448 lines
19 KiB
Rust
Raw Normal View History

use std::{path::PathBuf, time::Duration};
use async_trait::async_trait;
use miette::{Result, IntoDiagnostic as _};
use tracing::{info, warn};
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
/// Records which backend produced a [`VmHandle`], so that later lifecycle
/// calls can be routed back to the backend that prepared the VM.
///
/// Variants other than `Noop` exist only when the matching backend is
/// compiled in. `PartialEq`/`Eq` are derived (as for [`VmState`]) so tags can
/// be compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackendTag {
    /// Prepared by the no-op development backend.
    Noop,
    /// Prepared by the libvirt backend (Linux with the `libvirt` feature).
    #[cfg(all(target_os = "linux", feature = "libvirt"))]
    Libvirt,
    /// Prepared by the illumos zones backend.
    #[cfg(target_os = "illumos")]
    Zones,
}
/// Description of the virtual machine requested for a job.
#[derive(Debug, Clone)]
pub struct VmSpec {
    /// Free-form label; backends include it in their log output.
    pub label: String,
    /// Path of the base disk image on the host.
    pub image_path: PathBuf,
    /// Number of virtual CPUs.
    pub cpu: u16,
    /// Guest memory; the libvirt backend passes it on with unit `MiB`.
    pub ram_mb: u32,
    /// Disk size; the libvirt backend hands it to `qemu-img` as `<n>G`.
    pub disk_gb: u32,
    /// Optional network name.
    /// NOTE(review): no backend visible in this file reads this field — confirm intended use.
    pub network: Option<String>,
    /// NoCloud flag.
    /// NOTE(review): not read by any backend visible in this file — confirm intended use.
    pub nocloud: bool,
    /// Optional cloud-init NoCloud user-data. When present, the libvirt
    /// backend builds a seed ISO from it and attaches it to the VM.
    pub user_data: Option<Vec<u8>>,
}
/// Identifying information about the job a VM is prepared for.
#[derive(Debug, Clone)]
pub struct JobContext {
    /// Unique id of the request; every backend in this file derives the VM id
    /// from it (`noop-…`, `job-…`, `zone-…`).
    pub request_id: uuid::Uuid,
    /// Repository URL of the job (presumably the repository under test — not
    /// read by the backends in this file).
    pub repo_url: String,
    /// Commit SHA of the job (not read by the backends in this file).
    pub commit_sha: String,
    /// Optional workflow job id (not read by the backends in this file).
    pub workflow_job_id: Option<String>,
}
/// Handle to a prepared VM, returned by `Hypervisor::prepare` and passed to
/// the remaining lifecycle calls.
#[derive(Debug, Clone)]
pub struct VmHandle {
    /// Backend-specific identifier; the libvirt backend uses it as the domain name.
    pub id: String,
    /// Backend that prepared this VM.
    pub backend: BackendTag,
    /// Per-VM working directory on the host.
    pub work_dir: PathBuf,
    /// Disk the VM runs from, if any: the qcow2 overlay for libvirt, the raw
    /// image for zones (which may be the base image itself when it was already raw).
    pub overlay_path: Option<PathBuf>,
    /// NoCloud seed ISO, if one was built.
    pub seed_iso_path: Option<PathBuf>,
}
/// Coarse lifecycle state of a VM as reported by `Hypervisor::state`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VmState {
    /// Resources exist but the VM has not been started.
    Prepared,
    /// The VM is running.
    Running,
    /// The VM has been stopped.
    Stopped,
}
/// Lifecycle operations every VM backend provides.
#[async_trait]
pub trait Hypervisor: Send + Sync {
    /// Sets up whatever the backend needs for `spec` and returns a handle to it.
    async fn prepare(&self, spec: &VmSpec, ctx: &JobContext) -> Result<VmHandle>;

    /// Starts a previously prepared VM.
    async fn start(&self, vm: &VmHandle) -> Result<()>;

    /// Stops a VM; `graceful_timeout` bounds how long a backend may wait for a
    /// clean shutdown.
    async fn stop(&self, vm: &VmHandle, graceful_timeout: Duration) -> Result<()>;

    /// Tears the VM down, consuming its handle.
    async fn destroy(&self, vm: VmHandle) -> Result<()>;

    /// Reports the VM state. The default implementation always answers
    /// `VmState::Prepared`.
    async fn state(&self, _vm: &VmHandle) -> Result<VmState> {
        Ok(VmState::Prepared)
    }
}
/// Dispatches each lifecycle call to the backend responsible for the VM.
///
/// Which backend fields exist depends on the compilation target and features;
/// the no-op backend is always present.
pub struct RouterHypervisor {
    /// Always-available fallback backend.
    pub noop: NoopHypervisor,
    /// libvirt backend (Linux with the `libvirt` feature), when configured.
    #[cfg(all(target_os = "linux", feature = "libvirt"))]
    pub libvirt: Option<LibvirtHypervisor>,
    /// illumos zones backend, when configured.
    #[cfg(target_os = "illumos")]
    pub zones: Option<ZonesHypervisor>,
}
impl RouterHypervisor {
    /// Builds a router holding every backend compiled into this binary.
    ///
    /// `libvirt_uri` and `libvirt_network` configure the libvirt backend and
    /// are ignored on builds without it.
    // The arguments are unused when the libvirt backend is compiled out.
    #[allow(unused_variables)]
    pub fn build(libvirt_uri: String, libvirt_network: String) -> Self {
        // Each optional field is gated by the same cfg as its declaration, so
        // exactly the fields that exist on this target are initialised.
        RouterHypervisor {
            noop: NoopHypervisor::default(),
            #[cfg(all(target_os = "linux", feature = "libvirt"))]
            libvirt: Some(LibvirtHypervisor {
                uri: libvirt_uri,
                network: libvirt_network,
            }),
            #[cfg(target_os = "illumos")]
            zones: Some(ZonesHypervisor),
        }
    }
}
#[async_trait]
impl Hypervisor for RouterHypervisor {
async fn prepare(&self, spec: &VmSpec, ctx: &JobContext) -> Result<VmHandle> {
#[cfg(all(target_os = "linux", feature = "libvirt"))]
{
if let Some(ref hv) = self.libvirt { return hv.prepare(spec, ctx).await; }
}
#[cfg(target_os = "illumos")]
{
if let Some(ref hv) = self.zones { return hv.prepare(spec, ctx).await; }
}
self.noop.prepare(spec, ctx).await
}
async fn start(&self, vm: &VmHandle) -> Result<()> {
match vm.backend {
#[cfg(all(target_os = "linux", feature = "libvirt"))]
BackendTag::Libvirt => {
if let Some(ref hv) = self.libvirt { hv.start(vm).await } else { self.noop.start(vm).await }
}
#[cfg(target_os = "illumos")]
BackendTag::Zones => {
if let Some(ref hv) = self.zones { hv.start(vm).await } else { self.noop.start(vm).await }
}
_ => self.noop.start(vm).await,
}
}
async fn stop(&self, vm: &VmHandle, t: Duration) -> Result<()> {
match vm.backend {
#[cfg(all(target_os = "linux", feature = "libvirt"))]
BackendTag::Libvirt => {
if let Some(ref hv) = self.libvirt { hv.stop(vm, t).await } else { self.noop.stop(vm, t).await }
}
#[cfg(target_os = "illumos")]
BackendTag::Zones => {
if let Some(ref hv) = self.zones { hv.stop(vm, t).await } else { self.noop.stop(vm, t).await }
}
_ => self.noop.stop(vm, t).await,
}
}
async fn destroy(&self, vm: VmHandle) -> Result<()> {
match vm.backend {
#[cfg(all(target_os = "linux", feature = "libvirt"))]
BackendTag::Libvirt => {
if let Some(ref hv) = self.libvirt { hv.destroy(vm).await } else { self.noop.destroy(vm).await }
}
#[cfg(target_os = "illumos")]
BackendTag::Zones => {
if let Some(ref hv) = self.zones { hv.destroy(vm).await } else { self.noop.destroy(vm).await }
}
_ => self.noop.destroy(vm).await,
}
}
}
/// No-op hypervisor for development on hosts without privileges.
///
/// Its `prepare` creates a scratch directory under the system temp dir and
/// returns a handle tagged `BackendTag::Noop`; no virtual machine is launched.
#[derive(Debug, Clone, Default)]
pub struct NoopHypervisor;
#[async_trait]
impl Hypervisor for NoopHypervisor {
async fn prepare(&self, spec: &VmSpec, ctx: &JobContext) -> Result<VmHandle> {
let id = format!("noop-{}", ctx.request_id);
let work_dir = std::env::temp_dir().join("solstice-noop").join(&id);
tokio::fs::create_dir_all(&work_dir).await.into_diagnostic()?;
info!(id = %id, label = %spec.label, image = ?spec.image_path, "noop prepare");
Ok(VmHandle { id, backend: BackendTag::Noop, work_dir, overlay_path: None, seed_iso_path: None })
}
async fn start(&self, vm: &VmHandle) -> Result<()> {
info!(id = %vm.id, "noop start");
Ok(())
}
async fn stop(&self, vm: &VmHandle, _t: Duration) -> Result<()> {
info!(id = %vm.id, "noop stop");
Ok(())
}
async fn destroy(&self, vm: VmHandle) -> Result<()> {
info!(id = %vm.id, "noop destroy");
Ok(())
}
}
/// libvirt-based backend (Linux builds with the `libvirt` feature).
#[cfg(all(target_os = "linux", feature = "libvirt"))]
pub struct LibvirtHypervisor {
    /// Connection URI passed to libvirt when opening a connection.
    pub uri: String,
    /// Name of the libvirt network VMs are attached to.
    pub network: String,
}
#[cfg(all(target_os = "linux", feature = "libvirt"))]
impl LibvirtHypervisor {
    /// Creates and returns the per-VM working directory for `id`.
    ///
    /// `/var/lib/solstice-ci/<id>` is used when that base directory exists and
    /// the subdirectory can actually be created there; otherwise the directory
    /// is placed under `<tmp>/solstice-libvirt/<id>`. The directory is
    /// restricted to mode `0700`.
    ///
    /// Failures are logged rather than returned, so the returned path is not
    /// guaranteed to exist.
    fn mk_work_dir(&self, id: &str) -> std::path::PathBuf {
        let base = std::path::Path::new("/var/lib/solstice-ci");
        let preferred = base.join(id);
        // Creating the directory is the writability check: an existing but
        // read-only base must fall through to the temp dir.
        let dir = if base.is_dir() && std::fs::create_dir_all(&preferred).is_ok() {
            preferred
        } else {
            let fallback = std::env::temp_dir().join("solstice-libvirt").join(id);
            if let Err(e) = std::fs::create_dir_all(&fallback) {
                warn!(dir = ?fallback, error = %e, "failed to create libvirt work dir");
            }
            fallback
        };
        if let Err(e) = std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(0o700)) {
            warn!(dir = ?dir, error = %e, "failed to restrict permissions on libvirt work dir");
        }
        dir
    }
}
#[cfg(all(target_os = "linux", feature = "libvirt"))]
#[async_trait]
impl Hypervisor for LibvirtHypervisor {
    /// Prepares a libvirt domain named `job-<request_id>`.
    ///
    /// Steps: create the work dir, make sure the configured network is active
    /// (best-effort), create a qcow2 overlay backed by `spec.image_path`, build
    /// a NoCloud seed ISO when `spec.user_data` is set, then define the domain.
    /// All libvirt and external-process work runs on the blocking thread pool.
    ///
    /// # Errors
    /// Fails when libvirt cannot be reached, when `qemu-img` or
    /// `mkisofs`/`genisoimage` is missing or exits unsuccessfully, when seed
    /// files cannot be written, or when the domain cannot be defined.
    async fn prepare(&self, spec: &VmSpec, ctx: &JobContext) -> Result<VmHandle> {
        use std::process::Command;

        let id = format!("job-{}", ctx.request_id);
        let work_dir = self.mk_work_dir(&id);

        // Ensure network is active via virt crate; best-effort
        let uri = self.uri.clone();
        let net_name = self.network.clone();
        tokio::task::spawn_blocking(move || -> miette::Result<()> {
            use virt::{connect::Connect, network::Network};
            let conn = Connect::open(Some(&uri))
                .map_err(|e| miette::miette!("libvirt connect failed: {e}"))?;
            if let Ok(net) = Network::lookup_by_name(&conn, &net_name) {
                // If not active, try to create (activate). Then set autostart.
                if !net.is_active().unwrap_or(false) {
                    let _ = net.create();
                }
                let _ = net.set_autostart(true);
            }
            Ok(())
        })
        .await
        .into_diagnostic()??;

        // Create qcow2 overlay
        let overlay = work_dir.join("overlay.qcow2");
        let size_arg = format!("{}G", spec.disk_gb);
        tokio::task::spawn_blocking({
            let base = spec.image_path.clone();
            let overlay = overlay.clone();
            move || -> miette::Result<()> {
                // Detect base image format to set -F accordingly (raw or qcow2)
                let info_out = Command::new("qemu-img")
                    .args(["info", "--output=json"])
                    .arg(&base)
                    .output()
                    .map_err(|e| miette::miette!("qemu-img not found or failed: {e}"))?;
                if !info_out.status.success() {
                    return Err(miette::miette!(
                        "qemu-img info failed: {}",
                        String::from_utf8_lossy(&info_out.stderr)
                    ));
                }
                let info: serde_json::Value = serde_json::from_slice(&info_out.stdout)
                    .map_err(|e| miette::miette!("parse qemu-img info json failed: {e}"))?;
                // A missing "format" key is treated as a raw image.
                let base_fmt = info
                    .get("format")
                    .and_then(|f| f.as_str())
                    .unwrap_or("raw")
                    .to_string();

                let out = Command::new("qemu-img")
                    .args(["create", "-f", "qcow2", "-F"])
                    .arg(&base_fmt)
                    .args(["-b"])
                    .arg(&base)
                    .arg(&overlay)
                    .arg(&size_arg)
                    .output()
                    .map_err(|e| miette::miette!("qemu-img not found or failed: {e}"))?;
                if !out.status.success() {
                    return Err(miette::miette!(
                        "qemu-img create failed: {}",
                        String::from_utf8_lossy(&out.stderr)
                    ));
                }
                Ok(())
            }
        })
        .await
        .into_diagnostic()??;

        // Build NoCloud seed ISO if user_data provided
        let seed_iso = if let Some(user_data) = spec.user_data.as_ref() {
            let seed_dir = work_dir.join("seed");
            tokio::fs::create_dir_all(&seed_dir).await.into_diagnostic()?;
            let ud_path = seed_dir.join("user-data");
            let md_path = seed_dir.join("meta-data");
            tokio::fs::write(&ud_path, user_data).await.into_diagnostic()?;
            let meta = format!("instance-id: {}\nlocal-hostname: {}\n", id, id);
            tokio::fs::write(&md_path, meta.as_bytes()).await.into_diagnostic()?;

            // mkisofs or genisoimage
            let iso_path = work_dir.join("seed.iso");
            tokio::task::spawn_blocking({
                let iso_path = iso_path.clone();
                let seed_dir = seed_dir.clone();
                move || -> miette::Result<()> {
                    let try_mk = |bin: &str| -> std::io::Result<std::process::Output> {
                        Command::new(bin)
                            .args(["-V", "cidata", "-J", "-R", "-o"])
                            .arg(&iso_path)
                            .arg(&seed_dir)
                            .output()
                    };
                    // genisoimage is only tried when mkisofs could not be spawned.
                    let out = try_mk("mkisofs")
                        .or_else(|_| try_mk("genisoimage"))
                        .map_err(|e| miette::miette!("mkisofs/genisoimage not found: {e}"))?;
                    if !out.status.success() {
                        return Err(miette::miette!(
                            "mkisofs failed: {}",
                            String::from_utf8_lossy(&out.stderr)
                        ));
                    }
                    Ok(())
                }
            })
            .await
            .into_diagnostic()??;
            Some(iso_path)
        } else {
            None
        };

        // Domain XML
        let xml = {
            let mem = spec.ram_mb;
            let vcpus = spec.cpu;
            let overlay_str = overlay.display().to_string();
            let seed_str = seed_iso.as_ref().map(|p| p.display().to_string());
            let net = self.network.clone();
            // The cdrom device is only present when a seed ISO was built.
            let cdrom = seed_str.map(|p| format!("<disk type='file' device='cdrom'>\n <driver name='qemu' type='raw'/>\n <source file='{}'/>\n <target dev='hdb' bus='ide'/>\n <readonly/>\n</disk>", p)).unwrap_or_default();
            format!("<domain type='kvm'>\n<name>{}</name>\n<memory unit='MiB'>{}</memory>\n<vcpu>{}</vcpu>\n<os>\n <type arch='x86_64' machine='pc'>hvm</type>\n <boot dev='hd'/>\n</os>\n<features><acpi/></features>\n<devices>\n <disk type='file' device='disk'>\n <driver name='qemu' type='qcow2' cache='none'/>\n <source file='{}'/>\n <target dev='vda' bus='virtio'/>\n </disk>\n {}\n <interface type='network'>\n <source network='{}'/>\n <model type='virtio'/>\n </interface>\n <graphics type='vnc' autoport='yes' listen='127.0.0.1'/>\n <console type='pty'/>\n</devices>\n<on_poweroff>destroy</on_poweroff>\n<on_crash>destroy</on_crash>\n</domain>",
                id, mem, vcpus, overlay_str, cdrom, net)
        };

        // Define via virt crate
        let uri = self.uri.clone();
        tokio::task::spawn_blocking(move || -> miette::Result<()> {
            use virt::{connect::Connect, domain::Domain};
            let conn = Connect::open(Some(&uri))
                .map_err(|e| miette::miette!("libvirt connect failed: {e}"))?;
            let _dom = Domain::define_xml(&conn, &xml)
                .map_err(|e| miette::miette!("define domain failed: {e}"))?;
            Ok(())
        })
        .await
        .into_diagnostic()??;

        info!(domain = %id, image = ?spec.image_path, cpu = spec.cpu, ram_mb = spec.ram_mb, "libvirt prepared");
        Ok(VmHandle {
            id,
            backend: BackendTag::Libvirt,
            work_dir,
            overlay_path: Some(overlay),
            seed_iso_path: seed_iso,
        })
    }

    /// Looks the domain up by name (`vm.id`) and starts it.
    ///
    /// # Errors
    /// Fails when libvirt cannot be reached, the domain is unknown, or the
    /// start request is rejected.
    async fn start(&self, vm: &VmHandle) -> Result<()> {
        let id = vm.id.clone();
        let uri = self.uri.clone();
        tokio::task::spawn_blocking(move || -> miette::Result<()> {
            use virt::{connect::Connect, domain::Domain};
            let conn = Connect::open(Some(&uri))
                .map_err(|e| miette::miette!("libvirt connect failed: {e}"))?;
            // Lookup domain by name and start
            let dom = Domain::lookup_by_name(&conn, &id)
                .map_err(|e| miette::miette!("lookup domain failed: {e}"))?;
            dom.create()
                .map_err(|e| miette::miette!("domain start failed: {e}"))?;
            Ok(())
        })
        .await
        .into_diagnostic()??;
        info!(domain = %vm.id, "libvirt started");
        Ok(())
    }

    /// Requests a guest shutdown, polls every 500 ms for up to `t`, then
    /// force-stops the domain unless it is known to have stopped.
    ///
    /// # Errors
    /// Fails when libvirt cannot be reached or the domain is unknown; failures
    /// of the shutdown and force-stop requests themselves are ignored.
    async fn stop(&self, vm: &VmHandle, t: Duration) -> Result<()> {
        let id = vm.id.clone();
        let uri = self.uri.clone();
        tokio::task::spawn_blocking(move || -> miette::Result<()> {
            use virt::{connect::Connect, domain::Domain};
            let conn = Connect::open(Some(&uri))
                .map_err(|e| miette::miette!("libvirt connect failed: {e}"))?;
            let dom = Domain::lookup_by_name(&conn, &id)
                .map_err(|e| miette::miette!("lookup domain failed: {e}"))?;
            let _ = dom.shutdown();
            let started = std::time::Instant::now();
            while started.elapsed() < t {
                if matches!(dom.is_active(), Ok(false)) {
                    break;
                }
                std::thread::sleep(std::time::Duration::from_millis(500));
            }
            // Force destroy if still active. A failed status query counts as
            // "possibly active", so the forced stop is still attempted then.
            if !matches!(dom.is_active(), Ok(false)) {
                let _ = dom.destroy();
            }
            Ok(())
        })
        .await
        .into_diagnostic()??;
        info!(domain = %vm.id, "libvirt stopped");
        Ok(())
    }

    /// Undefines the domain and removes its overlay, seed ISO and work dir.
    ///
    /// A domain that is still running is force-stopped first, so its disk is
    /// not deleted underneath a live guest. File cleanup is best-effort.
    ///
    /// # Errors
    /// Fails only when libvirt cannot be reached; an unknown domain and
    /// force-stop/undefine failures are tolerated (the latter are logged).
    async fn destroy(&self, vm: VmHandle) -> Result<()> {
        let id = vm.id.clone();
        let uri = self.uri.clone();
        let id_for_task = id.clone();
        tokio::task::spawn_blocking(move || -> miette::Result<()> {
            use virt::{connect::Connect, domain::Domain};
            let conn = Connect::open(Some(&uri))
                .map_err(|e| miette::miette!("libvirt connect failed: {e}"))?;
            if let Ok(dom) = Domain::lookup_by_name(&conn, &id_for_task) {
                // Undefining a running domain leaves the guest running while
                // its backing files get removed below, so stop it first.
                if matches!(dom.is_active(), Ok(true)) {
                    if let Err(e) = dom.destroy() {
                        warn!(domain = %id_for_task, error = %e, "failed to force-stop domain before undefine");
                    }
                }
                if let Err(e) = dom.undefine() {
                    warn!(domain = %id_for_task, error = %e, "failed to undefine domain");
                }
            }
            Ok(())
        })
        .await
        .into_diagnostic()??;

        // Cleanup files
        if let Some(p) = vm.overlay_path.as_ref() {
            let _ = tokio::fs::remove_file(p).await;
        }
        if let Some(p) = vm.seed_iso_path.as_ref() {
            let _ = tokio::fs::remove_file(p).await;
        }
        let _ = tokio::fs::remove_dir_all(&vm.work_dir).await;
        info!(domain = %id, "libvirt destroyed");
        Ok(())
    }
}
/// illumos backend. Its `prepare` step readies a raw disk image (converting
/// the base image when needed, for bhyve); starting and stopping a VM are
/// not implemented in this file yet.
#[cfg(target_os = "illumos")]
pub struct ZonesHypervisor;
#[cfg(target_os = "illumos")]
#[async_trait]
impl Hypervisor for ZonesHypervisor {
    /// Prepares the disk for a VM named `zone-<request_id>`.
    ///
    /// Creates the per-VM work dir, detects the base image format with
    /// `qemu-img info`, and converts the image to raw inside the work dir when
    /// it is not raw already. The resulting raw image path is returned in
    /// `overlay_path`; when the base image is already raw that path is the
    /// base image itself.
    ///
    /// # Errors
    /// Fails when `qemu-img` cannot be run, exits unsuccessfully, or prints
    /// output that is not valid JSON.
    async fn prepare(&self, spec: &VmSpec, ctx: &JobContext) -> Result<VmHandle> {
        use std::process::Command;

        let id = format!("zone-{}", ctx.request_id);

        // Create working directory under /var/lib/solstice-ci if possible
        let work_dir = {
            let base = std::path::Path::new("/var/lib/solstice-ci");
            let preferred = base.join(&id);
            // Creating the directory is the writability check: an existing but
            // read-only base must fall through to the temp dir.
            let dir = if base.is_dir() && std::fs::create_dir_all(&preferred).is_ok() {
                preferred
            } else {
                let fallback = std::env::temp_dir().join("solstice-zones").join(&id);
                if let Err(e) = std::fs::create_dir_all(&fallback) {
                    warn!(dir = ?fallback, error = %e, "failed to create zones work dir");
                }
                fallback
            };
            #[cfg(unix)]
            {
                if let Err(e) =
                    std::fs::set_permissions(&dir, std::fs::Permissions::from_mode(0o700))
                {
                    warn!(dir = ?dir, error = %e, "failed to restrict permissions on zones work dir");
                }
            }
            dir
        };

        // Detect base image format
        let base = spec.image_path.clone();
        let base_fmt = tokio::task::spawn_blocking(move || -> miette::Result<String> {
            let out = Command::new("qemu-img")
                .args(["info", "--output=json"])
                .arg(&base)
                .output()
                .map_err(|e| miette::miette!("qemu-img not found or failed: {e}"))?;
            if !out.status.success() {
                return Err(miette::miette!(
                    "qemu-img info failed: {}",
                    String::from_utf8_lossy(&out.stderr)
                ));
            }
            let v: serde_json::Value = serde_json::from_slice(&out.stdout)
                .map_err(|e| miette::miette!("parse qemu-img info json failed: {e}"))?;
            // A missing "format" key is treated as a raw image.
            Ok(v.get("format")
                .and_then(|f| f.as_str())
                .unwrap_or("raw")
                .to_string())
        })
        .await
        .into_diagnostic()??;

        // Ensure raw image for bhyve: convert if needed
        let raw_path = if base_fmt != "raw" {
            let out_path = work_dir.join("disk.raw");
            let src = spec.image_path.clone();
            let dst = out_path.clone();
            tokio::task::spawn_blocking(move || -> miette::Result<()> {
                let out = Command::new("qemu-img")
                    .args(["convert", "-O", "raw"])
                    .arg(&src)
                    .arg(&dst)
                    .output()
                    .map_err(|e| miette::miette!("qemu-img convert failed to start: {e}"))?;
                if !out.status.success() {
                    return Err(miette::miette!(
                        "qemu-img convert failed: {}",
                        String::from_utf8_lossy(&out.stderr)
                    ));
                }
                Ok(())
            })
            .await
            .into_diagnostic()??;
            info!(label = %spec.label, src = ?spec.image_path, out = ?out_path, "converted image to raw for bhyve");
            out_path
        } else {
            spec.image_path.clone()
        };

        // Seed ISO creation left to future; for now, return handle with path in overlay_path
        Ok(VmHandle {
            id,
            backend: BackendTag::Zones,
            work_dir,
            overlay_path: Some(raw_path),
            seed_iso_path: None,
        })
    }

    /// Not implemented yet; always succeeds without doing anything.
    async fn start(&self, _vm: &VmHandle) -> Result<()> {
        Ok(())
    }

    /// Not implemented yet; always succeeds without doing anything.
    async fn stop(&self, _vm: &VmHandle, _t: Duration) -> Result<()> {
        Ok(())
    }

    /// Removes the per-VM work dir, including any converted `disk.raw` in it.
    ///
    /// `overlay_path` is deliberately not deleted on its own: when the base
    /// image was already raw it points at the shared base image. Cleanup is
    /// best-effort, so this never fails.
    async fn destroy(&self, vm: VmHandle) -> Result<()> {
        match tokio::fs::remove_dir_all(&vm.work_dir).await {
            Ok(()) => {}
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
            Err(e) => {
                warn!(id = %vm.id, dir = ?vm.work_dir, error = %e, "zones destroy: failed to remove work dir");
            }
        }
        Ok(())
    }
}