mirror of
https://codeberg.org/Toasterson/solstice-ci.git
synced 2026-04-10 21:30:41 +00:00
Add VM state monitoring and graceful shutdown enhancements
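This commit enhances the `Scheduler` to monitor VM state for completion: after a VM starts, it polls the hypervisor every two seconds and stops waiting as soon as the VM reports stopped, the configured placeholder runtime elapses, or a shutdown signal arrives. To support this, the `RouterHypervisor` and `LibvirtHypervisor` implementations gain a `state` method; the libvirt one reports `Running` or `Stopped` depending on whether the domain is active. The libvirt domain configuration now defines a serial port and attaches the console to it. The generated cloud-init user-data also gains `runcmd` steps that fetch the commit, run `.solstice/job.sh` if present, and power the VM off.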
This commit is contained in:
parent
bddd36b16f
commit
4ca78144f2
3 changed files with 51 additions and 7 deletions
|
|
@ -138,6 +138,19 @@ impl Hypervisor for RouterHypervisor {
|
||||||
_ => self.noop.destroy(vm).await,
|
_ => self.noop.destroy(vm).await,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
async fn state(&self, vm: &VmHandle) -> Result<VmState> {
|
||||||
|
match vm.backend {
|
||||||
|
#[cfg(all(target_os = "linux", feature = "libvirt"))]
|
||||||
|
BackendTag::Libvirt => {
|
||||||
|
if let Some(ref hv) = self.libvirt { hv.state(vm).await } else { Ok(VmState::Prepared) }
|
||||||
|
}
|
||||||
|
#[cfg(target_os = "illumos")]
|
||||||
|
BackendTag::Zones => {
|
||||||
|
if let Some(ref hv) = self.zones { hv.state(vm).await } else { Ok(VmState::Prepared) }
|
||||||
|
}
|
||||||
|
_ => Ok(VmState::Prepared),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// No-op hypervisor for development on hosts without privileges.
|
/// No-op hypervisor for development on hosts without privileges.
|
||||||
|
|
@ -299,7 +312,7 @@ impl Hypervisor for LibvirtHypervisor {
|
||||||
let seed_str = seed_iso.as_ref().map(|p| p.display().to_string());
|
let seed_str = seed_iso.as_ref().map(|p| p.display().to_string());
|
||||||
let net = self.network.clone();
|
let net = self.network.clone();
|
||||||
let cdrom = seed_str.map(|p| format!("<disk type='file' device='cdrom'>\n <driver name='qemu' type='raw'/>\n <source file='{}'/>\n <target dev='hdb' bus='ide'/>\n <readonly/>\n</disk>", p)).unwrap_or_default();
|
let cdrom = seed_str.map(|p| format!("<disk type='file' device='cdrom'>\n <driver name='qemu' type='raw'/>\n <source file='{}'/>\n <target dev='hdb' bus='ide'/>\n <readonly/>\n</disk>", p)).unwrap_or_default();
|
||||||
format!("<domain type='kvm'>\n<name>{}</name>\n<memory unit='MiB'>{}</memory>\n<vcpu>{}</vcpu>\n<os>\n <type arch='x86_64' machine='pc'>hvm</type>\n <boot dev='hd'/>\n</os>\n<features><acpi/></features>\n<devices>\n <disk type='file' device='disk'>\n <driver name='qemu' type='qcow2' cache='none'/>\n <source file='{}'/>\n <target dev='vda' bus='virtio'/>\n </disk>\n {}\n <interface type='network'>\n <source network='{}'/>\n <model type='virtio'/>\n </interface>\n <graphics type='vnc' autoport='yes' listen='127.0.0.1'/>\n <console type='pty'/>\n</devices>\n<on_poweroff>destroy</on_poweroff>\n<on_crash>destroy</on_crash>\n</domain>",
|
format!("<domain type='kvm'>\n<name>{}</name>\n<memory unit='MiB'>{}</memory>\n<vcpu>{}</vcpu>\n<os>\n <type arch='x86_64' machine='pc'>hvm</type>\n <boot dev='hd'/>\n</os>\n<features><acpi/></features>\n<devices>\n <disk type='file' device='disk'>\n <driver name='qemu' type='qcow2' cache='none'/>\n <source file='{}'/>\n <target dev='vda' bus='virtio'/>\n </disk>\n {}\n <interface type='network'>\n <source network='{}'/>\n <model type='virtio'/>\n </interface>\n <graphics type='vnc' autoport='yes' listen='127.0.0.1'/>\n <serial type='pty'>\n <target port='0'/>\n </serial>\n <console type='pty'>\n <target type='serial' port='0'/>\n </console>\n</devices>\n<on_poweroff>destroy</on_poweroff>\n<on_crash>destroy</on_crash>\n</domain>",
|
||||||
id, mem, vcpus, overlay_str, cdrom, net)
|
id, mem, vcpus, overlay_str, cdrom, net)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -375,6 +388,19 @@ impl Hypervisor for LibvirtHypervisor {
|
||||||
info!(domain = %id, "libvirt destroyed");
|
info!(domain = %id, "libvirt destroyed");
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn state(&self, vm: &VmHandle) -> Result<VmState> {
|
||||||
|
let id = vm.id.clone();
|
||||||
|
let uri = self.uri.clone();
|
||||||
|
let active = tokio::task::spawn_blocking(move || -> miette::Result<bool> {
|
||||||
|
use virt::{connect::Connect, domain::Domain};
|
||||||
|
let conn = Connect::open(Some(&uri)).map_err(|e| miette::miette!("libvirt connect failed: {e}"))?;
|
||||||
|
let dom = Domain::lookup_by_name(&conn, &id).map_err(|e| miette::miette!("lookup domain failed: {e}"))?;
|
||||||
|
let active = dom.is_active().unwrap_or(false);
|
||||||
|
Ok(active)
|
||||||
|
}).await.into_diagnostic()??;
|
||||||
|
Ok(if active { VmState::Running } else { VmState::Stopped })
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(target_os = "illumos")]
|
#[cfg(target_os = "illumos")]
|
||||||
|
|
|
||||||
|
|
@ -221,6 +221,11 @@ write_files:
|
||||||
content: |
|
content: |
|
||||||
repo_url: {repo}
|
repo_url: {repo}
|
||||||
commit_sha: {sha}
|
commit_sha: {sha}
|
||||||
|
runcmd:
|
||||||
|
- [ sh, -c, "echo 'Solstice: preparing workspace for {sha}' | tee /dev/console" ]
|
||||||
|
- [ sh, -c, "mkdir -p /root/work && cd /root/work && if command -v git >/dev/null 2>&1; then git init && git remote add origin {repo} && git fetch --depth=1 origin {sha} && git checkout -q FETCH_HEAD || true; else echo 'git not installed'; fi" ]
|
||||||
|
- [ sh, -c, "if [ -f /root/work/.solstice/job.sh ]; then chmod +x /root/work/.solstice/job.sh && cd /root/work && /root/work/.solstice/job.sh || true; else echo 'No .solstice/job.sh found in repo'; fi" ]
|
||||||
|
- [ sh, -c, "echo 'Solstice: job complete, powering off' | tee /dev/console; (command -v poweroff >/dev/null 2>&1 && poweroff) || (command -v shutdown >/dev/null 2>&1 && shutdown -y -i5 -g0) || true" ]
|
||||||
"#, repo = repo_url, sha = commit_sha);
|
"#, repo = repo_url, sha = commit_sha);
|
||||||
s.into_bytes()
|
s.into_bytes()
|
||||||
}
|
}
|
||||||
|
|
@ -250,5 +255,7 @@ mod tests {
|
||||||
assert!(s.contains("commit_sha: deadbeef"));
|
assert!(s.contains("commit_sha: deadbeef"));
|
||||||
assert!(s.contains("write_files:"));
|
assert!(s.contains("write_files:"));
|
||||||
assert!(s.contains("/etc/solstice/job.yaml"));
|
assert!(s.contains("/etc/solstice/job.yaml"));
|
||||||
|
assert!(s.contains("runcmd:"));
|
||||||
|
assert!(s.contains("powering off"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -93,12 +93,23 @@ impl<H: Hypervisor + 'static> Scheduler<H> {
|
||||||
}
|
}
|
||||||
let _ = persist.record_vm_event(item.ctx.request_id, &h.id, overlay, seed, backend, VmPersistState::Running).await;
|
let _ = persist.record_vm_event(item.ctx.request_id, &h.id, overlay, seed, backend, VmPersistState::Running).await;
|
||||||
let _ = persist.record_job_state(item.ctx.request_id, &item.ctx.repo_url, &item.ctx.commit_sha, Some(&item.spec.label), JobState::Running).await;
|
let _ = persist.record_job_state(item.ctx.request_id, &item.ctx.repo_url, &item.ctx.commit_sha, Some(&item.spec.label), JobState::Running).await;
|
||||||
info!(request_id = %item.ctx.request_id, label = %label_key, "vm started (workload execution placeholder)");
|
info!(request_id = %item.ctx.request_id, label = %label_key, "vm started (monitoring for completion)");
|
||||||
// Placeholder job runtime (configurable), but end early on shutdown
|
// Monitor VM state until it stops or until placeholder_runtime elapses; end early on shutdown
|
||||||
|
let start_time = std::time::Instant::now();
|
||||||
|
loop {
|
||||||
|
// Check current state first
|
||||||
|
if let Ok(crate::hypervisor::VmState::Stopped) = hv.state(&h).await {
|
||||||
|
info!(request_id = %item.ctx.request_id, label = %label_key, "vm reported stopped");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if start_time.elapsed() >= placeholder_runtime { break; }
|
||||||
|
// Wait either for shutdown signal or a short delay before next poll
|
||||||
tokio::select! {
|
tokio::select! {
|
||||||
_ = tokio::time::sleep(placeholder_runtime) => {},
|
|
||||||
_ = shutdown.notified() => {
|
_ = shutdown.notified() => {
|
||||||
info!(request_id = %item.ctx.request_id, label = %label_key, "shutdown: ending placeholder early");
|
info!(request_id = %item.ctx.request_id, label = %label_key, "shutdown: ending early");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
_ = tokio::time::sleep(Duration::from_secs(2)) => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Stop and destroy
|
// Stop and destroy
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue