//! Solstice CI workflow runner (VM agent) — `solstice-ci/crates/workflow-runner/src/main.rs`.

use clap::Parser;
use miette::{IntoDiagnostic as _, Result};
use serde::Deserialize;
use std::process::Stdio;
use std::collections::VecDeque;
use std::sync::{Arc, Mutex};
use tokio::{
fs,
io::{AsyncBufReadExt, BufReader},
process::Command,
};
use tracing::{error, info, warn};
/// Build one NDJSON log line carrying `category`, `level`, and `msg` fields,
/// with any top-level key/value pairs from `extra` merged in.
///
/// A non-object `extra` value is silently ignored; on a key collision the
/// value from `extra` replaces the base field.
fn ndjson_line(category: &str, level: &str, msg: &str, extra: Option<serde_json::Value>) -> String {
    let mut obj = serde_json::json!({
        "category": category,
        "level": level,
        "msg": msg,
    });
    // Only merge when both the base value and `extra` are JSON objects.
    if let (Some(map), Some(ext)) = (
        obj.as_object_mut(),
        extra.as_ref().and_then(|e| e.as_object()),
    ) {
        for (key, value) in ext {
            map.insert(key.clone(), value.clone());
        }
    }
    obj.to_string()
}
#[derive(Parser, Debug)]
2025-11-01 14:56:46 +01:00
#[command(
name = "solstice-runner",
version,
about = "Solstice CI Workflow Runner (VM agent)"
)]
struct Opts {
/// Optional path to workflow KDL file (for local testing only)
#[arg(long, env = "SOL_WORKFLOW_PATH")]
workflow: Option<String>,
}
/// Minimal job description deserialized from the YAML job file: the
/// repository to fetch and the exact commit to check out.
#[derive(Debug, Deserialize)]
struct JobFile {
    // Remote repository URL (SSH Codeberg URLs are later rewritten to HTTPS).
    repo_url: String,
    // Full commit SHA to fetch and check out.
    commit_sha: String,
}
async fn read_job_file() -> Result<JobFile> {
2025-11-01 14:56:46 +01:00
let path =
std::env::var("SOLSTICE_JOB_FILE").unwrap_or_else(|_| "/etc/solstice/job.yaml".into());
let bytes = fs::read(&path).await.into_diagnostic()?;
let jf: JobFile = serde_yaml::from_slice(&bytes).into_diagnostic()?;
Ok(jf)
}
async fn run_shell(cmd: &str) -> Result<i32> {
info!(%cmd, "exec");
2025-11-01 14:56:46 +01:00
let status = Command::new("/bin/sh")
.arg("-lc")
.arg(cmd)
.status()
.await
.into_diagnostic()?;
let code = status.code().unwrap_or(1);
if code != 0 {
return Err(miette::miette!("command failed ({code}): {cmd}"));
}
Ok(code)
}
/// Rewrite an SSH Codeberg remote (`ssh://git@codeberg.org/...`) into its
/// HTTPS equivalent so the repository can be fetched anonymously.
/// Any other URL is returned unchanged.
fn to_https_if_codeberg(repo: &str) -> String {
    match repo.strip_prefix("ssh://git@codeberg.org/") {
        Some(path) => format!("https://codeberg.org/{path}"),
        None => repo.to_string(),
    }
}
/// Check whether `name` resolves to an executable, using `command -v` in a
/// login shell. Any spawn failure or signal death is treated as "not
/// available".
async fn has_cmd(name: &str) -> bool {
    let probe = format!("command -v {} >/dev/null 2>&1", name);
    let status = tokio::process::Command::new("/bin/sh")
        .arg("-lc")
        .arg(probe)
        .status()
        .await;
    matches!(status.ok().and_then(|s| s.code()), Some(0))
}
/// Verify that `dir` exists (creating it if needed) and is writable, by
/// creating and then removing a small probe file inside it.
async fn check_writable(dir: &str) -> bool {
    if fs::create_dir_all(dir).await.is_err() {
        return false;
    }
    let probe = format!("{}/.solstice-writecheck", dir.trim_end_matches('/'));
    if fs::write(&probe, b"ok").await.is_ok() {
        // Best-effort cleanup; writability is already proven at this point.
        let _ = fs::remove_file(&probe).await;
        true
    } else {
        false
    }
}
/// Extract the `(host, port)` pair to probe for network reachability from a
/// repository URL.
///
/// Supports `https://`, `http://`, `ssh://[user@]host/...`, and scp-like
/// `user@host:owner/repo.git` forms. An explicit `host:port` in a URL is
/// honored (previously it was discarded); otherwise the scheme default is
/// used (443 / 80 / 22). Returns `None` when no host can be determined.
fn parse_repo_host_port(repo: &str) -> Option<(String, u16)> {
    // Split an authority (possibly `user@host[:port]`) into host and port,
    // falling back to `default_port` when absent or unparseable.
    fn host_port(authority: &str, default_port: u16) -> Option<(String, u16)> {
        let hostport = authority.split('@').last()?; // ignore embedded user
        let mut parts = hostport.splitn(2, ':');
        let host = parts.next()?;
        if host.is_empty() {
            return None;
        }
        let port = parts
            .next()
            .and_then(|p| p.parse::<u16>().ok())
            .unwrap_or(default_port);
        Some((host.to_string(), port))
    }

    let r = repo.trim();
    if let Some(rest) = r.strip_prefix("https://") {
        return host_port(rest.split('/').next()?, 443);
    }
    if let Some(rest) = r.strip_prefix("http://") {
        return host_port(rest.split('/').next()?, 80);
    }
    if let Some(rest) = r.strip_prefix("ssh://") {
        // ssh://[user@]host[:port]/owner/repo
        return host_port(rest.split('/').next()?, 22);
    }
    // scp-like: user@host:owner/repo.git — the colon separates host and path
    // here, so no explicit port is possible in this form.
    if let Some(at) = r.find('@') {
        if let Some(colon) = r[at + 1..].find(':') {
            let host = &r[at + 1..at + 1 + colon];
            if !host.is_empty() {
                return Some((host.to_string(), 22));
            }
        }
    }
    None
}
/// Attempt a TCP connection to `host:port` within `timeout_ms` milliseconds.
/// Returns `true` only when the connection succeeds before the deadline;
/// timeouts and connect errors both yield `false`.
async fn check_network_connect(host: &str, port: u16, timeout_ms: u64) -> bool {
    use tokio::time::{timeout, Duration};
    let deadline = Duration::from_millis(timeout_ms);
    matches!(
        timeout(deadline, tokio::net::TcpStream::connect((host, port))).await,
        Ok(Ok(_))
    )
}
/// Pre-flight environment checks run before fetching the repository.
///
/// Performs, in order:
/// 1. tool availability checks for `git`, `curl`, `wget`, `tar` (one NDJSON
///    line on stdout per tool);
/// 2. a "can we clone at all" verdict: git, or tar plus (curl|wget);
/// 3. workdir creation/writability via [`check_writable`];
/// 4. a best-effort TCP reachability probe of the repo host (warn-only).
///
/// Returns `Err` only for the hard failures (no clone method, unwritable
/// workdir); an unreachable host is logged as a warning but is not fatal.
async fn preflight(repo: &str, workdir: &str) -> Result<()> {
    // Tool availability
    let has_git = has_cmd("git").await;
    let has_curl = has_cmd("curl").await;
    let has_wget = has_cmd("wget").await;
    let has_tar = has_cmd("tar").await;
    for (tool, ok) in [("git", has_git), ("curl", has_curl), ("wget", has_wget), ("tar", has_tar)] {
        let lvl = if ok { "info" } else { "warn" };
        println!("{}", ndjson_line("tool_check", lvl, tool, Some(serde_json::json!({"available": ok}))));
    }
    // We can clone with git directly, or unpack an HTTP archive via tar fed
    // by either curl or wget (the fallback path in fetch_repo_via_archive).
    let can_clone = has_git || (has_tar && (has_curl || has_wget));
    let lvl = if can_clone { "info" } else { "error" };
    println!(
        "{}",
        ndjson_line(
            "env_setup",
            lvl,
            "clone capability",
            Some(serde_json::json!({
                "git": has_git,
                "tar": has_tar,
                "curl": has_curl,
                "wget": has_wget,
                "can_clone": can_clone
            }))
        )
    );
    if !can_clone {
        return Err(miette::miette!("no available method to fetch repository: need git or (tar and (curl|wget))"));
    }
    // Workdir writability
    let writable = check_writable(workdir).await;
    let lvl = if writable { "info" } else { "error" };
    println!("{}", ndjson_line("env_setup", lvl, "workdir writable", Some(serde_json::json!({"path": workdir, "writable": writable}))));
    if !writable { return Err(miette::miette!("workdir is not writable: {}", workdir)); }
    // Network reachability (best-effort, 2 s connect timeout)
    if let Some((host, port)) = parse_repo_host_port(repo) {
        let ok = check_network_connect(&host, port, 2000).await;
        let lvl = if ok { "info" } else { "warn" };
        println!("{}", ndjson_line("env", lvl, "network connectivity", Some(serde_json::json!({"host": host, "port": port, "reachable": ok}))));
    }
    Ok(())
}
/// Fetch the repository as a tarball over HTTP(S) and unpack it into
/// `workdir`; used when `git` is unavailable or the git path failed.
///
/// Builds the Gitea/Codeberg archive URL
/// `https://<host>/<owner>/<repo>/archive/<sha>.tar.gz`, then tries
/// `curl | tar` followed by `wget | tar`. On SunOS/illumos guests it
/// additionally attempts to install curl and CA certificates via pkg/pkgin
/// and retries; only with `SOLSTICE_ALLOW_INSECURE` explicitly opted in does
/// it fall back to `curl --insecure` as a last resort.
async fn fetch_repo_via_archive(repo_https: &str, sha: &str, workdir: &str) -> Result<()> {
    // Gitea/Codeberg archive URL pattern: https://codeberg.org/<owner>/<repo>/archive/<sha>.tar.gz
    // Derive the URL once; it was previously computed twice (announcement and
    // fetch) and could have drifted apart.
    let base = repo_https.trim_end_matches('.').trim_end_matches(".git");
    let url = format!("{}/archive/{}.tar.gz", base, sha);
    // Announce chosen method
    println!(
        "{}",
        ndjson_line(
            "env_setup",
            "info",
            "fetch via http archive",
            Some(serde_json::json!({ "url": url }))
        )
    );
    // Check if we should allow insecure TLS (last resort)
    let insecure = std::env::var("SOLSTICE_ALLOW_INSECURE")
        .ok()
        .map(|v| v == "1" || v.eq_ignore_ascii_case("true"))
        .unwrap_or(false);
    let curl_flags = if insecure { "-fSLk" } else { "-fSL" };
    // Try curl | tar, then wget | tar
    let cmd_curl = format!(
        "mkdir -p {workdir} && curl {curl_flags} {url} | tar -xz -C {workdir} --strip-components=1"
    );
    if run_shell(&cmd_curl).await.is_ok() {
        return Ok(());
    }
    let cmd_wget = format!(
        "mkdir -p {workdir} && wget -qO- {url} | tar -xz -C {workdir} --strip-components=1"
    );
    if run_shell(&cmd_wget).await.is_ok() {
        return Ok(());
    }
    // On illumos/SunOS images, curl/wget may be missing or the CA bundle
    // absent. Try to install tools and CA certs, then retry.
    let os = std::env::var("SOLSTICE_OS_OVERRIDE")
        .ok()
        // Best-effort OS detection when no override is set.
        .unwrap_or_else(|| std::env::consts::OS.to_string());
    // Prefer uname if available (std::env::consts::OS reflects the build
    // target, not necessarily the running guest).
    let uname = Command::new("/bin/sh")
        .arg("-lc")
        .arg("uname -s 2>/dev/null || echo unknown")
        .output()
        .await
        .ok()
        .and_then(|o| String::from_utf8(o.stdout).ok())
        .unwrap_or_default();
    let is_sunos = uname.trim() == "SunOS" || os == "solaris";
    if is_sunos {
        // Try IPS (pkg) first, then pkgin
        let _ = run_shell("sudo pkg refresh || true").await;
        // curl
        if run_shell("sudo pkg install -v web/curl").await.is_err() {
            let _ = run_shell("sudo pkgin -y install curl").await;
        }
        // CA certificates (package name may differ per distro)
        let _ = run_shell("sudo pkg install -v web/ca-certificates || sudo pkg install -v library/security/ca-certificates || true").await;
        let _ = run_shell("sudo pkgin -y install mozilla-rootcerts || true").await;
        let _ = run_shell("sudo mozilla-rootcerts install || true").await;
        // Retry with curl and wget
        if run_shell(&cmd_curl).await.is_ok() {
            return Ok(());
        }
        if run_shell(&cmd_wget).await.is_ok() {
            return Ok(());
        }
        // As a last resort with explicit opt-in, try curl --insecure
        if insecure {
            let cmd_curl_insecure = format!(
                "mkdir -p {workdir} && curl -fSLk {url} | tar -xz -C {workdir} --strip-components=1"
            );
            if run_shell(&cmd_curl_insecure).await.is_ok() {
                warn!("used curl --insecure to fetch repo archive on SunOS");
                return Ok(());
            }
        }
    }
    Err(miette::miette!("failed to fetch repo archive via HTTP for {url}"))
}
/// Materialize the repository at `sha` inside `workdir`.
///
/// Prefers a shallow `git fetch` of the exact commit when git is available;
/// on any git failure — or when git is missing entirely — falls back to
/// downloading the forge's tarball archive via [`fetch_repo_via_archive`].
async fn ensure_repo(repo: &str, sha: &str, workdir: &str) -> Result<()> {
    fs::create_dir_all(workdir).await.into_diagnostic()?;
    let repo_eff = to_https_if_codeberg(repo);
    if has_cmd("git").await {
        let cmds = [
            format!("cd {workdir} && git init"),
            format!(
                "cd {workdir} && git remote remove origin >/dev/null 2>&1 || true && git remote add origin {repo_eff}"
            ),
            format!("cd {workdir} && git fetch --depth=1 origin {sha}"),
            format!("cd {workdir} && git checkout -q FETCH_HEAD"),
        ];
        for c in &cmds {
            if let Err(e) = run_shell(c).await {
                // Try archive fallback once on any git failure.
                warn!(error = %e, "git path failed; attempting archive fallback");
                return fetch_repo_via_archive(&repo_eff, sha, workdir).await;
            }
        }
        Ok(())
    } else {
        fetch_repo_via_archive(&repo_eff, sha, workdir).await
    }
}
async fn run_job_script(workdir: &str) -> Result<i32> {
let script = format!("{}/.solstice/job.sh", workdir);
if !fs::try_exists(&script).await.into_diagnostic()? {
warn!(path = %script, "job script not found");
eprintln!("{}", ndjson_line("job_run", "error", &format!("job script not found at {}", script), None));
return Ok(1);
}
// Emit explicit pre-exec line to aid diagnostics
println!("{}", ndjson_line("job_run", "info", &format!("executing {}", script), None));
let _ = run_shell(&format!("chmod +x {} || true", script)).await?;
let mut cmd = Command::new("/bin/sh");
2025-11-01 14:56:46 +01:00
cmd.arg("-lc")
.arg(format!("cd {workdir} && {}", script))
.stdout(Stdio::piped())
.stderr(Stdio::piped());
let mut child = cmd.spawn().into_diagnostic()?;
// Buffer the last N stderr lines for failure summary
let last_err: Arc<Mutex<VecDeque<String>>> = Arc::new(Mutex::new(VecDeque::with_capacity(20)));
// Attach readers to child stdout/stderr so logs stream as NDJSON categorized under job_run
if let Some(stdout) = child.stdout.take() {
let mut reader = BufReader::new(stdout);
tokio::spawn(async move {
loop {
let mut buf = Vec::with_capacity(256);
match reader.read_until(b'\n', &mut buf).await {
Ok(0) => break,
Ok(_) => {
let line = String::from_utf8_lossy(&buf).trim_end_matches(['\n', '\r']).to_string();
println!("{}", ndjson_line("job_run", "info", &line, None));
}
Err(e) => {
eprintln!("{}", ndjson_line("job_run", "error", &format!("error reading stdout: {}", e), None));
break;
}
}
}
});
}
if let Some(stderr) = child.stderr.take() {
let mut reader = BufReader::new(stderr);
let last_err2 = last_err.clone();
tokio::spawn(async move {
loop {
let mut buf = Vec::with_capacity(256);
match reader.read_until(b'\n', &mut buf).await {
Ok(0) => break,
Ok(_) => {
let line = String::from_utf8_lossy(&buf).trim_end_matches(['\n', '\r']).to_string();
eprintln!("{}", ndjson_line("job_run", "error", &line, None));
if let Ok(mut dq) = last_err2.lock() {
if dq.len() == 20 { dq.pop_front(); }
dq.push_back(line);
}
}
Err(e) => {
eprintln!("{}", ndjson_line("job_run", "error", &format!("error reading stderr: {}", e), None));
break;
}
}
}
});
}
let status = child.wait().await.into_diagnostic()?;
let code = status.code().unwrap_or(1);
if code != 0 {
// Emit a concise failure summary (structured)
eprintln!("{}", ndjson_line("job_run", "error", &format!("job script exited with code {}", code), None));
// Include recent stderr lines for context (structured)
let lines: Vec<String> = last_err
.lock()
.ok()
.map(|dq| dq.iter().cloned().collect())
.unwrap_or_default();
if lines.is_empty() {
eprintln!("{}", ndjson_line("job_run", "warn", "no stderr lines were captured from the script", None));
} else {
eprintln!("{}", ndjson_line("job_run", "info", "recent stderr lines follow", None));
for l in lines {
eprintln!("{}", ndjson_line("job_run", "error", &l, None));
}
}
}
Ok(code)
}
#[tokio::main(flavor = "multi_thread")]
async fn main() -> Result<()> {
let _t = common::init_tracing("solstice-workflow-runner")?;
let _opts = Opts::parse();
// Try env overrides first for robustness
let repo = std::env::var("SOLSTICE_REPO_URL").ok();
let sha = std::env::var("SOLSTICE_COMMIT_SHA").ok();
let (repo, sha) = match (repo, sha) {
(Some(r), Some(s)) => (r, s),
_ => {
let jf = read_job_file().await?;
(jf.repo_url, jf.commit_sha)
}
};
info!(%repo, %sha, "runner starting");
let workdir = std::env::var("SOLSTICE_WORKDIR").unwrap_or_else(|_| "/root/work".into());
// Emit startup environment and tool checks
let uname = Command::new("/bin/sh").arg("-lc").arg("uname -a || echo unknown").output().await.ok()
.and_then(|o| String::from_utf8(o.stdout).ok()).unwrap_or_else(|| "unknown".into());
println!("{}", ndjson_line("env", "info", "system", Some(serde_json::json!({"uname": uname.trim()}))));
// Preflight environment checks (tools, workdir, network)
if let Err(e) = preflight(&repo, &workdir).await {
eprintln!("{}", ndjson_line("env_setup", "error", &format!("preflight failed: {}", e), None));
std::process::exit(1);
}
// Announce workspace
println!("{}", ndjson_line("env_setup", "info", "workdir", Some(serde_json::json!({"path": workdir}))));
let code = match ensure_repo(&repo, &sha, &workdir).await {
Ok(_) => {
// proceed to run job script
run_job_script(&workdir).await?
}
Err(e) => {
eprintln!("[runner] failed to prepare repo: {}", e);
1
}
};
if code != 0 {
error!(exit_code = code, "job script failed");
std::process::exit(code);
}
info!("job complete");
Ok(())
}