mirror of
https://codeberg.org/Toasterson/solstice-ci.git
synced 2026-04-10 13:20:41 +00:00
Enhance runner with log streaming details, fallback repository fetch, and improved error handling
This commit improves the runner's functionality by adding: - Detailed log streaming with request ID, stdout, and stderr line counts. - Fallback mechanisms for repository fetch using HTTP archive when git commands fail. - Enhanced error reporting for missing job scripts and reading errors. - Updates to ensure compatibility with SunOS environments and non-interactive shells.
This commit is contained in:
parent
5cfde45e4c
commit
b84e97e513
3 changed files with 244 additions and 53 deletions
|
|
@ -4,6 +4,11 @@ set -euo pipefail
|
||||||
# The runner clones the repo at the requested commit and executes this script.
|
# The runner clones the repo at the requested commit and executes this script.
|
||||||
# It attempts to ensure required tools (git, curl, protobuf compiler, Rust) exist.
|
# It attempts to ensure required tools (git, curl, protobuf compiler, Rust) exist.
|
||||||
|
|
||||||
|
# Ensure a sane HOME even under non-login shells with set -u
|
||||||
|
export HOME=${HOME:-/root}
|
||||||
|
# Quieter noninteractive installs where supported
|
||||||
|
export DEBIAN_FRONTEND=${DEBIAN_FRONTEND:-noninteractive}
|
||||||
|
|
||||||
log() { printf "[job] %s\n" "$*" >&2; }
|
log() { printf "[job] %s\n" "$*" >&2; }
|
||||||
|
|
||||||
detect_pm() {
|
detect_pm() {
|
||||||
|
|
@ -71,7 +76,11 @@ ensure_rust() {
|
||||||
log "installing Rust toolchain with rustup"
|
log "installing Rust toolchain with rustup"
|
||||||
curl -fsSL https://sh.rustup.rs | sh -s -- -y
|
curl -fsSL https://sh.rustup.rs | sh -s -- -y
|
||||||
# shellcheck disable=SC1091
|
# shellcheck disable=SC1091
|
||||||
source "$HOME/.cargo/env"
|
if [ -f "$HOME/.cargo/env" ]; then
|
||||||
|
. "$HOME/.cargo/env"
|
||||||
|
else
|
||||||
|
export PATH="$HOME/.cargo/bin:$PATH"
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
main() {
|
main() {
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,11 @@ impl Runner for RunnerSvc {
|
||||||
let mut commit_sha: Option<String> = None;
|
let mut commit_sha: Option<String> = None;
|
||||||
let mut exit_code: i32 = 0;
|
let mut exit_code: i32 = 0;
|
||||||
let mut success: bool = true;
|
let mut success: bool = true;
|
||||||
|
let mut lines_stdout: usize = 0;
|
||||||
|
let mut lines_stderr: usize = 0;
|
||||||
|
let mut got_end: bool = false;
|
||||||
|
|
||||||
|
info!("runner log stream opened");
|
||||||
while let Some(item) = stream
|
while let Some(item) = stream
|
||||||
.next()
|
.next()
|
||||||
.await
|
.await
|
||||||
|
|
@ -42,7 +46,10 @@ impl Runner for RunnerSvc {
|
||||||
// Correlate request id
|
// Correlate request id
|
||||||
if req_id.is_none() {
|
if req_id.is_none() {
|
||||||
match uuid::Uuid::parse_str(&item.request_id) {
|
match uuid::Uuid::parse_str(&item.request_id) {
|
||||||
Ok(u) => req_id = Some(u),
|
Ok(u) => {
|
||||||
|
info!(request_id = %u, "runner log stream identified");
|
||||||
|
req_id = Some(u)
|
||||||
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
warn!(request_id = %item.request_id, "invalid request_id from runner");
|
warn!(request_id = %item.request_id, "invalid request_id from runner");
|
||||||
}
|
}
|
||||||
|
|
@ -52,8 +59,10 @@ impl Runner for RunnerSvc {
|
||||||
match ev {
|
match ev {
|
||||||
common::runner::v1::log_item::Event::Log(chunk) => {
|
common::runner::v1::log_item::Event::Log(chunk) => {
|
||||||
if chunk.stderr {
|
if chunk.stderr {
|
||||||
|
lines_stderr += 1;
|
||||||
info!(request_id = %item.request_id, line = %chunk.line, "runner:stderr");
|
info!(request_id = %item.request_id, line = %chunk.line, "runner:stderr");
|
||||||
} else {
|
} else {
|
||||||
|
lines_stdout += 1;
|
||||||
info!(request_id = %item.request_id, line = %chunk.line, "runner:stdout");
|
info!(request_id = %item.request_id, line = %chunk.line, "runner:stdout");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -62,11 +71,14 @@ impl Runner for RunnerSvc {
|
||||||
success = end.success;
|
success = end.success;
|
||||||
repo_url = Some(end.repo_url);
|
repo_url = Some(end.repo_url);
|
||||||
commit_sha = Some(end.commit_sha);
|
commit_sha = Some(end.commit_sha);
|
||||||
|
got_end = true;
|
||||||
|
info!(exit_code, success, "runner log stream received End");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
info!(lines_stdout, lines_stderr, got_end, "runner log stream closed");
|
||||||
// Publish final status if we have enough context
|
// Publish final status if we have enough context
|
||||||
if let (Some(id), Some(repo), Some(sha)) =
|
if let (Some(id), Some(repo), Some(sha)) =
|
||||||
(req_id.as_ref(), repo_url.as_ref(), commit_sha.as_ref())
|
(req_id.as_ref(), repo_url.as_ref(), commit_sha.as_ref())
|
||||||
|
|
|
||||||
|
|
@ -46,24 +46,107 @@ async fn run_shell(cmd: &str) -> Result<i32> {
|
||||||
.status()
|
.status()
|
||||||
.await
|
.await
|
||||||
.into_diagnostic()?;
|
.into_diagnostic()?;
|
||||||
Ok(status.code().unwrap_or(1))
|
let code = status.code().unwrap_or(1);
|
||||||
|
if code != 0 {
|
||||||
|
return Err(miette::miette!("command failed ({code}): {cmd}"));
|
||||||
|
}
|
||||||
|
Ok(code)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn to_https_if_codeberg(repo: &str) -> String {
|
||||||
|
// Convert SSH Codeberg URL to HTTPS for anonymous fetches
|
||||||
|
if let Some(rest) = repo.strip_prefix("ssh://git@codeberg.org/") {
|
||||||
|
return format!("https://codeberg.org/{rest}");
|
||||||
|
}
|
||||||
|
repo.to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn has_cmd(name: &str) -> bool {
|
||||||
|
tokio::process::Command::new("/bin/sh")
|
||||||
|
.arg("-lc")
|
||||||
|
.arg(format!("command -v {} >/dev/null 2>&1", name))
|
||||||
|
.status()
|
||||||
|
.await
|
||||||
|
.ok()
|
||||||
|
.and_then(|s| s.code())
|
||||||
|
.map(|c| c == 0)
|
||||||
|
.unwrap_or(false)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_repo_via_archive(repo_https: &str, sha: &str, workdir: &str) -> Result<()> {
|
||||||
|
// Gitea/Codeberg archive URL pattern: https://codeberg.org/<owner>/<repo>/archive/<sha>.tar.gz
|
||||||
|
let base = repo_https.trim_end_matches('.').trim_end_matches(".git");
|
||||||
|
let url = format!("{}/archive/{}.tar.gz", base, sha);
|
||||||
|
|
||||||
|
// Try curl | tar, then wget | tar
|
||||||
|
let cmd_curl = format!(
|
||||||
|
"mkdir -p {workdir} && curl -fSL {url} | tar -xz -C {workdir} --strip-components=1"
|
||||||
|
);
|
||||||
|
if run_shell(&cmd_curl).await.is_ok() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
let cmd_wget = format!(
|
||||||
|
"mkdir -p {workdir} && wget -qO- {url} | tar -xz -C {workdir} --strip-components=1"
|
||||||
|
);
|
||||||
|
if run_shell(&cmd_wget).await.is_ok() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
// On illumos/SunOS images, curl/wget may be missing. Try to install curl and retry.
|
||||||
|
let os = std::env::var("SOLSTICE_OS_OVERRIDE").ok().unwrap_or_else(|| {
|
||||||
|
// Best-effort OS detection
|
||||||
|
std::env::consts::OS.to_string()
|
||||||
|
});
|
||||||
|
// Prefer uname if available
|
||||||
|
let uname = Command::new("/bin/sh").arg("-lc").arg("uname -s 2>/dev/null || echo unknown").output().await.ok()
|
||||||
|
.and_then(|o| String::from_utf8(o.stdout).ok()).unwrap_or_default();
|
||||||
|
let is_sunos = uname.trim() == "SunOS" || os == "solaris";
|
||||||
|
if is_sunos {
|
||||||
|
// Try IPS (pkg) first, then pkgin
|
||||||
|
let _ = run_shell("sudo pkg refresh || true").await;
|
||||||
|
if run_shell("sudo pkg install -v web/curl").await.is_err() {
|
||||||
|
let _ = run_shell("sudo pkgin -y install curl").await;
|
||||||
|
}
|
||||||
|
// Retry with curl
|
||||||
|
if run_shell(&cmd_curl).await.is_ok() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
if run_shell(&cmd_wget).await.is_ok() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(miette::miette!("failed to fetch repo archive via HTTP for {url}"))
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn ensure_repo(repo: &str, sha: &str, workdir: &str) -> Result<()> {
|
async fn ensure_repo(repo: &str, sha: &str, workdir: &str) -> Result<()> {
|
||||||
fs::create_dir_all(workdir).await.into_diagnostic()?;
|
fs::create_dir_all(workdir).await.into_diagnostic()?;
|
||||||
// Use system git to avoid libgit2 cross issues
|
let repo_eff = to_https_if_codeberg(repo);
|
||||||
let cmds = vec![
|
|
||||||
format!("cd {workdir} && git init"),
|
// Prefer git when available; fall back to archive download when git is missing or fetch fails.
|
||||||
format!(
|
if has_cmd("git").await {
|
||||||
"cd {workdir} && git remote remove origin >/dev/null 2>&1 || true && git remote add origin {repo}"
|
let cmds = vec![
|
||||||
),
|
format!("cd {workdir} && git init"),
|
||||||
format!("cd {workdir} && git fetch --depth=1 origin {sha}"),
|
format!(
|
||||||
format!("cd {workdir} && git checkout -q FETCH_HEAD"),
|
"cd {workdir} && git remote remove origin >/dev/null 2>&1 || true && git remote add origin {repo_eff}"
|
||||||
];
|
),
|
||||||
for c in cmds {
|
format!("cd {workdir} && git fetch --depth=1 origin {sha}"),
|
||||||
let _ = run_shell(&c).await?;
|
format!("cd {workdir} && git checkout -q FETCH_HEAD"),
|
||||||
|
];
|
||||||
|
for c in cmds {
|
||||||
|
match run_shell(&c).await {
|
||||||
|
Ok(_) => {}
|
||||||
|
Err(e) => {
|
||||||
|
// Try archive fallback once on any git failure
|
||||||
|
warn!(error = %e, "git path failed; attempting archive fallback");
|
||||||
|
return fetch_repo_via_archive(&repo_eff, sha, workdir).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
fetch_repo_via_archive(&repo_eff, sha, workdir).await
|
||||||
}
|
}
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run_job_script_streamed(
|
async fn run_job_script_streamed(
|
||||||
|
|
@ -74,7 +157,30 @@ async fn run_job_script_streamed(
|
||||||
let script = format!("{}/.solstice/job.sh", workdir);
|
let script = format!("{}/.solstice/job.sh", workdir);
|
||||||
if !fs::try_exists(&script).await.into_diagnostic()? {
|
if !fs::try_exists(&script).await.into_diagnostic()? {
|
||||||
warn!(path = %script, "job script not found");
|
warn!(path = %script, "job script not found");
|
||||||
return Ok(0);
|
if let Some(tx0) = tx.as_ref() {
|
||||||
|
let _ = tx0
|
||||||
|
.send(LogItem {
|
||||||
|
request_id: request_id.to_string(),
|
||||||
|
event: Some(Event::Log(LogChunk {
|
||||||
|
line: format!("[runner] job script not found at {}", script),
|
||||||
|
stderr: true,
|
||||||
|
})),
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
return Ok(1);
|
||||||
|
}
|
||||||
|
// Emit explicit pre-exec line to aid diagnostics
|
||||||
|
if let Some(tx0) = tx.as_ref() {
|
||||||
|
let _ = tx0
|
||||||
|
.send(LogItem {
|
||||||
|
request_id: request_id.to_string(),
|
||||||
|
event: Some(Event::Log(LogChunk {
|
||||||
|
line: format!("[runner] executing {}", script),
|
||||||
|
stderr: false,
|
||||||
|
})),
|
||||||
|
})
|
||||||
|
.await;
|
||||||
}
|
}
|
||||||
let _ = run_shell(&format!("chmod +x {} || true", script)).await?;
|
let _ = run_shell(&format!("chmod +x {} || true", script)).await?;
|
||||||
|
|
||||||
|
|
@ -87,35 +193,73 @@ async fn run_job_script_streamed(
|
||||||
|
|
||||||
if let Some(tx) = tx.clone() {
|
if let Some(tx) = tx.clone() {
|
||||||
if let Some(stdout) = child.stdout.take() {
|
if let Some(stdout) = child.stdout.take() {
|
||||||
let mut lines = BufReader::new(stdout).lines();
|
let mut reader = BufReader::new(stdout);
|
||||||
let tx2 = tx.clone();
|
let tx2 = tx.clone();
|
||||||
let req = request_id.to_string();
|
let req = request_id.to_string();
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
while let Ok(Some(line)) = lines.next_line().await {
|
loop {
|
||||||
let _ = tx2
|
let mut buf = Vec::with_capacity(256);
|
||||||
.send(LogItem {
|
match reader.read_until(b'\n', &mut buf).await {
|
||||||
request_id: req.clone(),
|
Ok(0) => break,
|
||||||
event: Some(Event::Log(LogChunk {
|
Ok(_) => {
|
||||||
line,
|
let line = String::from_utf8_lossy(&buf).trim_end_matches(['\n', '\r']).to_string();
|
||||||
stderr: false,
|
let _ = tx2
|
||||||
})),
|
.send(LogItem {
|
||||||
})
|
request_id: req.clone(),
|
||||||
.await;
|
event: Some(Event::Log(LogChunk {
|
||||||
|
line,
|
||||||
|
stderr: false,
|
||||||
|
})),
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
let _ = tx2
|
||||||
|
.send(LogItem {
|
||||||
|
request_id: req.clone(),
|
||||||
|
event: Some(Event::Log(LogChunk {
|
||||||
|
line: format!("[runner] error reading stdout: {e}"),
|
||||||
|
stderr: true,
|
||||||
|
})),
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
if let Some(stderr) = child.stderr.take() {
|
if let Some(stderr) = child.stderr.take() {
|
||||||
let mut lines = BufReader::new(stderr).lines();
|
let mut reader = BufReader::new(stderr);
|
||||||
let tx2 = tx.clone();
|
let tx2 = tx.clone();
|
||||||
let req = request_id.to_string();
|
let req = request_id.to_string();
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
while let Ok(Some(line)) = lines.next_line().await {
|
loop {
|
||||||
let _ = tx2
|
let mut buf = Vec::with_capacity(256);
|
||||||
.send(LogItem {
|
match reader.read_until(b'\n', &mut buf).await {
|
||||||
request_id: req.clone(),
|
Ok(0) => break,
|
||||||
event: Some(Event::Log(LogChunk { line, stderr: true })),
|
Ok(_) => {
|
||||||
})
|
let line = String::from_utf8_lossy(&buf).trim_end_matches(['\n', '\r']).to_string();
|
||||||
.await;
|
let _ = tx2
|
||||||
|
.send(LogItem {
|
||||||
|
request_id: req.clone(),
|
||||||
|
event: Some(Event::Log(LogChunk { line, stderr: true })),
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
let _ = tx2
|
||||||
|
.send(LogItem {
|
||||||
|
request_id: req.clone(),
|
||||||
|
event: Some(Event::Log(LogChunk {
|
||||||
|
line: format!("[runner] error reading stderr: {e}"),
|
||||||
|
stderr: true,
|
||||||
|
})),
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -151,28 +295,34 @@ async fn main() -> Result<()> {
|
||||||
|
|
||||||
// Setup gRPC streaming if orchestrator address and request id are provided
|
// Setup gRPC streaming if orchestrator address and request id are provided
|
||||||
let orch_addr = std::env::var("SOLSTICE_ORCH_ADDR").ok();
|
let orch_addr = std::env::var("SOLSTICE_ORCH_ADDR").ok();
|
||||||
let request_id = std::env::var("SOLSTICE_REQUEST_ID").ok();
|
let request_id_env = std::env::var("SOLSTICE_REQUEST_ID").ok();
|
||||||
|
// Use provided request id or empty string (server will warn if invalid)
|
||||||
|
let request_id_effective = request_id_env.unwrap_or_default();
|
||||||
let mut tx_opt: Option<mpsc::Sender<LogItem>> = None;
|
let mut tx_opt: Option<mpsc::Sender<LogItem>> = None;
|
||||||
if let (Some(addr), Some(req_id)) = (orch_addr.clone(), request_id.clone()) {
|
let mut stream_handle: Option<tokio::task::JoinHandle<()>> = None;
|
||||||
let (tx, rx) = mpsc::channel::<LogItem>(256);
|
if let Some(addr) = orch_addr.clone() {
|
||||||
|
let (tx, rx) = mpsc::channel::<LogItem>(512);
|
||||||
let stream = ReceiverStream::new(rx);
|
let stream = ReceiverStream::new(rx);
|
||||||
// Spawn client task
|
// Spawn client task and keep a handle so we can await graceful flush on shutdown.
|
||||||
tokio::spawn(async move {
|
let handle = tokio::spawn(async move {
|
||||||
match RunnerClient::connect(format!("http://{addr}")).await {
|
match RunnerClient::connect(format!("http://{addr}")).await {
|
||||||
Ok(mut client) => {
|
Ok(mut client) => {
|
||||||
let _ = client.stream_logs(stream).await; // ignore result
|
if let Err(e) = client.stream_logs(stream).await {
|
||||||
|
warn!(error = %e, "log stream to orchestrator terminated with error");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!(error = %e, "failed to connect to orchestrator gRPC; logs will not be streamed");
|
warn!(error = %e, "failed to connect to orchestrator gRPC; logs will not be streamed");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
stream_handle = Some(handle);
|
||||||
tx_opt = Some(tx);
|
tx_opt = Some(tx);
|
||||||
// Send a first line
|
// Send a first line
|
||||||
if let Some(ref tx) = tx_opt {
|
if let Some(ref tx) = tx_opt {
|
||||||
let _ = tx
|
let _ = tx
|
||||||
.send(LogItem {
|
.send(LogItem {
|
||||||
request_id: req_id.clone(),
|
request_id: request_id_effective.clone(),
|
||||||
event: Some(Event::Log(LogChunk {
|
event: Some(Event::Log(LogChunk {
|
||||||
line: format!("runner starting: repo={repo} sha={sha}"),
|
line: format!("runner starting: repo={repo} sha={sha}"),
|
||||||
stderr: false,
|
stderr: false,
|
||||||
|
|
@ -182,19 +332,33 @@ async fn main() -> Result<()> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ensure_repo(&repo, &sha, &workdir).await?;
|
let code = match ensure_repo(&repo, &sha, &workdir).await {
|
||||||
let code = run_job_script_streamed(
|
Ok(_) => {
|
||||||
&workdir,
|
// proceed to run job script
|
||||||
tx_opt.clone(),
|
run_job_script_streamed(&workdir, tx_opt.clone(), &request_id_effective).await?
|
||||||
request_id.as_deref().unwrap_or(""),
|
}
|
||||||
)
|
Err(e) => {
|
||||||
.await?;
|
// Report checkout failure to orchestrator and return non-zero
|
||||||
|
if let Some(tx) = tx_opt.clone() {
|
||||||
|
let _ = tx
|
||||||
|
.send(LogItem {
|
||||||
|
request_id: request_id_effective.clone(),
|
||||||
|
event: Some(Event::Log(LogChunk {
|
||||||
|
line: format!("[runner] failed to prepare repo: {e}"),
|
||||||
|
stderr: true,
|
||||||
|
})),
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
1
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Send JobEnd if streaming enabled
|
// Send JobEnd if streaming enabled
|
||||||
if let (Some(tx), Some(req_id)) = (tx_opt.clone(), request_id.clone()) {
|
if let Some(tx) = tx_opt.clone() {
|
||||||
let _ = tx
|
let _ = tx
|
||||||
.send(LogItem {
|
.send(LogItem {
|
||||||
request_id: req_id.clone(),
|
request_id: request_id_effective.clone(),
|
||||||
event: Some(Event::End(JobEnd {
|
event: Some(Event::End(JobEnd {
|
||||||
exit_code: code,
|
exit_code: code,
|
||||||
success: code == 0,
|
success: code == 0,
|
||||||
|
|
@ -203,8 +367,14 @@ async fn main() -> Result<()> {
|
||||||
})),
|
})),
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
// Give the client task a brief moment to flush
|
// Drop the last sender to close the client stream and allow server to flush
|
||||||
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
|
drop(tx);
|
||||||
|
tx_opt = None;
|
||||||
|
// Give the client task a brief moment to flush and then await it
|
||||||
|
tokio::time::sleep(std::time::Duration::from_millis(150)).await;
|
||||||
|
if let Some(h) = stream_handle {
|
||||||
|
let _ = h.await;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if code != 0 {
|
if code != 0 {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue