Add log delivery and step state reporting to Forgejo runner

Fetches logs from logs-service per category, uploads them to Forgejo
via UpdateLog, and reports per-step StepState entries so the Forgejo
UI shows individual step results and log output.
This commit is contained in:
Till Wegmueller 2026-04-06 23:59:26 +02:00
parent 5dfd9c367b
commit d8ef6ef236
6 changed files with 223 additions and 25 deletions

View file

@ -151,7 +151,11 @@ async fn main() -> Result<()> {
.await?; .await?;
// Build shared state // Build shared state
let state = Arc::new(RunnerState::new(identity, opts.max_concurrency)); let state = Arc::new(RunnerState::new(
identity,
opts.max_concurrency,
opts.logs_base_url,
));
// Translation context // Translation context
let translate_ctx = Arc::new(TranslateCtx { let translate_ctx = Arc::new(TranslateCtx {

View file

@ -85,7 +85,7 @@ pub async fn run(
match translate_task(&task, &translate_ctx).await { match translate_task(&task, &translate_ctx).await {
Ok(TranslateResult::Jobs(jobs)) => { Ok(TranslateResult::Jobs(jobs)) => {
let mut published_any = false; let mut published_any = false;
for jr in &jobs { for (jr, steps) in &jobs {
state.in_flight.insert( state.in_flight.insert(
jr.request_id, jr.request_id,
TaskMeta { TaskMeta {
@ -93,6 +93,7 @@ pub async fn run(
repo_url: jr.repo_url.clone(), repo_url: jr.repo_url.clone(),
commit_sha: jr.commit_sha.clone(), commit_sha: jr.commit_sha.clone(),
started_at: Instant::now(), started_at: Instant::now(),
steps: steps.clone(),
}, },
); );
@ -189,7 +190,9 @@ async fn report_running(client: &ConnectClient, state: &RunnerState, task_id: i6
}), }),
outputs: Default::default(), outputs: Default::default(),
}; };
client.update_task(&req, &state.identity.uuid, &state.identity.token).await?; client
.update_task(&req, &state.identity.uuid, &state.identity.token)
.await?;
Ok(()) Ok(())
} }
@ -213,7 +216,9 @@ async fn report_failure(
}), }),
outputs: Default::default(), outputs: Default::default(),
}; };
client.update_task(&req, &state.identity.uuid, &state.identity.token).await?; client
.update_task(&req, &state.identity.uuid, &state.identity.token)
.await?;
// Also send the error message as a log line // Also send the error message as a log line
let log_req = crate::proto::runner::v1::UpdateLogRequest { let log_req = crate::proto::runner::v1::UpdateLogRequest {
@ -228,7 +233,9 @@ async fn report_failure(
}], }],
no_more: true, no_more: true,
}; };
client.update_log(&log_req, &state.identity.uuid, &state.identity.token).await?; client
.update_log(&log_req, &state.identity.uuid, &state.identity.token)
.await?;
Ok(()) Ok(())
} }

View file

@ -3,10 +3,12 @@ use std::sync::Arc;
use futures_util::StreamExt; use futures_util::StreamExt;
use miette::{IntoDiagnostic, Result}; use miette::{IntoDiagnostic, Result};
use tokio::sync::watch; use tokio::sync::watch;
use tracing::{info, warn}; use tracing::{debug, error, info, warn};
use crate::connect::ConnectClient; use crate::connect::ConnectClient;
use crate::proto::runner::v1::{self, TaskState, UpdateTaskRequest}; use crate::proto::runner::v1::{
self, LogRow, StepState, TaskState, UpdateLogRequest, UpdateTaskRequest,
};
use crate::state::RunnerState; use crate::state::RunnerState;
/// Consume JobResults from RabbitMQ and report them back to Forgejo. /// Consume JobResults from RabbitMQ and report them back to Forgejo.
@ -165,24 +167,174 @@ async fn report_to_forgejo(
v1::Result::Failure v1::Result::Failure
}; };
// --- Fetch and upload logs ---
let mut log_index: i64 = 0;
let mut step_states: Vec<StepState> = Vec::new();
// Fetch all logs from logs-service and send to Forgejo
if let Some(logs_base) = &state.logs_base_url {
// First get the log categories
let categories_url = format!(
"{}/jobs/{}/logs",
logs_base.trim_end_matches('/'),
jobres.request_id
);
let http = reqwest::Client::new();
match http.get(&categories_url).send().await {
Ok(resp) if resp.status().is_success() => {
#[derive(serde::Deserialize)]
struct LogCategory {
category: String,
#[allow(dead_code)]
count: i64,
has_errors: bool,
}
if let Ok(categories) = resp.json::<Vec<LogCategory>>().await {
for (step_idx, cat) in categories.iter().enumerate() {
let step_log_start = log_index;
// Fetch log lines for this category
let log_url = format!(
"{}/jobs/{}/logs/{}",
logs_base.trim_end_matches('/'),
jobres.request_id,
cat.category
);
match http.get(&log_url).send().await {
Ok(resp) if resp.status().is_success() => {
if let Ok(text) = resp.text().await {
let lines: Vec<&str> = text.lines().collect();
let line_count = lines.len() as i64;
if !lines.is_empty() {
// Build LogRow entries
let rows: Vec<LogRow> = lines
.iter()
.map(|line| LogRow {
time: Some(now.clone()),
content: line.to_string(),
})
.collect();
// Send log chunk to Forgejo
let log_req = UpdateLogRequest {
task_id: task_meta.forgejo_task_id,
index: log_index,
rows,
no_more: false,
};
match client
.update_log(
&log_req,
&state.identity.uuid,
&state.identity.token,
)
.await
{
Ok(resp) => {
debug!(
task_id = task_meta.forgejo_task_id,
category = %cat.category,
lines = line_count,
ack_index = resp.ack_index,
"uploaded logs"
);
}
Err(e) => {
warn!(
error = %e,
category = %cat.category,
"failed to upload logs"
);
}
}
log_index += line_count;
}
// Build step state for this category
let step_result = if cat.has_errors {
v1::Result::Failure
} else {
v1::Result::Success
};
step_states.push(StepState {
id: step_idx as i64,
result: step_result as i32,
started_at: Some(now.clone()),
stopped_at: Some(now.clone()),
log_index: step_log_start,
log_length: log_index - step_log_start,
});
}
}
Ok(resp) => {
debug!(
status = %resp.status(),
category = %cat.category,
"failed to fetch log category"
);
}
Err(e) => {
warn!(error = %e, category = %cat.category, "failed to fetch logs");
}
}
}
}
}
Ok(resp) => {
debug!(
status = %resp.status(),
"failed to fetch log categories"
);
}
Err(e) => {
warn!(error = %e, "failed to connect to logs-service");
}
}
}
// Send final "no more logs" marker
let final_log = UpdateLogRequest {
task_id: task_meta.forgejo_task_id,
index: log_index,
rows: vec![],
no_more: true,
};
if let Err(e) = client
.update_log(&final_log, &state.identity.uuid, &state.identity.token)
.await
{
warn!(error = %e, "failed to send final log marker");
}
// --- Report task completion with step states ---
let req = UpdateTaskRequest { let req = UpdateTaskRequest {
state: Some(TaskState { state: Some(TaskState {
id: task_meta.forgejo_task_id, id: task_meta.forgejo_task_id,
result: result as i32, result: result as i32,
started_at: None, // already reported when task started started_at: None, // already reported when task started
stopped_at: Some(now), stopped_at: Some(now),
steps: vec![], steps: step_states,
}), }),
outputs: Default::default(), outputs: Default::default(),
}; };
client.update_task(&req, &state.identity.uuid, &state.identity.token).await?; client
.update_task(&req, &state.identity.uuid, &state.identity.token)
.await?;
info!( info!(
request_id = %jobres.request_id, request_id = %jobres.request_id,
task_id = task_meta.forgejo_task_id, task_id = task_meta.forgejo_task_id,
success = jobres.success, success = jobres.success,
exit_code = jobres.exit_code, exit_code = jobres.exit_code,
log_lines = log_index,
"reported result to Forgejo" "reported result to Forgejo"
); );

View file

@ -15,14 +15,24 @@ pub struct RunnerIdentity {
pub registered_at: String, pub registered_at: String,
} }
/// Info about a workflow step, used for reporting step states to Forgejo.
#[derive(Debug, Clone)]
pub struct StepInfo {
/// Step name as defined in the KDL workflow (e.g., "System info").
pub name: String,
/// Log category in the logs-service (e.g., "step:system-info").
pub log_category: String,
}
/// Metadata for a Forgejo task that is currently in-flight within Solstice. /// Metadata for a Forgejo task that is currently in-flight within Solstice.
#[derive(Debug)] #[derive(Debug)]
pub struct TaskMeta { pub struct TaskMeta {
pub forgejo_task_id: i64, pub forgejo_task_id: i64,
pub repo_url: String, pub repo_url: String,
pub commit_sha: String, pub commit_sha: String,
#[allow(dead_code)]
pub started_at: Instant, pub started_at: Instant,
/// Known workflow steps (populated during translation for log/step reporting).
pub steps: Vec<StepInfo>,
} }
/// Shared state accessible by the poller and reporter tasks. /// Shared state accessible by the poller and reporter tasks.
@ -32,14 +42,21 @@ pub struct RunnerState {
pub in_flight: DashMap<Uuid, TaskMeta>, pub in_flight: DashMap<Uuid, TaskMeta>,
/// Controls how many tasks can be in-flight simultaneously. /// Controls how many tasks can be in-flight simultaneously.
pub semaphore: Arc<Semaphore>, pub semaphore: Arc<Semaphore>,
/// Logs service base URL for fetching job logs.
pub logs_base_url: Option<String>,
} }
impl RunnerState { impl RunnerState {
pub fn new(identity: RunnerIdentity, max_concurrency: usize) -> Self { pub fn new(
identity: RunnerIdentity,
max_concurrency: usize,
logs_base_url: Option<String>,
) -> Self {
Self { Self {
identity, identity,
in_flight: DashMap::new(), in_flight: DashMap::new(),
semaphore: Arc::new(Semaphore::new(max_concurrency)), semaphore: Arc::new(Semaphore::new(max_concurrency)),
logs_base_url,
} }
} }
} }

View file

@ -3,11 +3,12 @@ use tracing::{debug, info, warn};
use uuid::Uuid; use uuid::Uuid;
use crate::proto::runner::v1::Task; use crate::proto::runner::v1::Task;
use crate::state::StepInfo;
/// The result of translating a Forgejo task. /// The result of translating a Forgejo task.
pub enum TranslateResult { pub enum TranslateResult {
/// Successfully translated into one or more JobRequests. /// Successfully translated into one or more JobRequests, with step info for reporting.
Jobs(Vec<common::JobRequest>), Jobs(Vec<(common::JobRequest, Vec<StepInfo>)>),
/// The workflow is not supported — return this message to Forgejo as a failure. /// The workflow is not supported — return this message to Forgejo as a failure.
Unsupported(String), Unsupported(String),
} }
@ -141,7 +142,7 @@ async fn try_kdl_workflow(
repo_url: &str, repo_url: &str,
sha: &str, sha: &str,
group_id: Uuid, group_id: Uuid,
) -> Result<Option<Vec<common::JobRequest>>> { ) -> Result<Option<Vec<(common::JobRequest, Vec<StepInfo>)>>> {
let base = match ctx.forgejo_base.as_deref() { let base = match ctx.forgejo_base.as_deref() {
Some(b) => b, Some(b) => b,
None => return Ok(None), None => return Ok(None),
@ -206,7 +207,7 @@ fn parse_kdl_jobs(
repo: &str, repo: &str,
sha: &str, sha: &str,
group_id: Uuid, group_id: Uuid,
) -> Vec<common::JobRequest> { ) -> Vec<(common::JobRequest, Vec<StepInfo>)> {
let mut out = Vec::new(); let mut out = Vec::new();
let mut lines = kdl.lines().peekable(); let mut lines = kdl.lines().peekable();
@ -216,6 +217,7 @@ fn parse_kdl_jobs(
let id = capture_attr(l, "id"); let id = capture_attr(l, "id");
let mut runs_on = capture_attr(l, "runs_on"); let mut runs_on = capture_attr(l, "runs_on");
let mut script: Option<String> = None; let mut script: Option<String> = None;
let mut steps = Vec::new();
let mut depth = if l.ends_with('{') { 1i32 } else { 0 }; let mut depth = if l.ends_with('{') { 1i32 } else { 0 };
while let Some(ln) = lines.peek().copied() { while let Some(ln) = lines.peek().copied() {
@ -238,6 +240,15 @@ fn parse_kdl_jobs(
script = Some(p); script = Some(p);
} }
} }
if t.starts_with("step ") {
if let Some(name) = capture_attr(t, "name") {
let slug = name.to_lowercase().replace(' ', "-");
steps.push(StepInfo {
name,
log_category: format!("step:{}", slug),
});
}
}
if t.contains("runs_on=") && runs_on.is_none() { if t.contains("runs_on=") && runs_on.is_none() {
runs_on = capture_attr(t, "runs_on"); runs_on = capture_attr(t, "runs_on");
} }
@ -254,7 +265,7 @@ fn parse_kdl_jobs(
jr.workflow_job_id = Some(id_val); jr.workflow_job_id = Some(id_val);
jr.runs_on = runs_on; jr.runs_on = runs_on;
jr.script_path = script; jr.script_path = script;
out.push(jr); out.push((jr, steps));
} }
} }
} }
@ -266,7 +277,7 @@ fn parse_kdl_jobs(
jr.repo_owner = Some(owner.to_string()); jr.repo_owner = Some(owner.to_string());
jr.repo_name = Some(repo.to_string()); jr.repo_name = Some(repo.to_string());
jr.workflow_path = Some(".solstice/workflow.kdl".to_string()); jr.workflow_path = Some(".solstice/workflow.kdl".to_string());
out.push(jr); out.push((jr, vec![]));
} }
out out
@ -296,7 +307,7 @@ fn try_actions_yaml(
repo: &str, repo: &str,
sha: &str, sha: &str,
group_id: Uuid, group_id: Uuid,
) -> Result<Option<Vec<common::JobRequest>>> { ) -> Result<Option<Vec<(common::JobRequest, Vec<StepInfo>)>>> {
let yaml_str = std::str::from_utf8(payload_bytes).into_diagnostic()?; let yaml_str = std::str::from_utf8(payload_bytes).into_diagnostic()?;
let doc: serde_yaml::Value = serde_yaml::from_str(yaml_str).into_diagnostic()?; let doc: serde_yaml::Value = serde_yaml::from_str(yaml_str).into_diagnostic()?;
@ -387,10 +398,13 @@ fn try_actions_yaml(
return Ok(None); return Ok(None);
} }
// For tier 2, we just create the JobRequests — the script content will need // For tier 2, we don't have per-step log categories (orchestrator runs as one block)
// to be handled by the orchestrator. For now, embed a hint in the script_path. Ok(Some(
// TODO: Consider passing the script content via a sidecar mechanism. results
Ok(Some(results.into_iter().map(|(jr, _)| jr).collect())) .into_iter()
.map(|(jr, _script)| (jr, vec![]))
.collect(),
))
} }
fn has_unsupported_features(job_map: &serde_yaml::Mapping) -> bool { fn has_unsupported_features(job_map: &serde_yaml::Mapping) -> bool {

View file

@ -363,17 +363,21 @@ services:
AMQP_RESULTS_QUEUE: solstice.runner-results.v1 AMQP_RESULTS_QUEUE: solstice.runner-results.v1
# Forgejo runner configuration # Forgejo runner configuration
FORGEJO_URL: ${FORGEJO_URL} FORGEJO_URL: ${FORGEJO_URL}
FORGEJO_BASE_URL: ${FORGEJO_BASE_URL} # API base for fetching workflow files from repos (self-hosted Forgejo)
FORGEJO_TOKEN: ${FORGEJO_TOKEN} FORGEJO_BASE_URL: ${FORGEJO_URL}/api/v1
FORGEJO_TOKEN: ${RUNNER_FORGEJO_TOKEN}
RUNNER_REGISTRATION_TOKEN: ${RUNNER_REGISTRATION_TOKEN} RUNNER_REGISTRATION_TOKEN: ${RUNNER_REGISTRATION_TOKEN}
RUNNER_NAME: solstice-runner-${ENV} RUNNER_NAME: solstice-runner-${ENV}
RUNNER_LABELS: ${RUNNER_LABELS:-self-hosted} RUNNER_LABELS: ${RUNNER_LABELS:-self-hosted}
RUNNER_STATE_PATH: /data/runner-state.json RUNNER_STATE_PATH: /data/runner-state.json
MAX_CONCURRENCY: ${RUNNER_MAX_CONCURRENCY:-4} MAX_CONCURRENCY: ${RUNNER_MAX_CONCURRENCY:-4}
LOGS_BASE_URL: https://logs.${ENV}.${DOMAIN} # Internal URL for fetching logs (same Docker network)
LOGS_BASE_URL: http://solstice-logs-service:8082
depends_on: depends_on:
rabbitmq: rabbitmq:
condition: service_healthy condition: service_healthy
logs-service:
condition: service_started
volumes: volumes:
- runner-state:/data:Z - runner-state:/data:Z
networks: networks: