Close the control loop: versioned bind, event-driven controller, graceful shutdown

- Move WatchEventType and ResourceEvent to reddwarf-core so scheduler
  and runtime can use them without depending on the apiserver crate
- Fix scheduler bind_pod to create versioned commits and publish
  MODIFIED events so the pod controller learns about scheduled pods
- Replace polling loop in pod controller with event bus subscription,
  wire handle_delete for DELETED events, keep reconcile_all for
  startup sync and lag recovery (pattern sketched below)
- Add allocatable/capacity resources (cpu, memory, pods) to node agent
  build_node so the scheduler's resource filter accepts nodes
- Bootstrap the "default" namespace on startup so pod creation in that
  namespace no longer fails
- Replace .abort() shutdown with CancellationToken-based graceful
  shutdown across scheduler, controller, and node agent
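
The controller's event loop now follows the shape below. This is a
condensed, hand-written sketch to illustrate the pattern, not the code
in this commit: the Event type, handle(), and reconcile_all() stand in
for the real ResourceEvent plumbing and pod reconciliation.

    use tokio::sync::broadcast;
    use tokio_util::sync::CancellationToken;

    // Stand-in for ResourceEvent; only the fields the sketch needs.
    #[derive(Clone, Debug)]
    struct Event {
        kind: String,
    }

    async fn reconcile_all() { /* list every pod and converge on desired state */ }
    async fn handle(_event: Event) { /* reconcile the single pod named in the event */ }

    // Event-driven loop: full resync on startup and after lag, exit on cancellation.
    async fn run(mut rx: broadcast::Receiver<Event>, token: CancellationToken) {
        reconcile_all().await; // startup sync

        loop {
            tokio::select! {
                _ = token.cancelled() => return, // graceful shutdown
                result = rx.recv() => {
                    match result {
                        Ok(event) if event.kind == "Pod" => handle(event).await,
                        Ok(_) => {} // other resource kinds are ignored
                        Err(broadcast::error::RecvError::Lagged(_)) => reconcile_all().await,
                        Err(broadcast::error::RecvError::Closed) => return,
                    }
                }
            }
        }
    }

    #[tokio::main]
    async fn main() {
        let (tx, rx) = broadcast::channel(64);
        let token = CancellationToken::new();
        let worker = tokio::spawn(run(rx, token.clone()));

        // A publisher (the scheduler, in the real system) emits an event.
        tx.send(Event { kind: "Pod".into() }).unwrap();

        token.cancel();        // ask the loop to stop...
        let _ = worker.await;  // ...and wait for it instead of aborting
    }

The scheduler and node agent loops use the same select-on-cancellation
shape, which is what lets main() replace the old handle.abort() calls
with token.cancel() followed by a timed join.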

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Till Wegmueller 2026-02-08 23:21:53 +01:00
parent 8d9ae6ac62
commit c50ecb2664
13 changed files with 452 additions and 138 deletions

Cargo.lock (generated)

@@ -1337,6 +1337,7 @@ dependencies = [
  "reddwarf-storage",
  "reddwarf-versioning",
  "tokio",
+ "tokio-util",
  "tracing",
  "tracing-subscriber",
 ]
@@ -1399,6 +1400,7 @@ dependencies = [
  "thiserror 2.0.18",
  "tokio",
  "tokio-stream",
+ "tokio-util",
  "tracing",
  "uuid",
 ]
@@ -1411,11 +1413,13 @@ dependencies = [
  "miette",
  "reddwarf-core",
  "reddwarf-storage",
+ "reddwarf-versioning",
  "serde",
  "serde_json",
  "tempfile",
  "thiserror 2.0.18",
  "tokio",
+ "tokio-util",
  "tracing",
 ]
@@ -2051,6 +2055,7 @@ dependencies = [
  "bytes",
  "futures-core",
  "futures-sink",
+ "futures-util",
  "pin-project-lite",
  "tokio",
 ]


@@ -43,6 +43,7 @@ anyhow = "1.0"
 # Async runtime
 tokio = { version = "1.40", features = ["full"] }
 tokio-stream = { version = "0.1", features = ["sync"] }
+tokio-util = { version = "0.7", features = ["rt"] }
 futures-util = "0.3"
 async-trait = "0.1"


@@ -1,7 +1,4 @@
-use reddwarf_core::{GroupVersionKind, ResourceKey};
-use serde::{Deserialize, Serialize};
-
-use crate::watch::WatchEventType;
+pub use reddwarf_core::{ResourceEvent, WatchEventType};
 
 /// Configuration for the event bus
 #[derive(Debug, Clone)]
@@ -16,74 +13,12 @@ impl Default for EventBusConfig {
     }
 }
 
-/// A resource event emitted by the API server on mutations
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct ResourceEvent {
-    /// Type of watch event (ADDED, MODIFIED, DELETED)
-    pub event_type: WatchEventType,
-    /// GroupVersionKind of the resource
-    pub gvk: GroupVersionKind,
-    /// Full resource key (gvk + namespace + name)
-    pub resource_key: ResourceKey,
-    /// The serialized resource object
-    pub object: serde_json::Value,
-    /// Resource version at the time of the event
-    pub resource_version: String,
-}
-
-impl ResourceEvent {
-    /// Create an ADDED event
-    pub fn added(
-        resource_key: ResourceKey,
-        object: serde_json::Value,
-        resource_version: String,
-    ) -> Self {
-        Self {
-            event_type: WatchEventType::Added,
-            gvk: resource_key.gvk.clone(),
-            resource_key,
-            object,
-            resource_version,
-        }
-    }
-
-    /// Create a MODIFIED event
-    pub fn modified(
-        resource_key: ResourceKey,
-        object: serde_json::Value,
-        resource_version: String,
-    ) -> Self {
-        Self {
-            event_type: WatchEventType::Modified,
-            gvk: resource_key.gvk.clone(),
-            resource_key,
-            object,
-            resource_version,
-        }
-    }
-
-    /// Create a DELETED event
-    pub fn deleted(
-        resource_key: ResourceKey,
-        object: serde_json::Value,
-        resource_version: String,
-    ) -> Self {
-        Self {
-            event_type: WatchEventType::Deleted,
-            gvk: resource_key.gvk.clone(),
-            resource_key,
-            object,
-            resource_version,
-        }
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
     use crate::handlers::common::{create_resource, delete_resource, update_resource};
     use crate::AppState;
-    use reddwarf_core::{GroupVersionKind, Pod, Resource};
+    use reddwarf_core::{GroupVersionKind, Pod, Resource, ResourceKey, WatchEventType};
     use reddwarf_storage::RedbBackend;
     use reddwarf_versioning::VersionStore;
     use std::sync::Arc;


@@ -3,21 +3,12 @@ use crate::AppState;
 use axum::response::sse::{Event, KeepAlive, Sse};
 use futures_util::StreamExt;
 use reddwarf_core::GroupVersionKind;
+pub use reddwarf_core::WatchEventType;
 use serde::{Deserialize, Serialize};
 use std::convert::Infallible;
 use std::sync::Arc;
 use tokio_stream::wrappers::{errors::BroadcastStreamRecvError, BroadcastStream};
 
-/// Watch event type
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(rename_all = "UPPERCASE")]
-pub enum WatchEventType {
-    Added,
-    Modified,
-    Deleted,
-    Error,
-}
-
 /// Watch event
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct WatchEvent<T> {


@@ -0,0 +1,74 @@
+use crate::types::{GroupVersionKind, ResourceKey};
+use serde::{Deserialize, Serialize};
+
+/// Watch event type
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "UPPERCASE")]
+pub enum WatchEventType {
+    Added,
+    Modified,
+    Deleted,
+    Error,
+}
+
+/// A resource event emitted by the API server on mutations
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ResourceEvent {
+    /// Type of watch event (ADDED, MODIFIED, DELETED)
+    pub event_type: WatchEventType,
+    /// GroupVersionKind of the resource
+    pub gvk: GroupVersionKind,
+    /// Full resource key (gvk + namespace + name)
+    pub resource_key: ResourceKey,
+    /// The serialized resource object
+    pub object: serde_json::Value,
+    /// Resource version at the time of the event
+    pub resource_version: String,
+}
+
+impl ResourceEvent {
+    /// Create an ADDED event
+    pub fn added(
+        resource_key: ResourceKey,
+        object: serde_json::Value,
+        resource_version: String,
+    ) -> Self {
+        Self {
+            event_type: WatchEventType::Added,
+            gvk: resource_key.gvk.clone(),
+            resource_key,
+            object,
+            resource_version,
+        }
+    }
+
+    /// Create a MODIFIED event
+    pub fn modified(
+        resource_key: ResourceKey,
+        object: serde_json::Value,
+        resource_version: String,
+    ) -> Self {
+        Self {
+            event_type: WatchEventType::Modified,
+            gvk: resource_key.gvk.clone(),
+            resource_key,
+            object,
+            resource_version,
+        }
+    }
+
+    /// Create a DELETED event
+    pub fn deleted(
+        resource_key: ResourceKey,
+        object: serde_json::Value,
+        resource_version: String,
+    ) -> Self {
+        Self {
+            event_type: WatchEventType::Deleted,
+            gvk: resource_key.gvk.clone(),
+            resource_key,
+            object,
+            resource_version,
+        }
+    }
+}


@@ -7,11 +7,13 @@
 //! - Serialization helpers
 
 pub mod error;
+pub mod events;
 pub mod resources;
 pub mod types;
 
 // Re-export commonly used types
 pub use error::{ReddwarfError, Result};
+pub use events::{ResourceEvent, WatchEventType};
 pub use resources::{is_valid_name, Resource, ResourceError};
 pub use types::{GroupVersionKind, ResourceKey, ResourceVersion};


@@ -12,6 +12,7 @@ reddwarf-core = { workspace = true }
 k8s-openapi = { workspace = true }
 tokio = { workspace = true }
 tokio-stream = { workspace = true }
+tokio-util = { workspace = true }
 serde = { workspace = true }
 serde_json = { workspace = true }
 miette = { workspace = true }


@@ -3,7 +3,10 @@ use crate::error::{Result, RuntimeError};
 use crate::traits::ZoneRuntime;
 use crate::types::*;
 use k8s_openapi::api::core::v1::{Pod, PodCondition, PodStatus};
+use reddwarf_core::{ResourceEvent, WatchEventType};
 use std::sync::Arc;
+use tokio::sync::broadcast;
+use tokio_util::sync::CancellationToken;
 use tracing::{debug, error, info, warn};
 
 /// Configuration for the pod controller
@@ -27,6 +30,7 @@ pub struct PodControllerConfig {
 pub struct PodController {
     runtime: Arc<dyn ZoneRuntime>,
     api_client: Arc<ApiClient>,
+    event_tx: broadcast::Sender<ResourceEvent>,
     config: PodControllerConfig,
 }
@@ -34,33 +38,89 @@
     pub fn new(
         runtime: Arc<dyn ZoneRuntime>,
         api_client: Arc<ApiClient>,
+        event_tx: broadcast::Sender<ResourceEvent>,
         config: PodControllerConfig,
     ) -> Self {
         Self {
             runtime,
             api_client,
+            event_tx,
             config,
         }
     }
 
-    /// Run the controller — polls for unscheduled-to-this-node pods in a loop.
+    /// Run the controller — reacts to pod events from the in-process event bus.
     ///
-    /// In a real implementation, this would use SSE watch. For now, we receive
-    /// events via the in-process event bus by subscribing to the broadcast channel.
-    /// Since the controller runs in the same process as the API server, we use
-    /// a simpler polling approach over the HTTP API.
-    pub async fn run(&self) -> Result<()> {
+    /// On startup, performs a full reconcile to catch up on any pods that were
+    /// scheduled while the controller was down. Then switches to event-driven mode.
+    pub async fn run(&self, token: CancellationToken) -> Result<()> {
         info!(
             "Starting pod controller for node '{}'",
             self.config.node_name
         );
 
-        // Poll loop — watches for pods via HTTP list
+        // Initial full sync
+        if let Err(e) = self.reconcile_all().await {
+            error!("Initial reconcile failed: {}", e);
+        }
+
+        let mut rx = self.event_tx.subscribe();
+
         loop {
-            if let Err(e) = self.reconcile_all().await {
-                error!("Pod controller reconcile cycle failed: {}", e);
-            }
-            tokio::time::sleep(std::time::Duration::from_secs(2)).await;
+            tokio::select! {
+                _ = token.cancelled() => {
+                    info!("Pod controller shutting down");
+                    return Ok(());
+                }
+                result = rx.recv() => {
+                    match result {
+                        Ok(event) => {
+                            if event.gvk.kind != "Pod" {
+                                continue;
+                            }
+                            match event.event_type {
+                                WatchEventType::Added | WatchEventType::Modified => {
+                                    match serde_json::from_value::<Pod>(event.object) {
+                                        Ok(pod) => {
+                                            if let Err(e) = self.reconcile(&pod).await {
+                                                let name = pod.metadata.name.as_deref().unwrap_or("<unknown>");
+                                                error!("Failed to reconcile pod {}: {}", name, e);
+                                            }
+                                        }
+                                        Err(e) => {
+                                            warn!("Failed to parse pod from event: {}", e);
+                                        }
+                                    }
+                                }
+                                WatchEventType::Deleted => {
+                                    match serde_json::from_value::<Pod>(event.object) {
+                                        Ok(pod) => {
+                                            if let Err(e) = self.handle_delete(&pod).await {
+                                                let name = pod.metadata.name.as_deref().unwrap_or("<unknown>");
+                                                error!("Failed to handle pod deletion {}: {}", name, e);
+                                            }
+                                        }
+                                        Err(e) => {
+                                            warn!("Failed to parse pod from delete event: {}", e);
+                                        }
+                                    }
+                                }
+                                _ => {}
+                            }
+                        }
+                        Err(broadcast::error::RecvError::Lagged(n)) => {
+                            warn!("Missed {} events, doing full resync", n);
+                            if let Err(e) = self.reconcile_all().await {
+                                error!("Resync after lag failed: {}", e);
+                            }
+                        }
+                        Err(broadcast::error::RecvError::Closed) => {
+                            info!("Event bus closed, stopping pod controller");
+                            return Ok(());
+                        }
+                    }
+                }
+            }
         }
     }


@@ -1,9 +1,12 @@
 use crate::api_client::ApiClient;
 use crate::error::{Result, RuntimeError};
 use k8s_openapi::api::core::v1::{Node, NodeAddress, NodeCondition, NodeStatus};
+use k8s_openapi::apimachinery::pkg::api::resource::Quantity;
 use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
+use std::collections::BTreeMap;
 use std::sync::Arc;
 use std::time::Duration;
+use tokio_util::sync::CancellationToken;
 use tracing::{info, warn};
 
 /// Configuration for the node agent
@@ -65,7 +68,7 @@
     }
 
     /// Run the heartbeat loop
-    pub async fn run(&self) -> Result<()> {
+    pub async fn run(&self, token: CancellationToken) -> Result<()> {
         // Register first
         self.register().await?;
 
@@ -75,10 +78,16 @@
         );
 
         loop {
-            tokio::time::sleep(self.config.heartbeat_interval).await;
-
-            if let Err(e) = self.heartbeat().await {
-                warn!("Heartbeat failed: {} — will retry", e);
-            }
+            tokio::select! {
+                _ = token.cancelled() => {
+                    info!("Node agent shutting down");
+                    return Ok(());
+                }
+                _ = tokio::time::sleep(self.config.heartbeat_interval) => {
+                    if let Err(e) = self.heartbeat().await {
+                        warn!("Heartbeat failed: {} — will retry", e);
+                    }
+                }
+            }
         }
     }
@@ -99,6 +108,22 @@
     fn build_node(&self) -> Node {
         let hostname = self.config.node_name.clone();
 
+        let cpu_count = std::thread::available_parallelism()
+            .map(|n| n.get().to_string())
+            .unwrap_or_else(|_| "1".to_string());
+
+        let allocatable = BTreeMap::from([
+            ("cpu".to_string(), Quantity(cpu_count.clone())),
+            ("memory".to_string(), Quantity("8Gi".to_string())),
+            ("pods".to_string(), Quantity("110".to_string())),
+        ]);
+
+        let capacity = BTreeMap::from([
+            ("cpu".to_string(), Quantity(cpu_count)),
+            ("memory".to_string(), Quantity("8Gi".to_string())),
+            ("pods".to_string(), Quantity("110".to_string())),
+        ]);
+
         Node {
             metadata: ObjectMeta {
                 name: Some(self.config.node_name.clone()),
@@ -132,6 +157,8 @@
                     type_: "Hostname".to_string(),
                     address: hostname,
                 }]),
+                allocatable: Some(allocatable),
+                capacity: Some(capacity),
                 ..Default::default()
             }),
             ..Default::default()
@@ -168,4 +195,36 @@
         assert_eq!(conditions[0].status, "True");
         assert!(conditions[0].last_heartbeat_time.is_some());
     }
+
+    #[test]
+    fn test_build_node_has_allocatable_resources() {
+        let api_client = Arc::new(ApiClient::new("http://127.0.0.1:6443"));
+        let config =
+            NodeAgentConfig::new("test-node".to_string(), "http://127.0.0.1:6443".to_string());
+        let agent = NodeAgent::new(api_client, config);
+
+        let node = agent.build_node();
+        let status = node.status.unwrap();
+
+        // Check allocatable
+        let alloc = status.allocatable.unwrap();
+        assert!(alloc.contains_key("cpu"));
+        assert!(alloc.contains_key("memory"));
+        assert!(alloc.contains_key("pods"));
+        assert_eq!(alloc["memory"].0, "8Gi");
+        assert_eq!(alloc["pods"].0, "110");
+
+        // CPU should match available parallelism
+        let expected_cpu = std::thread::available_parallelism()
+            .map(|n| n.get().to_string())
+            .unwrap_or_else(|_| "1".to_string());
+        assert_eq!(alloc["cpu"].0, expected_cpu);
+
+        // Check capacity
+        let cap = status.capacity.unwrap();
+        assert!(cap.contains_key("cpu"));
+        assert!(cap.contains_key("memory"));
+        assert!(cap.contains_key("pods"));
+        assert_eq!(cap["cpu"].0, expected_cpu);
+    }
 }


@@ -10,8 +10,10 @@ rust-version.workspace = true
 [dependencies]
 reddwarf-core = { workspace = true }
 reddwarf-storage = { workspace = true }
+reddwarf-versioning = { workspace = true }
 k8s-openapi = { workspace = true }
 tokio = { workspace = true }
+tokio-util = { workspace = true }
 miette = { workspace = true }
 thiserror = { workspace = true }
 tracing = { workspace = true }


@@ -2,11 +2,14 @@ use crate::filter::{default_filters, FilterPredicate};
 use crate::score::{calculate_weighted_score, default_scores, ScoreFunction};
 use crate::types::SchedulingContext;
 use crate::{Result, SchedulerError};
-use reddwarf_core::{Node, Pod};
+use reddwarf_core::{Node, Pod, ResourceEvent};
 use reddwarf_storage::{KVStore, KeyEncoder, RedbBackend};
+use reddwarf_versioning::{Change, CommitBuilder, VersionStore};
 use std::sync::Arc;
 use std::time::Duration;
+use tokio::sync::broadcast;
 use tokio::time::sleep;
+use tokio_util::sync::CancellationToken;
 use tracing::{debug, error, info, warn};
 
 /// Configuration for the scheduler
@@ -27,6 +30,8 @@
 /// Pod scheduler
 pub struct Scheduler {
     storage: Arc<RedbBackend>,
+    version_store: Arc<VersionStore>,
+    event_tx: broadcast::Sender<ResourceEvent>,
     config: SchedulerConfig,
     filters: Vec<Box<dyn FilterPredicate>>,
     scorers: Vec<Box<dyn ScoreFunction>>,
@@ -34,9 +39,16 @@
 impl Scheduler {
     /// Create a new scheduler
-    pub fn new(storage: Arc<RedbBackend>, config: SchedulerConfig) -> Self {
+    pub fn new(
+        storage: Arc<RedbBackend>,
+        version_store: Arc<VersionStore>,
+        event_tx: broadcast::Sender<ResourceEvent>,
+        config: SchedulerConfig,
+    ) -> Self {
         Self {
             storage,
+            version_store,
+            event_tx,
             config,
             filters: default_filters(),
             scorers: default_scores(),
@@ -44,15 +56,21 @@
     }
 
     /// Run the scheduler loop
-    pub async fn run(&self) -> Result<()> {
+    pub async fn run(&self, token: CancellationToken) -> Result<()> {
         info!("Starting scheduler");
 
         loop {
-            if let Err(e) = self.schedule_cycle().await {
-                error!("Scheduling cycle failed: {}", e);
-            }
-
-            sleep(self.config.schedule_interval).await;
+            tokio::select! {
+                _ = token.cancelled() => {
+                    info!("Scheduler shutting down");
+                    return Ok(());
+                }
+                _ = sleep(self.config.schedule_interval) => {
+                    if let Err(e) = self.schedule_cycle().await {
+                        error!("Scheduling cycle failed: {}", e);
+                    }
+                }
+            }
         }
     }
@@ -240,21 +258,39 @@
         Ok(best_node)
     }
 
-    /// Bind a pod to a node (update spec.nodeName)
+    /// Bind a pod to a node (update spec.nodeName) with versioning and event publishing
     async fn bind_pod(&self, pod: &mut Pod, node_name: &str) -> Result<()> {
         let pod_name = pod
             .metadata
             .name
             .as_ref()
-            .ok_or_else(|| SchedulerError::internal_error("Pod has no name"))?;
+            .ok_or_else(|| SchedulerError::internal_error("Pod has no name"))?
+            .clone();
         let namespace = pod
             .metadata
             .namespace
            .as_ref()
-            .ok_or_else(|| SchedulerError::internal_error("Pod has no namespace"))?;
+            .ok_or_else(|| SchedulerError::internal_error("Pod has no namespace"))?
+            .clone();
 
         info!("Binding pod {} to node {}", pod_name, node_name);
 
+        let key = reddwarf_core::ResourceKey::new(
+            reddwarf_core::GroupVersionKind::from_api_version_kind("v1", "Pod"),
+            &namespace,
+            &pod_name,
+        );
+        let storage_key = KeyEncoder::encode_resource_key(&key);
+
+        // Read the current pod bytes for version diff
+        let prev_data = self
+            .storage
+            .as_ref()
+            .get(storage_key.as_bytes())?
+            .ok_or_else(|| {
+                SchedulerError::internal_error(format!("Pod not found in storage: {}", pod_name))
+            })?;
+
         // Update pod spec
         if let Some(spec) = &mut pod.spec {
             spec.node_name = Some(node_name.to_string());
@@ -262,21 +298,54 @@
             return Err(SchedulerError::internal_error("Pod has no spec"));
         }
 
-        // Save updated pod
-        let key = reddwarf_core::ResourceKey::new(
-            reddwarf_core::GroupVersionKind::from_api_version_kind("v1", "Pod"),
-            namespace,
-            pod_name,
-        );
-        let storage_key = KeyEncoder::encode_resource_key(&key);
-
-        let data = serde_json::to_vec(&pod).map_err(|e| {
+        // Serialize new pod
+        let new_data = serde_json::to_vec(&pod).map_err(|e| {
             SchedulerError::internal_error(format!("Failed to serialize pod: {}", e))
         })?;
 
-        self.storage.as_ref().put(storage_key.as_bytes(), &data)?;
+        // Create a versioned commit
+        let change = Change::update(
+            storage_key.clone(),
+            String::from_utf8_lossy(&new_data).to_string(),
+            String::from_utf8_lossy(&prev_data).to_string(),
+        );
 
-        info!("Successfully bound pod {} to node {}", pod_name, node_name);
+        let commit = self
+            .version_store
+            .create_commit(
+                CommitBuilder::new()
+                    .change(change)
+                    .message(format!("Bind pod {} to node {}", pod_name, node_name)),
+            )
+            .map_err(|e| {
+                SchedulerError::internal_error(format!("Failed to create commit: {}", e))
+            })?;
+
+        // Set resource version to commit ID
+        pod.metadata.resource_version = Some(commit.id().to_string());
+
+        // Re-serialize with updated resource version
+        let final_data = serde_json::to_vec(&pod).map_err(|e| {
+            SchedulerError::internal_error(format!("Failed to serialize pod: {}", e))
+        })?;
+
+        // Write to storage
+        self.storage
+            .as_ref()
+            .put(storage_key.as_bytes(), &final_data)?;
+
+        info!(
+            "Successfully bound pod {} to node {} at version {}",
+            pod_name,
+            node_name,
+            commit.id()
+        );
+
+        // Publish MODIFIED event (best-effort)
+        if let Ok(object) = serde_json::to_value(&*pod) {
+            let event = ResourceEvent::modified(key, object, commit.id().to_string());
+            let _ = self.event_tx.send(event);
+        }
 
         Ok(())
     }
@@ -285,10 +354,23 @@
 #[cfg(test)]
 mod tests {
     use super::*;
+    use reddwarf_core::WatchEventType;
     use reddwarf_storage::RedbBackend;
+    use reddwarf_versioning::VersionStore;
     use std::collections::BTreeMap;
     use tempfile::tempdir;
 
+    fn create_test_scheduler() -> (Scheduler, broadcast::Receiver<ResourceEvent>) {
+        let dir = tempdir().unwrap();
+        let db_path = dir.path().join("test.redb");
+        let storage = Arc::new(RedbBackend::new(&db_path).unwrap());
+        let version_store = Arc::new(VersionStore::new(storage.clone()).unwrap());
+        let (event_tx, event_rx) = broadcast::channel(64);
+        let scheduler =
+            Scheduler::new(storage, version_store, event_tx, SchedulerConfig::default());
+        (scheduler, event_rx)
+    }
+
     fn create_test_node(name: &str, cpu: &str, memory: &str) -> Node {
         let mut node = Node::default();
         node.metadata.name = Some(name.to_string());
@@ -355,13 +437,25 @@
         pod
     }
 
+    /// Helper: store a pod in storage so bind_pod can read prev version
+    fn store_pod(scheduler: &Scheduler, pod: &Pod) {
+        let key = reddwarf_core::ResourceKey::new(
+            reddwarf_core::GroupVersionKind::from_api_version_kind("v1", "Pod"),
+            pod.metadata.namespace.as_deref().unwrap(),
+            pod.metadata.name.as_deref().unwrap(),
+        );
+        let storage_key = KeyEncoder::encode_resource_key(&key);
+        let data = serde_json::to_vec(pod).unwrap();
+        scheduler
+            .storage
+            .as_ref()
+            .put(storage_key.as_bytes(), &data)
+            .unwrap();
+    }
+
     #[tokio::test]
     async fn test_schedule_pod_success() {
-        let dir = tempdir().unwrap();
-        let db_path = dir.path().join("test.redb");
-        let storage = Arc::new(RedbBackend::new(&db_path).unwrap());
-        let scheduler = Scheduler::new(storage.clone(), SchedulerConfig::default());
+        let (scheduler, _rx) = create_test_scheduler();
 
         // Create nodes
         let nodes = vec![
@@ -369,8 +463,9 @@
             create_test_node("node2", "2", "4Gi"),
         ];
 
-        // Create pod
+        // Create pod and store it so bind_pod can read the previous version
         let pod = create_test_pod("test-pod", "default", "1", "1Gi");
+        store_pod(&scheduler, &pod);
 
         // Schedule pod
         let result = scheduler.schedule_pod(pod, &nodes).await;
@@ -382,11 +477,7 @@
     #[tokio::test]
     async fn test_schedule_pod_no_suitable_nodes() {
-        let dir = tempdir().unwrap();
-        let db_path = dir.path().join("test.redb");
-        let storage = Arc::new(RedbBackend::new(&db_path).unwrap());
-        let scheduler = Scheduler::new(storage, SchedulerConfig::default());
+        let (scheduler, _rx) = create_test_scheduler();
 
         // Create node with insufficient resources
         let nodes = vec![create_test_node("node1", "1", "1Gi")];
@@ -399,4 +490,36 @@
         assert!(result.is_err());
     }
+
+    #[tokio::test]
+    async fn test_bind_pod_publishes_modified_event() {
+        let (scheduler, mut rx) = create_test_scheduler();
+
+        let mut pod = create_test_pod("event-pod", "default", "1", "1Gi");
+        store_pod(&scheduler, &pod);
+
+        scheduler.bind_pod(&mut pod, "node1").await.unwrap();
+
+        let event = rx.try_recv().unwrap();
+        assert!(matches!(event.event_type, WatchEventType::Modified));
+        assert_eq!(event.resource_key.name, "event-pod");
+        assert_eq!(event.gvk.kind, "Pod");
+
+        // Verify the event object has the updated node name
+        let bound_pod: Pod = serde_json::from_value(event.object).unwrap();
+        assert_eq!(bound_pod.spec.unwrap().node_name, Some("node1".to_string()));
+    }
+
+    #[tokio::test]
+    async fn test_bind_pod_sets_resource_version() {
+        let (scheduler, _rx) = create_test_scheduler();
+
+        let mut pod = create_test_pod("version-pod", "default", "1", "1Gi");
+        store_pod(&scheduler, &pod);
+
+        scheduler.bind_pod(&mut pod, "node1").await.unwrap();
+
+        assert!(pod.metadata.resource_version.is_some());
+        assert!(!pod.metadata.resource_version.as_ref().unwrap().is_empty());
+    }
 }


@@ -19,6 +19,7 @@ reddwarf-apiserver = { workspace = true }
 reddwarf-scheduler = { workspace = true }
 reddwarf-runtime = { workspace = true }
 tokio = { workspace = true }
+tokio-util = { workspace = true }
 clap = { workspace = true }
 miette = { workspace = true }
 tracing = { workspace = true }


@@ -1,5 +1,6 @@
 use clap::{Parser, Subcommand};
-use reddwarf_apiserver::{ApiServer, AppState, Config as ApiConfig};
+use reddwarf_apiserver::{ApiError, ApiServer, AppState, Config as ApiConfig};
+use reddwarf_core::Namespace;
 use reddwarf_runtime::{
     ApiClient, EtherstubConfig, MockRuntime, NetworkMode, NodeAgent, NodeAgentConfig,
     PodController, PodControllerConfig, ZoneBrand,
@@ -9,6 +10,7 @@ use reddwarf_scheduler::Scheduler;
 use reddwarf_storage::RedbBackend;
 use reddwarf_versioning::VersionStore;
 use std::sync::Arc;
+use tokio_util::sync::CancellationToken;
 use tracing::{error, info};
 
 #[derive(Parser)]
@@ -79,6 +81,8 @@ async fn run_serve(bind: &str, data_dir: &str) -> miette::Result<()> {
     let state = create_app_state(data_dir)?;
 
+    bootstrap_default_namespace(&state).await?;
+
     let config = ApiConfig {
         listen_addr: bind
             .parse()
@@ -106,6 +110,8 @@ async fn run_agent(
     let state = create_app_state(data_dir)?;
 
+    bootstrap_default_namespace(&state).await?;
+
     let listen_addr: std::net::SocketAddr = bind
         .parse()
         .map_err(|e| miette::miette!("Invalid bind address '{}': {}", bind, e))?;
@@ -113,12 +119,22 @@
     // Determine the API URL for internal components to connect to
     let api_url = format!("http://127.0.0.1:{}", listen_addr.port());
 
+    let token = CancellationToken::new();
+
     // 1. Spawn API server
     let api_config = ApiConfig { listen_addr };
     let api_server = ApiServer::new(api_config, state.clone());
+    let api_token = token.clone();
     let api_handle = tokio::spawn(async move {
-        if let Err(e) = api_server.run().await {
-            error!("API server error: {}", e);
+        tokio::select! {
+            result = api_server.run() => {
+                if let Err(e) = result {
+                    error!("API server error: {}", e);
+                }
+            }
+            _ = api_token.cancelled() => {
+                info!("API server shutting down");
+            }
         }
     });
@@ -126,9 +142,15 @@
     tokio::time::sleep(std::time::Duration::from_millis(500)).await;
 
     // 2. Spawn scheduler
-    let scheduler = Scheduler::new(state.storage.clone(), SchedulerConfig::default());
+    let scheduler = Scheduler::new(
+        state.storage.clone(),
+        state.version_store.clone(),
+        state.event_tx.clone(),
+        SchedulerConfig::default(),
+    );
+    let scheduler_token = token.clone();
     let scheduler_handle = tokio::spawn(async move {
-        if let Err(e) = scheduler.run().await {
+        if let Err(e) = scheduler.run(scheduler_token).await {
             error!("Scheduler error: {}", e);
         }
     });
@@ -152,9 +174,15 @@
         }),
     };
 
-    let controller = PodController::new(runtime, api_client.clone(), controller_config);
+    let controller = PodController::new(
+        runtime,
+        api_client.clone(),
+        state.event_tx.clone(),
+        controller_config,
+    );
+    let controller_token = token.clone();
     let controller_handle = tokio::spawn(async move {
-        if let Err(e) = controller.run().await {
+        if let Err(e) = controller.run(controller_token).await {
            error!("Pod controller error: {}", e);
         }
     });
@@ -162,8 +190,9 @@
     // 5. Spawn node agent
     let node_agent_config = NodeAgentConfig::new(node_name.to_string(), api_url);
     let node_agent = NodeAgent::new(api_client, node_agent_config);
+    let agent_token = token.clone();
    let node_agent_handle = tokio::spawn(async move {
-        if let Err(e) = node_agent.run().await {
+        if let Err(e) = node_agent.run(agent_token).await {
            error!("Node agent error: {}", e);
        }
    });
@@ -178,14 +207,45 @@
        .await
        .map_err(|e| miette::miette!("Failed to listen for ctrl-c: {}", e))?;

-    info!("Shutting down...");
+    info!("Shutting down gracefully...");
+    token.cancel();

-    // Abort all tasks
-    api_handle.abort();
-    scheduler_handle.abort();
-    controller_handle.abort();
-    node_agent_handle.abort();
+    // Wait for all tasks to finish with a timeout
+    let shutdown_timeout = std::time::Duration::from_secs(5);
+    let _ = tokio::time::timeout(shutdown_timeout, async {
+        let _ = tokio::join!(
+            api_handle,
+            scheduler_handle,
+            controller_handle,
+            node_agent_handle,
+        );
+    })
+    .await;
+
+    info!("Shutdown complete");
+    Ok(())
+}
+
+/// Bootstrap the "default" namespace if it doesn't already exist
+async fn bootstrap_default_namespace(state: &AppState) -> miette::Result<()> {
+    use reddwarf_apiserver::handlers::common::create_resource;
+
+    let mut ns = Namespace::default();
+    ns.metadata.name = Some("default".to_string());
+
+    match create_resource(state, ns).await {
+        Ok(_) => info!("Created default namespace"),
+        Err(ApiError::AlreadyExists(_)) => {
+            // Already exists — fine
+        }
+        Err(e) => {
+            return Err(miette::miette!(
+                "Failed to bootstrap default namespace: {:?}",
+                e
+            ))
+        }
+    }
 
     Ok(())
 }