Add dynamic node resource detection with configurable system reservations

Replace hardcoded memory (8Gi) and pod limits (110) in the node agent with
actual system detection via the sys-info crate. CPU and memory are detected
once at NodeAgent construction and reused on every heartbeat. Capacity
reports raw hardware values while allocatable subtracts configurable
reservations (--system-reserved-cpu, --system-reserved-memory, --max-pods),
giving the scheduler accurate data for filtering and scoring.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Till Wegmueller 2026-02-14 21:17:43 +01:00
parent 58171c7555
commit d3eb0b2511
No known key found for this signature in database
8 changed files with 423 additions and 25 deletions

11
Cargo.lock generated
View file

@ -1543,6 +1543,7 @@ dependencies = [
"reqwest",
"serde",
"serde_json",
"sys-info",
"tempfile",
"thiserror 2.0.18",
"tokio",
@ -2020,6 +2021,16 @@ dependencies = [
"syn",
]
[[package]]
name = "sys-info"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "system-configuration"
version = "0.6.1"

View file

@ -80,6 +80,9 @@ rustls = "0.23"
rustls-pemfile = "2.0"
axum-server = { version = "0.7", features = ["tls-rustls"] }
# System info
sys-info = "0.9"
# Testing
tempfile = "3.0"

View file

@ -24,6 +24,7 @@ async-trait = { workspace = true }
reqwest = { workspace = true }
chrono = { workspace = true }
futures-util = { workspace = true }
sys-info = { workspace = true }
[dev-dependencies]
tempfile = { workspace = true }

View file

@ -144,6 +144,17 @@ pub enum RuntimeError {
message: String,
},
/// Resource detection failed
#[error("Failed to detect system resources: {message}")]
#[diagnostic(
code(reddwarf::runtime::resource_detection_failed),
help("System resource detection is non-fatal. The node agent will fall back to default values (CPU from available_parallelism, 8Gi memory, 110 pods). To investigate, check that /proc/meminfo is readable (Linux) or that the system supports sysconf (illumos/macOS)")
)]
ResourceDetectionFailed {
#[allow(unused)]
message: String,
},
/// Internal error
#[error("Internal runtime error: {message}")]
#[diagnostic(
@ -224,6 +235,12 @@ impl RuntimeError {
}
}
/// Shorthand for building [`RuntimeError::ResourceDetectionFailed`].
///
/// Per the variant's help text, callers treat this error as non-fatal and
/// fall back to default capacity values rather than aborting.
pub fn resource_detection_failed(message: impl Into<String>) -> Self {
    Self::ResourceDetectionFailed {
        message: message.into(),
    }
}
pub fn internal_error(message: impl Into<String>) -> Self {
Self::InternalError {
message: message.into(),

View file

@ -13,6 +13,7 @@ pub mod network;
pub mod node_agent;
pub mod node_health;
pub mod storage;
pub mod sysinfo;
pub mod traits;
pub mod types;
pub mod zone;

View file

@ -1,5 +1,8 @@
use crate::api_client::ApiClient;
use crate::error::{Result, RuntimeError};
use crate::sysinfo::{
compute_node_resources, format_memory_quantity, NodeResources, ResourceReservation,
};
use k8s_openapi::api::core::v1::{Node, NodeAddress, NodeCondition, NodeStatus};
use k8s_openapi::apimachinery::pkg::api::resource::Quantity;
use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
@ -18,6 +21,12 @@ pub struct NodeAgentConfig {
pub api_url: String,
/// Interval between heartbeats
pub heartbeat_interval: Duration,
/// CPU to reserve for system daemons, in millicores (default: 100 = 100m)
pub system_reserved_cpu_millicores: i64,
/// Memory to reserve for system daemons, in bytes (default: 256Mi)
pub system_reserved_memory_bytes: i64,
/// Maximum number of pods this node will accept (default: 110)
pub max_pods: u32,
}
impl NodeAgentConfig {
@ -26,6 +35,9 @@ impl NodeAgentConfig {
node_name,
api_url,
heartbeat_interval: Duration::from_secs(10),
system_reserved_cpu_millicores: 100,
system_reserved_memory_bytes: 256 * 1024 * 1024,
max_pods: 110,
}
}
}
@ -34,11 +46,56 @@ impl NodeAgentConfig {
pub struct NodeAgent {
api_client: Arc<ApiClient>,
config: NodeAgentConfig,
/// Detected system resources (None if detection failed at startup).
detected: Option<NodeResources>,
}
impl NodeAgent {
/// Construct a node agent, probing system resources once up front.
///
/// Detection failure is non-fatal: `detected` stays `None` and
/// `build_node()` falls back to hardcoded defaults (8Gi / 110 pods).
///
/// NOTE(review): the rendered diff left the old one-line body
/// (`Self { api_client, config }`) in place next to the new one; this is the
/// reconstructed post-change function with that stale line removed.
pub fn new(api_client: Arc<ApiClient>, config: NodeAgentConfig) -> Self {
    // Reservation comes straight from the CLI-populated config.
    let reservation = ResourceReservation {
        cpu_millicores: config.system_reserved_cpu_millicores,
        memory_bytes: config.system_reserved_memory_bytes,
    };
    // Detect once at construction; every heartbeat reuses the result.
    let detected = match compute_node_resources(&reservation, config.max_pods) {
        Ok(nr) => {
            info!(
                cpu_count = nr.capacity.cpu_count,
                total_memory = %format_memory_quantity(nr.capacity.total_memory_bytes),
                allocatable_cpu_m = nr.allocatable_cpu_millicores,
                allocatable_memory = %format_memory_quantity(nr.allocatable_memory_bytes),
                max_pods = nr.max_pods,
                "Detected system resources"
            );
            Some(nr)
        }
        Err(e) => {
            // Best-effort: log and continue with fallback values.
            warn!(
                error = %e,
                "Failed to detect system resources, falling back to defaults"
            );
            None
        }
    };
    Self {
        api_client,
        config,
        detected,
    }
}
/// Test-only constructor that injects a pre-computed (or absent) detection
/// result, bypassing the real resource probe performed by `new`. Used to
/// simulate detection failure (`detected = None`) deterministically.
#[cfg(test)]
fn new_with_detected(
    api_client: Arc<ApiClient>,
    config: NodeAgentConfig,
    detected: Option<NodeResources>,
) -> Self {
    Self {
        api_client,
        config,
        detected,
    }
}
/// Register this host as a Node resource
@ -108,22 +165,48 @@ impl NodeAgent {
fn build_node(&self) -> Node {
let hostname = self.config.node_name.clone();
let (capacity, allocatable) = if let Some(ref nr) = self.detected {
let cap_cpu = nr.capacity.cpu_count.to_string();
let cap_mem = format_memory_quantity(nr.capacity.total_memory_bytes);
let pods = nr.max_pods.to_string();
let alloc_cpu = format!("{}m", nr.allocatable_cpu_millicores);
let alloc_mem = format_memory_quantity(nr.allocatable_memory_bytes);
let capacity = BTreeMap::from([
("cpu".to_string(), Quantity(cap_cpu)),
("memory".to_string(), Quantity(cap_mem)),
("pods".to_string(), Quantity(pods.clone())),
]);
let allocatable = BTreeMap::from([
("cpu".to_string(), Quantity(alloc_cpu)),
("memory".to_string(), Quantity(alloc_mem)),
("pods".to_string(), Quantity(pods)),
]);
(capacity, allocatable)
} else {
// Fallback: use available_parallelism for CPU, hardcoded memory
let cpu_count = std::thread::available_parallelism()
.map(|n| n.get().to_string())
.unwrap_or_else(|_| "1".to_string());
let allocatable = BTreeMap::from([
let capacity = BTreeMap::from([
("cpu".to_string(), Quantity(cpu_count.clone())),
("memory".to_string(), Quantity("8Gi".to_string())),
("pods".to_string(), Quantity("110".to_string())),
]);
let capacity = BTreeMap::from([
let allocatable = BTreeMap::from([
("cpu".to_string(), Quantity(cpu_count)),
("memory".to_string(), Quantity("8Gi".to_string())),
("pods".to_string(), Quantity("110".to_string())),
]);
(capacity, allocatable)
};
Node {
metadata: ObjectMeta {
name: Some(self.config.node_name.clone()),
@ -169,6 +252,7 @@ impl NodeAgent {
#[cfg(test)]
mod tests {
use super::*;
use crate::sysinfo::detect_system_resources;
#[test]
fn test_node_agent_config_defaults() {
@ -176,6 +260,9 @@ mod tests {
NodeAgentConfig::new("test-node".to_string(), "http://127.0.0.1:6443".to_string());
assert_eq!(config.node_name, "test-node");
assert_eq!(config.heartbeat_interval, Duration::from_secs(10));
assert_eq!(config.system_reserved_cpu_millicores, 100);
assert_eq!(config.system_reserved_memory_bytes, 256 * 1024 * 1024);
assert_eq!(config.max_pods, 110);
}
#[test]
@ -206,25 +293,90 @@ mod tests {
let node = agent.build_node();
let status = node.status.unwrap();
// Check allocatable
// Check allocatable has all keys
let alloc = status.allocatable.unwrap();
assert!(alloc.contains_key("cpu"));
assert!(alloc.contains_key("memory"));
assert!(alloc.contains_key("pods"));
assert_eq!(alloc["memory"].0, "8Gi");
assert_eq!(alloc["pods"].0, "110");
// CPU should match available parallelism
// Memory should come from detected system (not hardcoded 8Gi)
let sys = detect_system_resources().expect("detection works in test");
let expected_cap_mem = format_memory_quantity(sys.total_memory_bytes);
let cap = status.capacity.unwrap();
assert_eq!(cap["memory"].0, expected_cap_mem);
// Capacity CPU should match detected cpu_count
assert_eq!(cap["cpu"].0, sys.cpu_count.to_string());
}
// End-to-end check that build_node() reports allocatable strictly below
// capacity when detection succeeds (the default reservations are nonzero,
// so both CPU and memory must shrink).
#[test]
fn test_build_node_allocatable_less_than_capacity() {
    let api_client = Arc::new(ApiClient::new("http://127.0.0.1:6443"));
    let config =
        NodeAgentConfig::new("test-node".to_string(), "http://127.0.0.1:6443".to_string());
    let agent = NodeAgent::new(api_client, config);

    // Agent should have detected resources (we're on a real host)
    assert!(agent.detected.is_some(), "detection should succeed in tests");

    let node = agent.build_node();
    let status = node.status.unwrap();
    let cap = status.capacity.unwrap();
    let alloc = status.allocatable.unwrap();

    // Allocatable CPU (millicores) should be less than capacity CPU (whole cores)
    let cap_cpu_m = reddwarf_core::resources::ResourceQuantities::parse_cpu(&cap["cpu"].0)
        .expect("valid cpu");
    let alloc_cpu_m =
        reddwarf_core::resources::ResourceQuantities::parse_cpu(&alloc["cpu"].0)
            .expect("valid cpu");
    assert!(
        alloc_cpu_m < cap_cpu_m,
        "allocatable CPU {}m should be less than capacity {}m",
        alloc_cpu_m,
        cap_cpu_m,
    );

    // Allocatable memory should be less than capacity memory
    let cap_mem =
        reddwarf_core::resources::ResourceQuantities::parse_memory(&cap["memory"].0)
            .expect("valid mem");
    let alloc_mem =
        reddwarf_core::resources::ResourceQuantities::parse_memory(&alloc["memory"].0)
            .expect("valid mem");
    assert!(
        alloc_mem < cap_mem,
        "allocatable memory {} should be less than capacity {}",
        alloc_mem,
        cap_mem,
    );
}
// When detection fails (detected == None), build_node() must fall back to
// the historical hardcoded values: 8Gi memory, 110 pods, CPU from
// available_parallelism — identically in both capacity and allocatable.
//
// NOTE(review): the rendered diff interleaved an old-test context line that
// re-unwrapped `status.capacity` after it had already been moved out
// (use-after-move, would not compile); this reconstruction binds it once.
#[test]
fn test_build_node_fallback_on_detection_failure() {
    let api_client = Arc::new(ApiClient::new("http://127.0.0.1:6443"));
    let config =
        NodeAgentConfig::new("test-node".to_string(), "http://127.0.0.1:6443".to_string());
    // Simulate detection failure via the test-only constructor.
    let agent = NodeAgent::new_with_detected(api_client, config, None);

    let node = agent.build_node();
    let status = node.status.unwrap();
    let alloc = status.allocatable.unwrap();
    let cap = status.capacity.unwrap();

    // Should fall back to hardcoded defaults
    assert_eq!(alloc["memory"].0, "8Gi");
    assert_eq!(alloc["pods"].0, "110");
    assert_eq!(cap["memory"].0, "8Gi");
    assert_eq!(cap["pods"].0, "110");

    // CPU falls back to available_parallelism
    let expected_cpu = std::thread::available_parallelism()
        .map(|n| n.get().to_string())
        .unwrap_or_else(|_| "1".to_string());
    assert_eq!(alloc["cpu"].0, expected_cpu);

    // Capacity carries the same fallback keys and CPU value
    assert!(cap.contains_key("cpu"));
    assert!(cap.contains_key("memory"));
    assert!(cap.contains_key("pods"));
    assert_eq!(cap["cpu"].0, expected_cpu);
}
}

View file

@ -0,0 +1,173 @@
use crate::error::RuntimeError;
/// Detected physical resources of the host.
///
/// Produced by [`detect_system_resources`]; reported verbatim as node
/// *capacity* (allocatable is derived from it in [`compute_node_resources`]).
#[derive(Debug, Clone)]
pub struct SystemResources {
    /// Number of logical CPUs.
    pub cpu_count: u32,
    /// Total physical memory in bytes.
    pub total_memory_bytes: u64,
}
/// How much CPU / memory to reserve for system daemons.
///
/// Populated from the `--system-reserved-cpu` / `--system-reserved-memory`
/// CLI flags (parsed to millicores / bytes); subtracted from detected
/// capacity when computing allocatable values.
#[derive(Debug, Clone)]
pub struct ResourceReservation {
    /// CPU to reserve in millicores (e.g. 100 = 100m).
    pub cpu_millicores: i64,
    /// Memory to reserve in bytes.
    pub memory_bytes: i64,
}
/// Computed node resource budget (capacity minus reservation).
///
/// Allocatable values are clamped at zero by [`compute_node_resources`],
/// so they never go negative even for oversized reservations.
#[derive(Debug, Clone)]
pub struct NodeResources {
    /// Raw hardware capacity.
    pub capacity: SystemResources,
    /// Allocatable CPU after subtracting reservation, in millicores.
    pub allocatable_cpu_millicores: i64,
    /// Allocatable memory after subtracting reservation, in bytes.
    pub allocatable_memory_bytes: u64,
    /// Maximum number of pods this node will accept.
    pub max_pods: u32,
}
/// Detect the host's CPU count and total memory.
///
/// Uses the `sys_info` crate which supports illumos, Linux, and macOS.
///
/// # Errors
///
/// Returns [`RuntimeError::ResourceDetectionFailed`] when either the CPU or
/// the memory probe fails; callers treat this as non-fatal.
pub fn detect_system_resources() -> Result<SystemResources, RuntimeError> {
    let cpu_count = sys_info::cpu_num().map_err(|e| {
        RuntimeError::resource_detection_failed(format!("failed to detect CPU count: {e}"))
    })?;

    let mem = sys_info::mem_info().map_err(|e| {
        RuntimeError::resource_detection_failed(format!("failed to detect memory: {e}"))
    })?;

    // `sys_info::mem_info().total` is reported in KiB. Saturate rather than
    // wrap: a real overflow is impossible for realistic values, but a debug
    // build would panic on a pathological report from the OS probe.
    let total_memory_bytes = mem.total.saturating_mul(1024);

    Ok(SystemResources {
        cpu_count,
        total_memory_bytes,
    })
}
/// Detect system resources and compute allocatable values after subtracting
/// the given reservation.
///
/// Allocatable values are clamped so they never go negative, and a negative
/// reservation (possible, since the fields are `i64`) is treated as zero so
/// it can never *inflate* allocatable beyond physical capacity.
///
/// # Errors
///
/// Propagates [`RuntimeError`] from [`detect_system_resources`] when the
/// underlying probes fail.
pub fn compute_node_resources(
    reservation: &ResourceReservation,
    max_pods: u32,
) -> Result<NodeResources, RuntimeError> {
    let capacity = detect_system_resources()?;

    // Clamp the reservation to non-negative before applying it.
    let reserved_cpu_millicores = reservation.cpu_millicores.max(0);
    let reserved_memory_bytes = reservation.memory_bytes.max(0) as u64;

    let capacity_cpu_millicores = i64::from(capacity.cpu_count) * 1000;
    let allocatable_cpu_millicores =
        (capacity_cpu_millicores - reserved_cpu_millicores).max(0);

    // Saturating subtraction keeps the math in u64 and avoids the lossy
    // `total_memory_bytes as i64` cast of the previous implementation.
    let allocatable_memory_bytes =
        capacity.total_memory_bytes.saturating_sub(reserved_memory_bytes);

    Ok(NodeResources {
        capacity,
        allocatable_cpu_millicores,
        allocatable_memory_bytes,
        max_pods,
    })
}
/// Convert a byte count to the most human-friendly Kubernetes Quantity string.
///
/// Picks the largest clean binary unit: `"16Gi"`, `"7680Mi"`, `"512Ki"`, or
/// raw bytes if nothing divides evenly. Zero is always rendered as `"0"`.
pub fn format_memory_quantity(bytes: u64) -> String {
    // Largest unit first, so the cleanest (biggest) suffix wins.
    const UNITS: [(u64, &str); 3] = [
        (1024 * 1024 * 1024, "Gi"),
        (1024 * 1024, "Mi"),
        (1024, "Ki"),
    ];

    // Zero divides evenly by everything; report it as plain "0" instead.
    if bytes == 0 {
        return "0".to_string();
    }

    for &(unit, suffix) in UNITS.iter() {
        if bytes % unit == 0 {
            return format!("{}{}", bytes / unit, suffix);
        }
    }

    // Not aligned to any binary unit: raw byte count.
    bytes.to_string()
}
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: detection must succeed on any real host the suite runs on.
    #[test]
    fn test_detect_system_resources() {
        let res = detect_system_resources().expect("detection should succeed in test env");
        assert!(res.cpu_count > 0, "should detect at least 1 CPU");
        assert!(
            res.total_memory_bytes > 0,
            "should detect nonzero memory"
        );
    }

    // GiB-aligned values render with the Gi suffix.
    #[test]
    fn test_format_memory_quantity_gi() {
        assert_eq!(format_memory_quantity(16 * 1024 * 1024 * 1024), "16Gi");
        assert_eq!(format_memory_quantity(1024 * 1024 * 1024), "1Gi");
    }

    // MiB-aligned (but not GiB-aligned) values fall through to Mi.
    #[test]
    fn test_format_memory_quantity_mi() {
        assert_eq!(format_memory_quantity(7680 * 1024 * 1024), "7680Mi");
        assert_eq!(format_memory_quantity(512 * 1024 * 1024), "512Mi");
    }

    // KiB-aligned (but not MiB-aligned) values use Ki.
    #[test]
    fn test_format_memory_quantity_ki() {
        assert_eq!(format_memory_quantity(512 * 1024), "512Ki");
    }

    // Anything not 1024-aligned — including zero — prints as raw bytes.
    #[test]
    fn test_format_memory_quantity_raw_bytes() {
        assert_eq!(format_memory_quantity(1023), "1023");
        assert_eq!(format_memory_quantity(0), "0");
    }

    // With nonzero reservations, allocatable must be strictly below capacity
    // for both CPU and memory, and max_pods passes through untouched.
    #[test]
    fn test_compute_reserves_subtracted() {
        let reservation = ResourceReservation {
            cpu_millicores: 100,
            memory_bytes: 256 * 1024 * 1024,
        };
        let nr = compute_node_resources(&reservation, 110)
            .expect("should succeed in test env");

        let capacity_cpu_millis = nr.capacity.cpu_count as i64 * 1000;
        assert!(
            nr.allocatable_cpu_millicores < capacity_cpu_millis,
            "allocatable CPU ({}) should be less than capacity ({})",
            nr.allocatable_cpu_millicores,
            capacity_cpu_millis,
        );
        assert!(
            nr.allocatable_memory_bytes < nr.capacity.total_memory_bytes,
            "allocatable memory ({}) should be less than capacity ({})",
            nr.allocatable_memory_bytes,
            nr.capacity.total_memory_bytes,
        );
        assert_eq!(nr.max_pods, 110);
    }

    // A reservation larger than the whole machine clamps allocatable to zero
    // rather than going negative.
    #[test]
    fn test_reservation_clamp_to_zero() {
        let reservation = ResourceReservation {
            cpu_millicores: i64::MAX,
            memory_bytes: i64::MAX,
        };
        let nr = compute_node_resources(&reservation, 110)
            .expect("should succeed in test env");
        assert_eq!(nr.allocatable_cpu_millicores, 0);
        assert_eq!(nr.allocatable_memory_bytes, 0);
    }
}

View file

@ -1,6 +1,6 @@
use clap::{Parser, Subcommand};
use reddwarf_apiserver::{ApiError, ApiServer, AppState, Config as ApiConfig, TlsMode};
use reddwarf_core::Namespace;
use reddwarf_core::{Namespace, ResourceQuantities};
use reddwarf_runtime::{
ApiClient, Ipam, MockRuntime, MockStorageEngine, NodeAgent, NodeAgentConfig,
NodeHealthChecker, NodeHealthCheckerConfig, PodController, PodControllerConfig, StorageEngine,
@ -84,6 +84,15 @@ enum Commands {
/// Etherstub name for pod networking
#[arg(long, default_value = "reddwarf0")]
etherstub_name: String,
/// CPU to reserve for system daemons (e.g. "100m", "0.1")
#[arg(long, default_value = "100m")]
system_reserved_cpu: String,
/// Memory to reserve for system daemons (e.g. "256Mi", "1Gi")
#[arg(long, default_value = "256Mi")]
system_reserved_memory: String,
/// Maximum number of pods this node will accept
#[arg(long, default_value_t = 110)]
max_pods: u32,
#[command(flatten)]
tls_args: TlsArgs,
},
@ -118,8 +127,30 @@ async fn main() -> miette::Result<()> {
zonepath_prefix,
pod_cidr,
etherstub_name,
system_reserved_cpu,
system_reserved_memory,
max_pods,
tls_args,
} => {
let reserved_cpu_millicores =
ResourceQuantities::parse_cpu(&system_reserved_cpu).map_err(|e| {
miette::miette!(
help = "Use a value like '100m' or '0.1' for --system-reserved-cpu",
"Invalid --system-reserved-cpu '{}': {}",
system_reserved_cpu,
e
)
})?;
let reserved_memory_bytes =
ResourceQuantities::parse_memory(&system_reserved_memory).map_err(|e| {
miette::miette!(
help = "Use a value like '256Mi' or '1Gi' for --system-reserved-memory",
"Invalid --system-reserved-memory '{}': {}",
system_reserved_memory,
e
)
})?;
run_agent(
&node_name,
&bind,
@ -131,6 +162,9 @@ async fn main() -> miette::Result<()> {
zonepath_prefix.as_deref(),
&pod_cidr,
&etherstub_name,
reserved_cpu_millicores,
reserved_memory_bytes,
max_pods,
&tls_args,
)
.await
@ -228,6 +262,9 @@ async fn run_agent(
zonepath_prefix: Option<&str>,
pod_cidr: &str,
etherstub_name: &str,
system_reserved_cpu_millicores: i64,
system_reserved_memory_bytes: i64,
max_pods: u32,
tls_args: &TlsArgs,
) -> miette::Result<()> {
info!("Starting reddwarf agent for node '{}'", node_name);
@ -337,7 +374,10 @@ async fn run_agent(
});
// 6. Spawn node agent
let node_agent_config = NodeAgentConfig::new(node_name.to_string(), api_url);
let mut node_agent_config = NodeAgentConfig::new(node_name.to_string(), api_url);
node_agent_config.system_reserved_cpu_millicores = system_reserved_cpu_millicores;
node_agent_config.system_reserved_memory_bytes = system_reserved_memory_bytes;
node_agent_config.max_pods = max_pods;
let node_agent = NodeAgent::new(api_client.clone(), node_agent_config);
let agent_token = token.clone();
let node_agent_handle = tokio::spawn(async move {