From d3eb0b2511f063015705e7a965a6fe43f1bed8b9 Mon Sep 17 00:00:00 2001 From: Till Wegmueller Date: Sat, 14 Feb 2026 21:17:43 +0100 Subject: [PATCH] Add dynamic node resource detection with configurable system reservations Replace hardcoded memory (8Gi) and pod limits (110) in the node agent with actual system detection via the sys-info crate. CPU and memory are detected once at NodeAgent construction and reused on every heartbeat. Capacity reports raw hardware values while allocatable subtracts configurable reservations (--system-reserved-cpu, --system-reserved-memory, --max-pods), giving the scheduler accurate data for filtering and scoring. Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 11 ++ Cargo.toml | 3 + crates/reddwarf-runtime/Cargo.toml | 1 + crates/reddwarf-runtime/src/error.rs | 17 ++ crates/reddwarf-runtime/src/lib.rs | 1 + crates/reddwarf-runtime/src/node_agent.rs | 198 +++++++++++++++++++--- crates/reddwarf-runtime/src/sysinfo.rs | 173 +++++++++++++++++++ crates/reddwarf/src/main.rs | 44 ++++- 8 files changed, 423 insertions(+), 25 deletions(-) create mode 100644 crates/reddwarf-runtime/src/sysinfo.rs diff --git a/Cargo.lock b/Cargo.lock index c24f206..95c8c3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1543,6 +1543,7 @@ dependencies = [ "reqwest", "serde", "serde_json", + "sys-info", "tempfile", "thiserror 2.0.18", "tokio", @@ -2020,6 +2021,16 @@ dependencies = [ "syn", ] +[[package]] +name = "sys-info" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "system-configuration" version = "0.6.1" diff --git a/Cargo.toml b/Cargo.toml index 0376bba..fbe4e61 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,6 +80,9 @@ rustls = "0.23" rustls-pemfile = "2.0" axum-server = { version = "0.7", features = ["tls-rustls"] } +# System info +sys-info = "0.9" + # Testing tempfile = "3.0" diff 
--git a/crates/reddwarf-runtime/Cargo.toml b/crates/reddwarf-runtime/Cargo.toml index 31d886e..f14f9f5 100644 --- a/crates/reddwarf-runtime/Cargo.toml +++ b/crates/reddwarf-runtime/Cargo.toml @@ -24,6 +24,7 @@ async-trait = { workspace = true } reqwest = { workspace = true } chrono = { workspace = true } futures-util = { workspace = true } +sys-info = { workspace = true } [dev-dependencies] tempfile = { workspace = true } diff --git a/crates/reddwarf-runtime/src/error.rs b/crates/reddwarf-runtime/src/error.rs index 57b19d6..7c5dd2e 100644 --- a/crates/reddwarf-runtime/src/error.rs +++ b/crates/reddwarf-runtime/src/error.rs @@ -144,6 +144,17 @@ pub enum RuntimeError { message: String, }, + /// Resource detection failed + #[error("Failed to detect system resources: {message}")] + #[diagnostic( + code(reddwarf::runtime::resource_detection_failed), + help("System resource detection is non-fatal. The node agent will fall back to default values (CPU from available_parallelism, 8Gi memory, 110 pods). 
To investigate, check that /proc/meminfo is readable (Linux) or that the system supports sysconf (illumos/macOS)") )] + ResourceDetectionFailed { + #[allow(unused)] + message: String, + }, + /// Internal error #[error("Internal runtime error: {message}")] #[diagnostic( @@ -224,6 +235,12 @@ impl RuntimeError { } } + pub fn resource_detection_failed(message: impl Into<String>) -> Self { + Self::ResourceDetectionFailed { + message: message.into(), + } + } + pub fn internal_error(message: impl Into<String>) -> Self { Self::InternalError { message: message.into(), diff --git a/crates/reddwarf-runtime/src/lib.rs b/crates/reddwarf-runtime/src/lib.rs index dfb691a..3e9e6f8 100644 --- a/crates/reddwarf-runtime/src/lib.rs +++ b/crates/reddwarf-runtime/src/lib.rs @@ -13,6 +13,7 @@ pub mod network; pub mod node_agent; pub mod node_health; pub mod storage; +pub mod sysinfo; pub mod traits; pub mod types; pub mod zone; diff --git a/crates/reddwarf-runtime/src/node_agent.rs b/crates/reddwarf-runtime/src/node_agent.rs index c2d49b6..30860c9 100644 --- a/crates/reddwarf-runtime/src/node_agent.rs +++ b/crates/reddwarf-runtime/src/node_agent.rs @@ -1,5 +1,8 @@ use crate::api_client::ApiClient; use crate::error::{Result, RuntimeError}; +use crate::sysinfo::{ + compute_node_resources, format_memory_quantity, NodeResources, ResourceReservation, +}; use k8s_openapi::api::core::v1::{Node, NodeAddress, NodeCondition, NodeStatus}; use k8s_openapi::apimachinery::pkg::api::resource::Quantity; use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta; @@ -18,6 +21,12 @@ pub struct NodeAgentConfig { pub api_url: String, /// Interval between heartbeats pub heartbeat_interval: Duration, + /// CPU to reserve for system daemons, in millicores (default: 100 = 100m) + pub system_reserved_cpu_millicores: i64, + /// Memory to reserve for system daemons, in bytes (default: 256Mi) + pub system_reserved_memory_bytes: i64, + /// Maximum number of pods this node will accept (default: 110) + pub max_pods: u32, } impl 
NodeAgentConfig { @@ -26,6 +35,9 @@ impl NodeAgentConfig { node_name, api_url, heartbeat_interval: Duration::from_secs(10), + system_reserved_cpu_millicores: 100, + system_reserved_memory_bytes: 256 * 1024 * 1024, + max_pods: 110, } } } @@ -34,11 +46,56 @@ pub struct NodeAgent { api_client: Arc<ApiClient>, config: NodeAgentConfig, + /// Detected system resources (None if detection failed at startup). + detected: Option<NodeResources>, } impl NodeAgent { pub fn new(api_client: Arc<ApiClient>, config: NodeAgentConfig) -> Self { - Self { api_client, config } + let reservation = ResourceReservation { + cpu_millicores: config.system_reserved_cpu_millicores, + memory_bytes: config.system_reserved_memory_bytes, + }; + + let detected = match compute_node_resources(&reservation, config.max_pods) { + Ok(nr) => { + info!( + cpu_count = nr.capacity.cpu_count, + total_memory = %format_memory_quantity(nr.capacity.total_memory_bytes), + allocatable_cpu_m = nr.allocatable_cpu_millicores, + allocatable_memory = %format_memory_quantity(nr.allocatable_memory_bytes), + max_pods = nr.max_pods, + "Detected system resources" + ); + Some(nr) + } + Err(e) => { + warn!( + error = %e, + "Failed to detect system resources, falling back to defaults" + ); + None + } + }; + + Self { + api_client, + config, + detected, + } + } + + #[cfg(test)] + fn new_with_detected( + api_client: Arc<ApiClient>, + config: NodeAgentConfig, + detected: Option<NodeResources>, + ) -> Self { + Self { + api_client, + config, + detected, + } } /// Register this host as a Node resource @@ -108,21 +165,47 @@ impl NodeAgent { fn build_node(&self) -> Node { let hostname = self.config.node_name.clone(); - let cpu_count = std::thread::available_parallelism() - .map(|n| n.get().to_string()) - .unwrap_or_else(|_| "1".to_string()); + let (capacity, allocatable) = if let Some(ref nr) = self.detected { + let cap_cpu = nr.capacity.cpu_count.to_string(); + let cap_mem = format_memory_quantity(nr.capacity.total_memory_bytes); + let pods = nr.max_pods.to_string(); - let 
allocatable = BTreeMap::from([ - ("cpu".to_string(), Quantity(cpu_count.clone())), - ("memory".to_string(), Quantity("8Gi".to_string())), - ("pods".to_string(), Quantity("110".to_string())), - ]); + let alloc_cpu = format!("{}m", nr.allocatable_cpu_millicores); + let alloc_mem = format_memory_quantity(nr.allocatable_memory_bytes); - let capacity = BTreeMap::from([ - ("cpu".to_string(), Quantity(cpu_count)), - ("memory".to_string(), Quantity("8Gi".to_string())), - ("pods".to_string(), Quantity("110".to_string())), - ]); + let capacity = BTreeMap::from([ + ("cpu".to_string(), Quantity(cap_cpu)), + ("memory".to_string(), Quantity(cap_mem)), + ("pods".to_string(), Quantity(pods.clone())), + ]); + + let allocatable = BTreeMap::from([ + ("cpu".to_string(), Quantity(alloc_cpu)), + ("memory".to_string(), Quantity(alloc_mem)), + ("pods".to_string(), Quantity(pods)), + ]); + + (capacity, allocatable) + } else { + // Fallback: use available_parallelism for CPU, hardcoded memory + let cpu_count = std::thread::available_parallelism() + .map(|n| n.get().to_string()) + .unwrap_or_else(|_| "1".to_string()); + + let capacity = BTreeMap::from([ + ("cpu".to_string(), Quantity(cpu_count.clone())), + ("memory".to_string(), Quantity("8Gi".to_string())), + ("pods".to_string(), Quantity("110".to_string())), + ]); + + let allocatable = BTreeMap::from([ + ("cpu".to_string(), Quantity(cpu_count)), + ("memory".to_string(), Quantity("8Gi".to_string())), + ("pods".to_string(), Quantity("110".to_string())), + ]); + + (capacity, allocatable) + }; Node { metadata: ObjectMeta { @@ -169,6 +252,7 @@ impl NodeAgent { #[cfg(test)] mod tests { use super::*; + use crate::sysinfo::detect_system_resources; #[test] fn test_node_agent_config_defaults() { @@ -176,6 +260,9 @@ mod tests { NodeAgentConfig::new("test-node".to_string(), "http://127.0.0.1:6443".to_string()); assert_eq!(config.node_name, "test-node"); assert_eq!(config.heartbeat_interval, Duration::from_secs(10)); + 
assert_eq!(config.system_reserved_cpu_millicores, 100); + assert_eq!(config.system_reserved_memory_bytes, 256 * 1024 * 1024); + assert_eq!(config.max_pods, 110); } #[test] @@ -206,25 +293,90 @@ mod tests { let node = agent.build_node(); let status = node.status.unwrap(); - // Check allocatable + // Check allocatable has all keys let alloc = status.allocatable.unwrap(); assert!(alloc.contains_key("cpu")); assert!(alloc.contains_key("memory")); assert!(alloc.contains_key("pods")); - assert_eq!(alloc["memory"].0, "8Gi"); assert_eq!(alloc["pods"].0, "110"); - // CPU should match available parallelism + // Memory should come from detected system (not hardcoded 8Gi) + let sys = detect_system_resources().expect("detection works in test"); + let expected_cap_mem = format_memory_quantity(sys.total_memory_bytes); + let cap = status.capacity.unwrap(); + assert_eq!(cap["memory"].0, expected_cap_mem); + + // Capacity CPU should match detected cpu_count + assert_eq!(cap["cpu"].0, sys.cpu_count.to_string()); + } + + #[test] + fn test_build_node_allocatable_less_than_capacity() { + let api_client = Arc::new(ApiClient::new("http://127.0.0.1:6443")); + let config = + NodeAgentConfig::new("test-node".to_string(), "http://127.0.0.1:6443".to_string()); + let agent = NodeAgent::new(api_client, config); + + // Agent should have detected resources (we're on a real host) + assert!(agent.detected.is_some(), "detection should succeed in tests"); + + let node = agent.build_node(); + let status = node.status.unwrap(); + let cap = status.capacity.unwrap(); + let alloc = status.allocatable.unwrap(); + + // Allocatable CPU (millicores) should be less than capacity CPU (whole cores) + let cap_cpu_m = reddwarf_core::resources::ResourceQuantities::parse_cpu(&cap["cpu"].0) + .expect("valid cpu"); + let alloc_cpu_m = + reddwarf_core::resources::ResourceQuantities::parse_cpu(&alloc["cpu"].0) + .expect("valid cpu"); + assert!( + alloc_cpu_m < cap_cpu_m, + "allocatable CPU {}m should be less than 
capacity {}m", + alloc_cpu_m, + cap_cpu_m, + ); + + // Allocatable memory should be less than capacity memory + let cap_mem = + reddwarf_core::resources::ResourceQuantities::parse_memory(&cap["memory"].0) + .expect("valid mem"); + let alloc_mem = + reddwarf_core::resources::ResourceQuantities::parse_memory(&alloc["memory"].0) + .expect("valid mem"); + assert!( + alloc_mem < cap_mem, + "allocatable memory {} should be less than capacity {}", + alloc_mem, + cap_mem, + ); + } + + #[test] + fn test_build_node_fallback_on_detection_failure() { + let api_client = Arc::new(ApiClient::new("http://127.0.0.1:6443")); + let config = + NodeAgentConfig::new("test-node".to_string(), "http://127.0.0.1:6443".to_string()); + // Simulate detection failure + let agent = NodeAgent::new_with_detected(api_client, config, None); + + let node = agent.build_node(); + let status = node.status.unwrap(); + let alloc = status.allocatable.unwrap(); + let cap = status.capacity.unwrap(); + + // Should fall back to hardcoded defaults + assert_eq!(alloc["memory"].0, "8Gi"); + assert_eq!(alloc["pods"].0, "110"); + assert_eq!(cap["memory"].0, "8Gi"); + assert_eq!(cap["pods"].0, "110"); + + // CPU falls back to available_parallelism let expected_cpu = std::thread::available_parallelism() .map(|n| n.get().to_string()) .unwrap_or_else(|_| "1".to_string()); assert_eq!(alloc["cpu"].0, expected_cpu); - - // Check capacity - let cap = status.capacity.unwrap(); - assert!(cap.contains_key("cpu")); - assert!(cap.contains_key("memory")); - assert!(cap.contains_key("pods")); assert_eq!(cap["cpu"].0, expected_cpu); } } diff --git a/crates/reddwarf-runtime/src/sysinfo.rs b/crates/reddwarf-runtime/src/sysinfo.rs new file mode 100644 index 0000000..b7db695 --- /dev/null +++ b/crates/reddwarf-runtime/src/sysinfo.rs @@ -0,0 +1,173 @@ +use crate::error::RuntimeError; + +/// Detected physical resources of the host. +#[derive(Debug, Clone)] +pub struct SystemResources { + /// Number of logical CPUs. 
+ pub cpu_count: u32, + /// Total physical memory in bytes. + pub total_memory_bytes: u64, +} + +/// How much CPU / memory to reserve for system daemons. +#[derive(Debug, Clone)] +pub struct ResourceReservation { + /// CPU to reserve in millicores (e.g. 100 = 100m). + pub cpu_millicores: i64, + /// Memory to reserve in bytes. + pub memory_bytes: i64, +} + +/// Computed node resource budget (capacity minus reservation). +#[derive(Debug, Clone)] +pub struct NodeResources { + /// Raw hardware capacity. + pub capacity: SystemResources, + /// Allocatable CPU after subtracting reservation, in millicores. + pub allocatable_cpu_millicores: i64, + /// Allocatable memory after subtracting reservation, in bytes. + pub allocatable_memory_bytes: u64, + /// Maximum number of pods this node will accept. + pub max_pods: u32, +} + +/// Detect the host's CPU count and total memory. +/// +/// Uses the `sys_info` crate which supports illumos, Linux, and macOS. +pub fn detect_system_resources() -> Result<SystemResources, RuntimeError> { + let cpu_count = sys_info::cpu_num().map_err(|e| { + RuntimeError::resource_detection_failed(format!("failed to detect CPU count: {e}")) + })?; + + let mem = sys_info::mem_info().map_err(|e| { + RuntimeError::resource_detection_failed(format!("failed to detect memory: {e}")) + })?; + + // sys_info::mem_info().total is in KiB + let total_memory_bytes = mem.total * 1024; + + Ok(SystemResources { + cpu_count, + total_memory_bytes, + }) +} + +/// Detect system resources and compute allocatable values after subtracting +/// the given reservation. Allocatable values are clamped so they never go +/// negative. 
+pub fn compute_node_resources( + reservation: &ResourceReservation, + max_pods: u32, +) -> Result<NodeResources, RuntimeError> { + let capacity = detect_system_resources()?; + + let capacity_cpu_millicores = capacity.cpu_count as i64 * 1000; + let allocatable_cpu_millicores = + (capacity_cpu_millicores - reservation.cpu_millicores).max(0); + + let allocatable_memory_bytes = + (capacity.total_memory_bytes as i64 - reservation.memory_bytes).max(0) as u64; + + Ok(NodeResources { + capacity, + allocatable_cpu_millicores, + allocatable_memory_bytes, + max_pods, + }) +} + +/// Convert a byte count to the most human-friendly Kubernetes Quantity string. +/// +/// Picks the largest clean binary unit: `"16Gi"`, `"7680Mi"`, `"512Ki"`, or +/// raw bytes if nothing divides evenly. +pub fn format_memory_quantity(bytes: u64) -> String { + const GIB: u64 = 1024 * 1024 * 1024; + const MIB: u64 = 1024 * 1024; + const KIB: u64 = 1024; + + if bytes > 0 && bytes % GIB == 0 { + format!("{}Gi", bytes / GIB) + } else if bytes > 0 && bytes % MIB == 0 { + format!("{}Mi", bytes / MIB) + } else if bytes > 0 && bytes % KIB == 0 { + format!("{}Ki", bytes / KIB) + } else { + format!("{}", bytes) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_detect_system_resources() { + let res = detect_system_resources().expect("detection should succeed in test env"); + assert!(res.cpu_count > 0, "should detect at least 1 CPU"); + assert!( + res.total_memory_bytes > 0, + "should detect nonzero memory" + ); + } + + #[test] + fn test_format_memory_quantity_gi() { + assert_eq!(format_memory_quantity(16 * 1024 * 1024 * 1024), "16Gi"); + assert_eq!(format_memory_quantity(1024 * 1024 * 1024), "1Gi"); + } + + #[test] + fn test_format_memory_quantity_mi() { + assert_eq!(format_memory_quantity(7680 * 1024 * 1024), "7680Mi"); + assert_eq!(format_memory_quantity(512 * 1024 * 1024), "512Mi"); + } + + #[test] + fn test_format_memory_quantity_ki() { + assert_eq!(format_memory_quantity(512 * 1024), "512Ki"); + } + + #[test] + 
fn test_format_memory_quantity_raw_bytes() { + assert_eq!(format_memory_quantity(1023), "1023"); + assert_eq!(format_memory_quantity(0), "0"); + } + + #[test] + fn test_compute_reserves_subtracted() { + let reservation = ResourceReservation { + cpu_millicores: 100, + memory_bytes: 256 * 1024 * 1024, + }; + let nr = compute_node_resources(&reservation, 110) + .expect("should succeed in test env"); + + let capacity_cpu_millis = nr.capacity.cpu_count as i64 * 1000; + assert!( + nr.allocatable_cpu_millicores < capacity_cpu_millis, + "allocatable CPU ({}) should be less than capacity ({})", + nr.allocatable_cpu_millicores, + capacity_cpu_millis, + ); + assert!( + nr.allocatable_memory_bytes < nr.capacity.total_memory_bytes, + "allocatable memory ({}) should be less than capacity ({})", + nr.allocatable_memory_bytes, + nr.capacity.total_memory_bytes, + ); + assert_eq!(nr.max_pods, 110); + } + + #[test] + fn test_reservation_clamp_to_zero() { + let reservation = ResourceReservation { + cpu_millicores: i64::MAX, + memory_bytes: i64::MAX, + }; + let nr = compute_node_resources(&reservation, 110) + .expect("should succeed in test env"); + + assert_eq!(nr.allocatable_cpu_millicores, 0); + assert_eq!(nr.allocatable_memory_bytes, 0); + } +} diff --git a/crates/reddwarf/src/main.rs b/crates/reddwarf/src/main.rs index e2248b0..dda8247 100644 --- a/crates/reddwarf/src/main.rs +++ b/crates/reddwarf/src/main.rs @@ -1,6 +1,6 @@ use clap::{Parser, Subcommand}; use reddwarf_apiserver::{ApiError, ApiServer, AppState, Config as ApiConfig, TlsMode}; -use reddwarf_core::Namespace; +use reddwarf_core::{Namespace, ResourceQuantities}; use reddwarf_runtime::{ ApiClient, Ipam, MockRuntime, MockStorageEngine, NodeAgent, NodeAgentConfig, NodeHealthChecker, NodeHealthCheckerConfig, PodController, PodControllerConfig, StorageEngine, @@ -84,6 +84,15 @@ enum Commands { /// Etherstub name for pod networking #[arg(long, default_value = "reddwarf0")] etherstub_name: String, + /// CPU to reserve for 
system daemons (e.g. "100m", "0.1") + #[arg(long, default_value = "100m")] + system_reserved_cpu: String, + /// Memory to reserve for system daemons (e.g. "256Mi", "1Gi") + #[arg(long, default_value = "256Mi")] + system_reserved_memory: String, + /// Maximum number of pods this node will accept + #[arg(long, default_value_t = 110)] + max_pods: u32, #[command(flatten)] tls_args: TlsArgs, }, @@ -118,8 +127,30 @@ async fn main() -> miette::Result<()> { zonepath_prefix, pod_cidr, etherstub_name, + system_reserved_cpu, + system_reserved_memory, + max_pods, tls_args, } => { + let reserved_cpu_millicores = + ResourceQuantities::parse_cpu(&system_reserved_cpu).map_err(|e| { + miette::miette!( + help = "Use a value like '100m' or '0.1' for --system-reserved-cpu", + "Invalid --system-reserved-cpu '{}': {}", + system_reserved_cpu, + e + ) + })?; + let reserved_memory_bytes = + ResourceQuantities::parse_memory(&system_reserved_memory).map_err(|e| { + miette::miette!( + help = "Use a value like '256Mi' or '1Gi' for --system-reserved-memory", + "Invalid --system-reserved-memory '{}': {}", + system_reserved_memory, + e + ) + })?; + run_agent( &node_name, &bind, @@ -131,6 +162,9 @@ async fn main() -> miette::Result<()> { zonepath_prefix.as_deref(), &pod_cidr, ðerstub_name, + reserved_cpu_millicores, + reserved_memory_bytes, + max_pods, &tls_args, ) .await @@ -228,6 +262,9 @@ async fn run_agent( zonepath_prefix: Option<&str>, pod_cidr: &str, etherstub_name: &str, + system_reserved_cpu_millicores: i64, + system_reserved_memory_bytes: i64, + max_pods: u32, tls_args: &TlsArgs, ) -> miette::Result<()> { info!("Starting reddwarf agent for node '{}'", node_name); @@ -337,7 +374,10 @@ async fn run_agent( }); // 6. 
Spawn node agent - let node_agent_config = NodeAgentConfig::new(node_name.to_string(), api_url); + let mut node_agent_config = NodeAgentConfig::new(node_name.to_string(), api_url); + node_agent_config.system_reserved_cpu_millicores = system_reserved_cpu_millicores; + node_agent_config.system_reserved_memory_bytes = system_reserved_memory_bytes; + node_agent_config.max_pods = max_pods; let node_agent = NodeAgent::new(api_client.clone(), node_agent_config); let agent_token = token.clone(); let node_agent_handle = tokio::spawn(async move {