Add dynamic node resource detection with configurable system reservations

Replace hardcoded memory (8Gi) and pod limits (110) in the node agent with
actual system detection via the sys-info crate. CPU and memory are detected
once at NodeAgent construction and reused on every heartbeat. Capacity
reports raw hardware values while allocatable subtracts configurable
reservations (--system-reserved-cpu, --system-reserved-memory, --max-pods),
giving the scheduler accurate data for filtering and scoring.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Till Wegmueller 2026-02-14 21:17:43 +01:00
parent 58171c7555
commit d3eb0b2511
No known key found for this signature in database
8 changed files with 423 additions and 25 deletions

11
Cargo.lock generated
View file

@ -1543,6 +1543,7 @@ dependencies = [
"reqwest",
"serde",
"serde_json",
"sys-info",
"tempfile",
"thiserror 2.0.18",
"tokio",
@ -2020,6 +2021,16 @@ dependencies = [
"syn",
]
[[package]]
name = "sys-info"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "system-configuration"
version = "0.6.1"

View file

@ -80,6 +80,9 @@ rustls = "0.23"
rustls-pemfile = "2.0"
axum-server = { version = "0.7", features = ["tls-rustls"] }
# System info
sys-info = "0.9"
# Testing
tempfile = "3.0"

View file

@ -24,6 +24,7 @@ async-trait = { workspace = true }
reqwest = { workspace = true }
chrono = { workspace = true }
futures-util = { workspace = true }
sys-info = { workspace = true }
[dev-dependencies]
tempfile = { workspace = true }

View file

@ -144,6 +144,17 @@ pub enum RuntimeError {
message: String,
},
/// Resource detection failed
#[error("Failed to detect system resources: {message}")]
#[diagnostic(
code(reddwarf::runtime::resource_detection_failed),
help("System resource detection is non-fatal. The node agent will fall back to default values (CPU from available_parallelism, 8Gi memory, 110 pods). To investigate, check that /proc/meminfo is readable (Linux) or that the system supports sysconf (illumos/macOS)")
)]
ResourceDetectionFailed {
#[allow(unused)]
message: String,
},
/// Internal error
#[error("Internal runtime error: {message}")]
#[diagnostic(
@ -224,6 +235,12 @@ impl RuntimeError {
}
}
/// Shorthand for building [`RuntimeError::ResourceDetectionFailed`].
///
/// Per the variant's help text, callers treat this error as non-fatal and
/// fall back to default capacity values rather than aborting.
pub fn resource_detection_failed(message: impl Into<String>) -> Self {
    Self::ResourceDetectionFailed {
        message: message.into(),
    }
}
pub fn internal_error(message: impl Into<String>) -> Self {
Self::InternalError {
message: message.into(),

View file

@ -13,6 +13,7 @@ pub mod network;
pub mod node_agent;
pub mod node_health;
pub mod storage;
pub mod sysinfo;
pub mod traits;
pub mod types;
pub mod zone;

View file

@ -1,5 +1,8 @@
use crate::api_client::ApiClient;
use crate::error::{Result, RuntimeError};
use crate::sysinfo::{
compute_node_resources, format_memory_quantity, NodeResources, ResourceReservation,
};
use k8s_openapi::api::core::v1::{Node, NodeAddress, NodeCondition, NodeStatus};
use k8s_openapi::apimachinery::pkg::api::resource::Quantity;
use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
@ -18,6 +21,12 @@ pub struct NodeAgentConfig {
pub api_url: String,
/// Interval between heartbeats
pub heartbeat_interval: Duration,
/// CPU to reserve for system daemons, in millicores (default: 100 = 100m)
pub system_reserved_cpu_millicores: i64,
/// Memory to reserve for system daemons, in bytes (default: 256Mi)
pub system_reserved_memory_bytes: i64,
/// Maximum number of pods this node will accept (default: 110)
pub max_pods: u32,
}
impl NodeAgentConfig {
@ -26,6 +35,9 @@ impl NodeAgentConfig {
node_name,
api_url,
heartbeat_interval: Duration::from_secs(10),
system_reserved_cpu_millicores: 100,
system_reserved_memory_bytes: 256 * 1024 * 1024,
max_pods: 110,
}
}
}
@ -34,11 +46,56 @@ impl NodeAgentConfig {
pub struct NodeAgent {
api_client: Arc<ApiClient>,
config: NodeAgentConfig,
/// Detected system resources (None if detection failed at startup).
detected: Option<NodeResources>,
}
impl NodeAgent {
/// Construct a node agent, probing system resources once up front.
///
/// Detection failure is non-fatal: `detected` stays `None` and
/// `build_node()` falls back to hardcoded defaults (8Gi / 110 pods).
///
/// NOTE(review): the rendered diff left the old one-line body
/// (`Self { api_client, config }`) in place next to the new one; this is the
/// reconstructed post-change function with that stale line removed.
pub fn new(api_client: Arc<ApiClient>, config: NodeAgentConfig) -> Self {
    // Reservation comes straight from the CLI-populated config.
    let reservation = ResourceReservation {
        cpu_millicores: config.system_reserved_cpu_millicores,
        memory_bytes: config.system_reserved_memory_bytes,
    };
    // Detect once at construction; every heartbeat reuses the result.
    let detected = match compute_node_resources(&reservation, config.max_pods) {
        Ok(nr) => {
            info!(
                cpu_count = nr.capacity.cpu_count,
                total_memory = %format_memory_quantity(nr.capacity.total_memory_bytes),
                allocatable_cpu_m = nr.allocatable_cpu_millicores,
                allocatable_memory = %format_memory_quantity(nr.allocatable_memory_bytes),
                max_pods = nr.max_pods,
                "Detected system resources"
            );
            Some(nr)
        }
        Err(e) => {
            // Best-effort: log and continue with fallback values.
            warn!(
                error = %e,
                "Failed to detect system resources, falling back to defaults"
            );
            None
        }
    };
    Self {
        api_client,
        config,
        detected,
    }
}
/// Test-only constructor that injects a pre-computed (or absent) detection
/// result, bypassing the real resource probe performed by `new`. Used to
/// simulate detection failure (`detected = None`) deterministically.
#[cfg(test)]
fn new_with_detected(
    api_client: Arc<ApiClient>,
    config: NodeAgentConfig,
    detected: Option<NodeResources>,
) -> Self {
    Self {
        api_client,
        config,
        detected,
    }
}
/// Register this host as a Node resource
@ -108,22 +165,48 @@ impl NodeAgent {
fn build_node(&self) -> Node {
let hostname = self.config.node_name.clone();
let (capacity, allocatable) = if let Some(ref nr) = self.detected {
let cap_cpu = nr.capacity.cpu_count.to_string();
let cap_mem = format_memory_quantity(nr.capacity.total_memory_bytes);
let pods = nr.max_pods.to_string();
let alloc_cpu = format!("{}m", nr.allocatable_cpu_millicores);
let alloc_mem = format_memory_quantity(nr.allocatable_memory_bytes);
let capacity = BTreeMap::from([
("cpu".to_string(), Quantity(cap_cpu)),
("memory".to_string(), Quantity(cap_mem)),
("pods".to_string(), Quantity(pods.clone())),
]);
let allocatable = BTreeMap::from([
("cpu".to_string(), Quantity(alloc_cpu)),
("memory".to_string(), Quantity(alloc_mem)),
("pods".to_string(), Quantity(pods)),
]);
(capacity, allocatable)
} else {
// Fallback: use available_parallelism for CPU, hardcoded memory
let cpu_count = std::thread::available_parallelism()
.map(|n| n.get().to_string())
.unwrap_or_else(|_| "1".to_string());
let allocatable = BTreeMap::from([
let capacity = BTreeMap::from([
("cpu".to_string(), Quantity(cpu_count.clone())),
("memory".to_string(), Quantity("8Gi".to_string())),
("pods".to_string(), Quantity("110".to_string())),
]);
let capacity = BTreeMap::from([
let allocatable = BTreeMap::from([
("cpu".to_string(), Quantity(cpu_count)),
("memory".to_string(), Quantity("8Gi".to_string())),
("pods".to_string(), Quantity("110".to_string())),
]);
(capacity, allocatable)
};
Node {
metadata: ObjectMeta {
name: Some(self.config.node_name.clone()),
@ -169,6 +252,7 @@ impl NodeAgent {
#[cfg(test)]
mod tests {
use super::*;
use crate::sysinfo::detect_system_resources;
#[test]
fn test_node_agent_config_defaults() {
@ -176,6 +260,9 @@ mod tests {
NodeAgentConfig::new("test-node".to_string(), "http://127.0.0.1:6443".to_string());
assert_eq!(config.node_name, "test-node");
assert_eq!(config.heartbeat_interval, Duration::from_secs(10));
assert_eq!(config.system_reserved_cpu_millicores, 100);
assert_eq!(config.system_reserved_memory_bytes, 256 * 1024 * 1024);
assert_eq!(config.max_pods, 110);
}
#[test]
@ -206,25 +293,90 @@ mod tests {
let node = agent.build_node();
let status = node.status.unwrap();
// Check allocatable
// Check allocatable has all keys
let alloc = status.allocatable.unwrap();
assert!(alloc.contains_key("cpu"));
assert!(alloc.contains_key("memory"));
assert!(alloc.contains_key("pods"));
assert_eq!(alloc["memory"].0, "8Gi");
assert_eq!(alloc["pods"].0, "110");
// CPU should match available parallelism
// Memory should come from detected system (not hardcoded 8Gi)
let sys = detect_system_resources().expect("detection works in test");
let expected_cap_mem = format_memory_quantity(sys.total_memory_bytes);
let cap = status.capacity.unwrap();
assert_eq!(cap["memory"].0, expected_cap_mem);
// Capacity CPU should match detected cpu_count
assert_eq!(cap["cpu"].0, sys.cpu_count.to_string());
}
// End-to-end check that build_node() reports allocatable strictly below
// capacity when detection succeeds (the default reservations are nonzero,
// so both CPU and memory must shrink).
#[test]
fn test_build_node_allocatable_less_than_capacity() {
    let api_client = Arc::new(ApiClient::new("http://127.0.0.1:6443"));
    let config =
        NodeAgentConfig::new("test-node".to_string(), "http://127.0.0.1:6443".to_string());
    let agent = NodeAgent::new(api_client, config);

    // Agent should have detected resources (we're on a real host)
    assert!(agent.detected.is_some(), "detection should succeed in tests");

    let node = agent.build_node();
    let status = node.status.unwrap();
    let cap = status.capacity.unwrap();
    let alloc = status.allocatable.unwrap();

    // Allocatable CPU (millicores) should be less than capacity CPU (whole cores)
    let cap_cpu_m = reddwarf_core::resources::ResourceQuantities::parse_cpu(&cap["cpu"].0)
        .expect("valid cpu");
    let alloc_cpu_m =
        reddwarf_core::resources::ResourceQuantities::parse_cpu(&alloc["cpu"].0)
            .expect("valid cpu");
    assert!(
        alloc_cpu_m < cap_cpu_m,
        "allocatable CPU {}m should be less than capacity {}m",
        alloc_cpu_m,
        cap_cpu_m,
    );

    // Allocatable memory should be less than capacity memory
    let cap_mem =
        reddwarf_core::resources::ResourceQuantities::parse_memory(&cap["memory"].0)
            .expect("valid mem");
    let alloc_mem =
        reddwarf_core::resources::ResourceQuantities::parse_memory(&alloc["memory"].0)
            .expect("valid mem");
    assert!(
        alloc_mem < cap_mem,
        "allocatable memory {} should be less than capacity {}",
        alloc_mem,
        cap_mem,
    );
}
// When detection fails (detected == None), build_node() must fall back to
// the historical hardcoded values: 8Gi memory, 110 pods, CPU from
// available_parallelism — identically in both capacity and allocatable.
//
// NOTE(review): the rendered diff interleaved an old-test context line that
// re-unwrapped `status.capacity` after it had already been moved out
// (use-after-move, would not compile); this reconstruction binds it once.
#[test]
fn test_build_node_fallback_on_detection_failure() {
    let api_client = Arc::new(ApiClient::new("http://127.0.0.1:6443"));
    let config =
        NodeAgentConfig::new("test-node".to_string(), "http://127.0.0.1:6443".to_string());
    // Simulate detection failure via the test-only constructor.
    let agent = NodeAgent::new_with_detected(api_client, config, None);

    let node = agent.build_node();
    let status = node.status.unwrap();
    let alloc = status.allocatable.unwrap();
    let cap = status.capacity.unwrap();

    // Should fall back to hardcoded defaults
    assert_eq!(alloc["memory"].0, "8Gi");
    assert_eq!(alloc["pods"].0, "110");
    assert_eq!(cap["memory"].0, "8Gi");
    assert_eq!(cap["pods"].0, "110");

    // CPU falls back to available_parallelism
    let expected_cpu = std::thread::available_parallelism()
        .map(|n| n.get().to_string())
        .unwrap_or_else(|_| "1".to_string());
    assert_eq!(alloc["cpu"].0, expected_cpu);

    // Capacity carries the same fallback keys and CPU value
    assert!(cap.contains_key("cpu"));
    assert!(cap.contains_key("memory"));
    assert!(cap.contains_key("pods"));
    assert_eq!(cap["cpu"].0, expected_cpu);
}
}

View file

@ -0,0 +1,173 @@
use crate::error::RuntimeError;
/// Detected physical resources of the host.
///
/// Produced by [`detect_system_resources`]; reported verbatim as node
/// *capacity* (allocatable is derived from it in [`compute_node_resources`]).
#[derive(Debug, Clone)]
pub struct SystemResources {
    /// Number of logical CPUs.
    pub cpu_count: u32,
    /// Total physical memory in bytes.
    pub total_memory_bytes: u64,
}
/// How much CPU / memory to reserve for system daemons.
///
/// Populated from the `--system-reserved-cpu` / `--system-reserved-memory`
/// CLI flags (parsed to millicores / bytes); subtracted from detected
/// capacity when computing allocatable values.
#[derive(Debug, Clone)]
pub struct ResourceReservation {
    /// CPU to reserve in millicores (e.g. 100 = 100m).
    pub cpu_millicores: i64,
    /// Memory to reserve in bytes.
    pub memory_bytes: i64,
}
/// Computed node resource budget (capacity minus reservation).
///
/// Allocatable values are clamped at zero by [`compute_node_resources`],
/// so they never go negative even for oversized reservations.
#[derive(Debug, Clone)]
pub struct NodeResources {
    /// Raw hardware capacity.
    pub capacity: SystemResources,
    /// Allocatable CPU after subtracting reservation, in millicores.
    pub allocatable_cpu_millicores: i64,
    /// Allocatable memory after subtracting reservation, in bytes.
    pub allocatable_memory_bytes: u64,
    /// Maximum number of pods this node will accept.
    pub max_pods: u32,
}
/// Detect the host's CPU count and total memory.
///
/// Uses the `sys_info` crate which supports illumos, Linux, and macOS.
///
/// # Errors
///
/// Returns [`RuntimeError::ResourceDetectionFailed`] when either the CPU or
/// the memory probe fails; callers treat this as non-fatal.
pub fn detect_system_resources() -> Result<SystemResources, RuntimeError> {
    let cpu_count = sys_info::cpu_num().map_err(|e| {
        RuntimeError::resource_detection_failed(format!("failed to detect CPU count: {e}"))
    })?;

    let mem = sys_info::mem_info().map_err(|e| {
        RuntimeError::resource_detection_failed(format!("failed to detect memory: {e}"))
    })?;

    // `sys_info::mem_info().total` is reported in KiB. Saturate rather than
    // wrap: a real overflow is impossible for realistic values, but a debug
    // build would panic on a pathological report from the OS probe.
    let total_memory_bytes = mem.total.saturating_mul(1024);

    Ok(SystemResources {
        cpu_count,
        total_memory_bytes,
    })
}
/// Detect system resources and compute allocatable values after subtracting
/// the given reservation.
///
/// Allocatable values are clamped so they never go negative, and a negative
/// reservation (possible, since the fields are `i64`) is treated as zero so
/// it can never *inflate* allocatable beyond physical capacity.
///
/// # Errors
///
/// Propagates [`RuntimeError`] from [`detect_system_resources`] when the
/// underlying probes fail.
pub fn compute_node_resources(
    reservation: &ResourceReservation,
    max_pods: u32,
) -> Result<NodeResources, RuntimeError> {
    let capacity = detect_system_resources()?;

    // Clamp the reservation to non-negative before applying it.
    let reserved_cpu_millicores = reservation.cpu_millicores.max(0);
    let reserved_memory_bytes = reservation.memory_bytes.max(0) as u64;

    let capacity_cpu_millicores = i64::from(capacity.cpu_count) * 1000;
    let allocatable_cpu_millicores =
        (capacity_cpu_millicores - reserved_cpu_millicores).max(0);

    // Saturating subtraction keeps the math in u64 and avoids the lossy
    // `total_memory_bytes as i64` cast of the previous implementation.
    let allocatable_memory_bytes =
        capacity.total_memory_bytes.saturating_sub(reserved_memory_bytes);

    Ok(NodeResources {
        capacity,
        allocatable_cpu_millicores,
        allocatable_memory_bytes,
        max_pods,
    })
}
/// Convert a byte count to the most human-friendly Kubernetes Quantity string.
///
/// Picks the largest clean binary unit: `"16Gi"`, `"7680Mi"`, `"512Ki"`, or
/// raw bytes if nothing divides evenly. Zero is always rendered as `"0"`.
pub fn format_memory_quantity(bytes: u64) -> String {
    // Largest unit first, so the cleanest (biggest) suffix wins.
    const UNITS: [(u64, &str); 3] = [
        (1024 * 1024 * 1024, "Gi"),
        (1024 * 1024, "Mi"),
        (1024, "Ki"),
    ];

    // Zero divides evenly by everything; report it as plain "0" instead.
    if bytes == 0 {
        return "0".to_string();
    }

    for &(unit, suffix) in UNITS.iter() {
        if bytes % unit == 0 {
            return format!("{}{}", bytes / unit, suffix);
        }
    }

    // Not aligned to any binary unit: raw byte count.
    bytes.to_string()
}
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: detection must succeed on any real host the suite runs on.
    #[test]
    fn test_detect_system_resources() {
        let res = detect_system_resources().expect("detection should succeed in test env");
        assert!(res.cpu_count > 0, "should detect at least 1 CPU");
        assert!(
            res.total_memory_bytes > 0,
            "should detect nonzero memory"
        );
    }

    // GiB-aligned values render with the Gi suffix.
    #[test]
    fn test_format_memory_quantity_gi() {
        assert_eq!(format_memory_quantity(16 * 1024 * 1024 * 1024), "16Gi");
        assert_eq!(format_memory_quantity(1024 * 1024 * 1024), "1Gi");
    }

    // MiB-aligned (but not GiB-aligned) values fall through to Mi.
    #[test]
    fn test_format_memory_quantity_mi() {
        assert_eq!(format_memory_quantity(7680 * 1024 * 1024), "7680Mi");
        assert_eq!(format_memory_quantity(512 * 1024 * 1024), "512Mi");
    }

    // KiB-aligned (but not MiB-aligned) values use Ki.
    #[test]
    fn test_format_memory_quantity_ki() {
        assert_eq!(format_memory_quantity(512 * 1024), "512Ki");
    }

    // Anything not 1024-aligned — including zero — prints as raw bytes.
    #[test]
    fn test_format_memory_quantity_raw_bytes() {
        assert_eq!(format_memory_quantity(1023), "1023");
        assert_eq!(format_memory_quantity(0), "0");
    }

    // With nonzero reservations, allocatable must be strictly below capacity
    // for both CPU and memory, and max_pods passes through untouched.
    #[test]
    fn test_compute_reserves_subtracted() {
        let reservation = ResourceReservation {
            cpu_millicores: 100,
            memory_bytes: 256 * 1024 * 1024,
        };
        let nr = compute_node_resources(&reservation, 110)
            .expect("should succeed in test env");

        let capacity_cpu_millis = nr.capacity.cpu_count as i64 * 1000;
        assert!(
            nr.allocatable_cpu_millicores < capacity_cpu_millis,
            "allocatable CPU ({}) should be less than capacity ({})",
            nr.allocatable_cpu_millicores,
            capacity_cpu_millis,
        );
        assert!(
            nr.allocatable_memory_bytes < nr.capacity.total_memory_bytes,
            "allocatable memory ({}) should be less than capacity ({})",
            nr.allocatable_memory_bytes,
            nr.capacity.total_memory_bytes,
        );
        assert_eq!(nr.max_pods, 110);
    }

    // A reservation larger than the whole machine clamps allocatable to zero
    // rather than going negative.
    #[test]
    fn test_reservation_clamp_to_zero() {
        let reservation = ResourceReservation {
            cpu_millicores: i64::MAX,
            memory_bytes: i64::MAX,
        };
        let nr = compute_node_resources(&reservation, 110)
            .expect("should succeed in test env");
        assert_eq!(nr.allocatable_cpu_millicores, 0);
        assert_eq!(nr.allocatable_memory_bytes, 0);
    }
}

View file

@ -1,6 +1,6 @@
use clap::{Parser, Subcommand};
use reddwarf_apiserver::{ApiError, ApiServer, AppState, Config as ApiConfig, TlsMode};
use reddwarf_core::Namespace;
use reddwarf_core::{Namespace, ResourceQuantities};
use reddwarf_runtime::{
ApiClient, Ipam, MockRuntime, MockStorageEngine, NodeAgent, NodeAgentConfig,
NodeHealthChecker, NodeHealthCheckerConfig, PodController, PodControllerConfig, StorageEngine,
@ -84,6 +84,15 @@ enum Commands {
/// Etherstub name for pod networking
#[arg(long, default_value = "reddwarf0")]
etherstub_name: String,
/// CPU to reserve for system daemons (e.g. "100m", "0.1")
#[arg(long, default_value = "100m")]
system_reserved_cpu: String,
/// Memory to reserve for system daemons (e.g. "256Mi", "1Gi")
#[arg(long, default_value = "256Mi")]
system_reserved_memory: String,
/// Maximum number of pods this node will accept
#[arg(long, default_value_t = 110)]
max_pods: u32,
#[command(flatten)]
tls_args: TlsArgs,
},
@ -118,8 +127,30 @@ async fn main() -> miette::Result<()> {
zonepath_prefix,
pod_cidr,
etherstub_name,
system_reserved_cpu,
system_reserved_memory,
max_pods,
tls_args,
} => {
let reserved_cpu_millicores =
ResourceQuantities::parse_cpu(&system_reserved_cpu).map_err(|e| {
miette::miette!(
help = "Use a value like '100m' or '0.1' for --system-reserved-cpu",
"Invalid --system-reserved-cpu '{}': {}",
system_reserved_cpu,
e
)
})?;
let reserved_memory_bytes =
ResourceQuantities::parse_memory(&system_reserved_memory).map_err(|e| {
miette::miette!(
help = "Use a value like '256Mi' or '1Gi' for --system-reserved-memory",
"Invalid --system-reserved-memory '{}': {}",
system_reserved_memory,
e
)
})?;
run_agent(
&node_name,
&bind,
@ -131,6 +162,9 @@ async fn main() -> miette::Result<()> {
zonepath_prefix.as_deref(),
&pod_cidr,
&etherstub_name,
reserved_cpu_millicores,
reserved_memory_bytes,
max_pods,
&tls_args,
)
.await
@ -228,6 +262,9 @@ async fn run_agent(
zonepath_prefix: Option<&str>,
pod_cidr: &str,
etherstub_name: &str,
system_reserved_cpu_millicores: i64,
system_reserved_memory_bytes: i64,
max_pods: u32,
tls_args: &TlsArgs,
) -> miette::Result<()> {
info!("Starting reddwarf agent for node '{}'", node_name);
@ -337,7 +374,10 @@ async fn run_agent(
});
// 6. Spawn node agent
let node_agent_config = NodeAgentConfig::new(node_name.to_string(), api_url);
let mut node_agent_config = NodeAgentConfig::new(node_name.to_string(), api_url);
node_agent_config.system_reserved_cpu_millicores = system_reserved_cpu_millicores;
node_agent_config.system_reserved_memory_bytes = system_reserved_memory_bytes;
node_agent_config.max_pods = max_pods;
let node_agent = NodeAgent::new(api_client.clone(), node_agent_config);
let agent_token = token.clone();
let node_agent_handle = tokio::spawn(async move {