From 4c7f50a7a0ff48ebacc328f51c832bb62c568d4d Mon Sep 17 00:00:00 2001 From: Till Wegmueller Date: Sat, 14 Feb 2026 21:45:51 +0100 Subject: [PATCH] Add ZoneBrandMatch scheduler filter to reject brand-incompatible nodes The scheduler now checks pod brand annotations against node brand labels before scoring, preventing brand-mismatched pods from being scheduled to incompatible nodes and failing at zone-install time. - Add `supported_brands` field to NodeAgentConfig, emitted as `reddwarf.io/zone-brands` node label (comma-separated) - Add `ZoneBrandMatch` filter: reads `reddwarf.io/zone-brand` pod annotation vs node label, rejects mismatches with clear message - Read pod brand annotation in controller's `pod_to_zone_config()` to select ZoneBrand instead of always using default_brand - Add `--supported-brands` CLI flag to the agent subcommand Co-Authored-By: Claude Opus 4.6 --- crates/reddwarf-runtime/src/controller.rs | 60 +++++++++- crates/reddwarf-runtime/src/node_agent.rs | 34 ++++++ crates/reddwarf-scheduler/src/filter.rs | 134 ++++++++++++++++++++++ crates/reddwarf/src/main.rs | 13 +++ 4 files changed, 240 insertions(+), 1 deletion(-) diff --git a/crates/reddwarf-runtime/src/controller.rs b/crates/reddwarf-runtime/src/controller.rs index 6944f1c..35c6090 100644 --- a/crates/reddwarf-runtime/src/controller.rs +++ b/crates/reddwarf-runtime/src/controller.rs @@ -625,9 +625,21 @@ impl PodController { None }; + let brand = pod + .metadata + .annotations + .as_ref() + .and_then(|a| a.get("reddwarf.io/zone-brand")) + .and_then(|v| match v.as_str() { + "lx" => Some(ZoneBrand::Lx), + "reddwarf" => Some(ZoneBrand::Reddwarf), + _ => None, + }) + .unwrap_or_else(|| self.config.default_brand.clone()); + Ok(ZoneConfig { zone_name, - brand: self.config.default_brand.clone(), + brand, zonepath, network, storage: ZoneStorageOpts::default(), @@ -1110,6 +1122,52 @@ mod tests { assert!(result.is_ok()); } + #[test] + fn test_pod_to_zone_config_brand_from_annotation() { + let 
(controller, _dir) = make_test_controller(); + + let mut pod = Pod::default(); + pod.metadata.name = Some("lx-pod".to_string()); + pod.metadata.namespace = Some("default".to_string()); + pod.metadata.annotations = Some( + [("reddwarf.io/zone-brand".to_string(), "lx".to_string())] + .into_iter() + .collect(), + ); + pod.spec = Some(PodSpec { + containers: vec![Container { + name: "web".to_string(), + command: Some(vec!["/bin/sh".to_string()]), + ..Default::default() + }], + ..Default::default() + }); + + let zone_config = controller.pod_to_zone_config(&pod).unwrap(); + assert_eq!(zone_config.brand, ZoneBrand::Lx); + } + + #[test] + fn test_pod_to_zone_config_brand_default() { + let (controller, _dir) = make_test_controller(); + + let mut pod = Pod::default(); + pod.metadata.name = Some("default-brand-pod".to_string()); + pod.metadata.namespace = Some("default".to_string()); + // No annotations + pod.spec = Some(PodSpec { + containers: vec![Container { + name: "web".to_string(), + command: Some(vec!["/bin/sh".to_string()]), + ..Default::default() + }], + ..Default::default() + }); + + let zone_config = controller.pod_to_zone_config(&pod).unwrap(); + assert_eq!(zone_config.brand, ZoneBrand::Reddwarf); + } + #[tokio::test] async fn test_reconcile_with_deletion_timestamp_uses_termination() { let (controller, _dir) = make_test_controller(); diff --git a/crates/reddwarf-runtime/src/node_agent.rs b/crates/reddwarf-runtime/src/node_agent.rs index 30860c9..aae27f9 100644 --- a/crates/reddwarf-runtime/src/node_agent.rs +++ b/crates/reddwarf-runtime/src/node_agent.rs @@ -27,6 +27,8 @@ pub struct NodeAgentConfig { pub system_reserved_memory_bytes: i64, /// Maximum number of pods this node will accept (default: 110) pub max_pods: u32, + /// Zone brands this node supports (advertised via `reddwarf.io/zone-brands` label) + pub supported_brands: Vec<String>, } impl NodeAgentConfig { @@ -38,6 +40,7 @@ impl NodeAgentConfig { system_reserved_cpu_millicores: 100, system_reserved_memory_bytes:
256 * 1024 * 1024, max_pods: 110, + supported_brands: vec!["reddwarf".into()], } } } @@ -217,6 +220,10 @@ impl NodeAgent { "node.kubernetes.io/instance-type".to_string(), "reddwarf-zone".to_string(), ), + ( + "reddwarf.io/zone-brands".to_string(), + self.config.supported_brands.join(","), + ), ] .into_iter() .collect(), @@ -353,6 +360,33 @@ mod tests { ); } + #[test] + fn test_build_node_has_brand_labels() { + let api_client = Arc::new(ApiClient::new("http://127.0.0.1:6443")); + let mut config = + NodeAgentConfig::new("test-node".to_string(), "http://127.0.0.1:6443".to_string()); + config.supported_brands = vec!["reddwarf".into(), "lx".into()]; + let agent = NodeAgent::new(api_client, config); + + let node = agent.build_node(); + + let labels = node.metadata.labels.unwrap(); + assert_eq!(labels.get("reddwarf.io/zone-brands").unwrap(), "reddwarf,lx"); + } + + #[test] + fn test_build_node_has_brand_labels_default() { + let api_client = Arc::new(ApiClient::new("http://127.0.0.1:6443")); + let config = + NodeAgentConfig::new("test-node".to_string(), "http://127.0.0.1:6443".to_string()); + let agent = NodeAgent::new(api_client, config); + + let node = agent.build_node(); + + let labels = node.metadata.labels.unwrap(); + assert_eq!(labels.get("reddwarf.io/zone-brands").unwrap(), "reddwarf"); + } + #[test] fn test_build_node_fallback_on_detection_failure() { let api_client = Arc::new(ApiClient::new("http://127.0.0.1:6443")); diff --git a/crates/reddwarf-scheduler/src/filter.rs b/crates/reddwarf-scheduler/src/filter.rs index 2bac511..686b1f8 100644 --- a/crates/reddwarf-scheduler/src/filter.rs +++ b/crates/reddwarf-scheduler/src/filter.rs @@ -220,9 +220,65 @@ impl FilterPredicate for TaintToleration { } } +/// Filter for zone brand compatibility between pod and node +pub struct ZoneBrandMatch; + +impl FilterPredicate for ZoneBrandMatch { + fn filter(&self, context: &SchedulingContext, node: &Node) -> FilterResult { + let node_name = node + .metadata + .name + .as_ref() + 
.unwrap_or(&"unknown".to_string()) + .clone(); + + // Read pod annotation "reddwarf.io/zone-brand" (default: "reddwarf") + let pod_brand = context + .pod + .metadata + .annotations + .as_ref() + .and_then(|a| a.get("reddwarf.io/zone-brand")) + .map(|s| s.as_str()) + .unwrap_or("reddwarf"); + + // Read node label "reddwarf.io/zone-brands" — if absent, pass (backward compat) + let node_brands_label = node + .metadata + .labels + .as_ref() + .and_then(|l| l.get("reddwarf.io/zone-brands")); + + let node_brands_label = match node_brands_label { + Some(label) => label, + None => return FilterResult::pass(node_name), + }; + + // Split node brands by comma, check if pod brand is in the list + let supported: Vec<&str> = node_brands_label.split(',').map(|s| s.trim()).collect(); + + if supported.contains(&pod_brand) { + FilterResult::pass(node_name) + } else { + FilterResult::fail( + node_name, + format!( + "Node does not support zone brand '{}': available brands are {:?}", + pod_brand, supported + ), + ) + } + } + + fn name(&self) -> &str { + "ZoneBrandMatch" + } +} + /// Get default filter predicates pub fn default_filters() -> Vec<Box<dyn FilterPredicate>> { vec![ + Box::new(ZoneBrandMatch), Box::new(PodFitsResources), Box::new(NodeSelectorMatch), Box::new(TaintToleration), @@ -336,4 +392,82 @@ mod tests { assert!(!result.passed); assert!(result.reason.unwrap().contains("Insufficient memory")); } + + fn create_branded_node(name: &str, brands: Option<&str>) -> Node { + let mut node = create_test_node(name, "4", "8Gi"); + if let Some(brands) = brands { + node.metadata + .labels + .get_or_insert_with(BTreeMap::new) + .insert("reddwarf.io/zone-brands".to_string(), brands.to_string()); + } + node + } + + fn create_branded_pod(brand: Option<&str>) -> Pod { + let mut pod = create_test_pod("1", "1Gi"); + if let Some(brand) = brand { + pod.metadata + .annotations + .get_or_insert_with(BTreeMap::new) + .insert("reddwarf.io/zone-brand".to_string(), brand.to_string()); + } + pod + } + + #[test] + fn
test_zone_brand_match_pass() { + let node = create_branded_node("node1", Some("reddwarf")); + let pod = create_branded_pod(Some("reddwarf")); + let context = SchedulingContext::new(pod, vec![node.clone()]); + + let filter = ZoneBrandMatch; + let result = filter.filter(&context, &node); + assert!(result.passed); + } + + #[test] + fn test_zone_brand_match_fail() { + let node = create_branded_node("node1", Some("reddwarf")); + let pod = create_branded_pod(Some("lx")); + let context = SchedulingContext::new(pod, vec![node.clone()]); + + let filter = ZoneBrandMatch; + let result = filter.filter(&context, &node); + assert!(!result.passed); + assert!(result.reason.unwrap().contains("does not support zone brand 'lx'")); + } + + #[test] + fn test_zone_brand_match_no_annotation() { + let node = create_branded_node("node1", Some("reddwarf")); + let pod = create_branded_pod(None); // no annotation → defaults to "reddwarf" + let context = SchedulingContext::new(pod, vec![node.clone()]); + + let filter = ZoneBrandMatch; + let result = filter.filter(&context, &node); + assert!(result.passed); + } + + #[test] + fn test_zone_brand_match_no_node_label() { + let node = create_branded_node("node1", None); // no label → pass (backward compat) + let pod = create_branded_pod(Some("lx")); + let context = SchedulingContext::new(pod, vec![node.clone()]); + + let filter = ZoneBrandMatch; + let result = filter.filter(&context, &node); + assert!(result.passed); + } + + #[test] + fn test_zone_brand_match_multi_brand() { + let node = create_branded_node("node1", Some("reddwarf,lx")); + let pod = create_branded_pod(Some("lx")); + let context = SchedulingContext::new(pod, vec![node.clone()]); + + let filter = ZoneBrandMatch; + let result = filter.filter(&context, &node); + assert!(result.passed); + } } diff --git a/crates/reddwarf/src/main.rs b/crates/reddwarf/src/main.rs index dda8247..8e5ffc3 100644 --- a/crates/reddwarf/src/main.rs +++ b/crates/reddwarf/src/main.rs @@ -93,6 +93,9 @@ enum 
Commands { /// Maximum number of pods this node will accept #[arg(long, default_value_t = 110)] max_pods: u32, + /// Comma-separated list of zone brands this node supports + #[arg(long, default_value = "reddwarf")] + supported_brands: String, #[command(flatten)] tls_args: TlsArgs, }, @@ -130,6 +133,7 @@ async fn main() -> miette::Result<()> { system_reserved_cpu, system_reserved_memory, max_pods, + supported_brands, tls_args, } => { let reserved_cpu_millicores = @@ -151,6 +155,12 @@ async fn main() -> miette::Result<()> { ) })?; + let supported_brands: Vec<String> = supported_brands + .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect(); + run_agent( &node_name, &bind, @@ -165,6 +175,7 @@ async fn main() -> miette::Result<()> { reserved_cpu_millicores, reserved_memory_bytes, max_pods, + &supported_brands, &tls_args, ) .await @@ -265,6 +276,7 @@ async fn run_agent( system_reserved_cpu_millicores: i64, system_reserved_memory_bytes: i64, max_pods: u32, + supported_brands: &[String], tls_args: &TlsArgs, ) -> miette::Result<()> { info!("Starting reddwarf agent for node '{}'", node_name); @@ -378,6 +390,7 @@ async fn run_agent( node_agent_config.system_reserved_cpu_millicores = system_reserved_cpu_millicores; node_agent_config.system_reserved_memory_bytes = system_reserved_memory_bytes; node_agent_config.max_pods = max_pods; + node_agent_config.supported_brands = supported_brands.to_vec(); let node_agent = NodeAgent::new(api_client.clone(), node_agent_config); let agent_token = token.clone(); let node_agent_handle = tokio::spawn(async move {