Add ZoneBrandMatch scheduler filter to reject brand-incompatible nodes

The scheduler now checks pod brand annotations against node brand labels
before scoring, preventing brand-mismatched pods from being scheduled to
incompatible nodes and failing at zone-install time.

- Add `supported_brands` field to NodeAgentConfig, emitted as
  `reddwarf.io/zone-brands` node label (comma-separated)
- Add `ZoneBrandMatch` filter: compares the `reddwarf.io/zone-brand` pod
  annotation against the `reddwarf.io/zone-brands` node label and rejects
  mismatches with a clear message
- Read pod brand annotation in controller's `pod_to_zone_config()`
  to select ZoneBrand instead of always using default_brand
- Add `--supported-brands` CLI flag to the agent subcommand

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Till Wegmueller 2026-02-14 21:45:51 +01:00
parent d3eb0b2511
commit 4c7f50a7a0
No known key found for this signature in database
4 changed files with 240 additions and 1 deletions

View file

@ -625,9 +625,21 @@ impl PodController {
None
};
let brand = pod
.metadata
.annotations
.as_ref()
.and_then(|a| a.get("reddwarf.io/zone-brand"))
.and_then(|v| match v.as_str() {
"lx" => Some(ZoneBrand::Lx),
"reddwarf" => Some(ZoneBrand::Reddwarf),
_ => None,
})
.unwrap_or_else(|| self.config.default_brand.clone());
Ok(ZoneConfig {
zone_name,
brand: self.config.default_brand.clone(),
brand,
zonepath,
network,
storage: ZoneStorageOpts::default(),
@ -1110,6 +1122,52 @@ mod tests {
assert!(result.is_ok());
}
/// A pod carrying the `reddwarf.io/zone-brand: lx` annotation must be
/// translated into a zone config whose brand is `ZoneBrand::Lx`.
#[test]
fn test_pod_to_zone_config_brand_from_annotation() {
    let (controller, _dir) = make_test_controller();

    // Assemble the individual pieces first, then attach them to the pod.
    let web_container = Container {
        name: "web".to_string(),
        command: Some(vec!["/bin/sh".to_string()]),
        ..Default::default()
    };
    let brand_annotations = [("reddwarf.io/zone-brand".to_string(), "lx".to_string())]
        .into_iter()
        .collect();

    let mut lx_pod = Pod::default();
    lx_pod.metadata.name = Some("lx-pod".to_string());
    lx_pod.metadata.namespace = Some("default".to_string());
    lx_pod.metadata.annotations = Some(brand_annotations);
    lx_pod.spec = Some(PodSpec {
        containers: vec![web_container],
        ..Default::default()
    });

    let zone_config = controller.pod_to_zone_config(&lx_pod).unwrap();
    assert_eq!(zone_config.brand, ZoneBrand::Lx);
}
/// A pod without any annotations must produce a zone config whose brand is
/// `ZoneBrand::Reddwarf` when built by the test controller.
#[test]
fn test_pod_to_zone_config_brand_default() {
    let (controller, _dir) = make_test_controller();

    let web_container = Container {
        name: "web".to_string(),
        command: Some(vec!["/bin/sh".to_string()]),
        ..Default::default()
    };

    // Deliberately leave `metadata.annotations` unset.
    let mut plain_pod = Pod::default();
    plain_pod.metadata.name = Some("default-brand-pod".to_string());
    plain_pod.metadata.namespace = Some("default".to_string());
    plain_pod.spec = Some(PodSpec {
        containers: vec![web_container],
        ..Default::default()
    });

    let zone_config = controller.pod_to_zone_config(&plain_pod).unwrap();
    assert_eq!(zone_config.brand, ZoneBrand::Reddwarf);
}
#[tokio::test]
async fn test_reconcile_with_deletion_timestamp_uses_termination() {
let (controller, _dir) = make_test_controller();

View file

@ -27,6 +27,8 @@ pub struct NodeAgentConfig {
pub system_reserved_memory_bytes: i64,
/// Maximum number of pods this node will accept (default: 110)
pub max_pods: u32,
/// Zone brands this node supports (advertised via `reddwarf.io/zone-brands` label)
pub supported_brands: Vec<String>,
}
impl NodeAgentConfig {
@ -38,6 +40,7 @@ impl NodeAgentConfig {
system_reserved_cpu_millicores: 100,
system_reserved_memory_bytes: 256 * 1024 * 1024,
max_pods: 110,
supported_brands: vec!["reddwarf".into()],
}
}
}
@ -217,6 +220,10 @@ impl NodeAgent {
"node.kubernetes.io/instance-type".to_string(),
"reddwarf-zone".to_string(),
),
(
"reddwarf.io/zone-brands".to_string(),
self.config.supported_brands.join(","),
),
]
.into_iter()
.collect(),
@ -353,6 +360,33 @@ mod tests {
);
}
/// Configured brands must be advertised, comma-joined and in order, via the
/// `reddwarf.io/zone-brands` node label.
#[test]
fn test_build_node_has_brand_labels() {
    let api_url = "http://127.0.0.1:6443";

    let mut agent_config = NodeAgentConfig::new("test-node".to_string(), api_url.to_string());
    agent_config.supported_brands = vec!["reddwarf".to_string(), "lx".to_string()];

    let client = Arc::new(ApiClient::new(api_url));
    let agent = NodeAgent::new(client, agent_config);

    let built = agent.build_node();
    let node_labels = built.metadata.labels.unwrap();
    let advertised = node_labels.get("reddwarf.io/zone-brands").unwrap();
    assert_eq!(advertised, "reddwarf,lx");
}
/// With an untouched `NodeAgentConfig::new` configuration the
/// `reddwarf.io/zone-brands` node label must read `reddwarf`.
#[test]
fn test_build_node_has_brand_labels_default() {
    let api_url = "http://127.0.0.1:6443";

    let agent_config = NodeAgentConfig::new("test-node".to_string(), api_url.to_string());
    let client = Arc::new(ApiClient::new(api_url));
    let agent = NodeAgent::new(client, agent_config);

    let built = agent.build_node();
    let node_labels = built.metadata.labels.unwrap();
    let advertised = node_labels.get("reddwarf.io/zone-brands").unwrap();
    assert_eq!(advertised, "reddwarf");
}
#[test]
fn test_build_node_fallback_on_detection_failure() {
let api_client = Arc::new(ApiClient::new("http://127.0.0.1:6443"));

View file

@ -220,9 +220,65 @@ impl FilterPredicate for TaintToleration {
}
}
/// Filter for zone brand compatibility between pod and node.
///
/// The pod requests a brand through the `reddwarf.io/zone-brand` annotation
/// (treated as `"reddwarf"` when the annotation is absent). The node
/// advertises the brands it supports through the comma-separated
/// `reddwarf.io/zone-brands` label. A node without that label always passes,
/// so nodes that do not advertise brands keep accepting pods.
pub struct ZoneBrandMatch;

impl FilterPredicate for ZoneBrandMatch {
    /// Checks whether `node` advertises the zone brand requested by the pod
    /// in `context`.
    ///
    /// Returns a passing [`FilterResult`] when the node has no
    /// `reddwarf.io/zone-brands` label or when the pod's brand appears in that
    /// label's list; otherwise returns a failing result whose reason names the
    /// requested brand and the brands the node advertises.
    fn filter(&self, context: &SchedulingContext, node: &Node) -> FilterResult {
        const POD_BRAND_ANNOTATION: &str = "reddwarf.io/zone-brand";
        const NODE_BRANDS_LABEL: &str = "reddwarf.io/zone-brands";
        const DEFAULT_BRAND: &str = "reddwarf";

        // Build the fallback lazily so no throwaway String is allocated when
        // the node actually has a name.
        let node_name = node
            .metadata
            .name
            .clone()
            .unwrap_or_else(|| "unknown".to_string());

        // Brand requested by the pod; defaults when the annotation is absent.
        let pod_brand = context
            .pod
            .metadata
            .annotations
            .as_ref()
            .and_then(|a| a.get(POD_BRAND_ANNOTATION))
            .map(|s| s.as_str())
            .unwrap_or(DEFAULT_BRAND);

        // Nodes that do not advertise brands pass (backward compat).
        let node_brands_label = match node
            .metadata
            .labels
            .as_ref()
            .and_then(|l| l.get(NODE_BRANDS_LABEL))
        {
            Some(label) => label,
            None => return FilterResult::pass(node_name),
        };

        // Split the label on commas, trimming whitespace. Empty entries (for
        // example from a trailing comma or an empty label) are dropped so an
        // empty brand can never count as "supported"; this mirrors how the
        // agent parses its `--supported-brands` flag.
        let supported: Vec<&str> = node_brands_label
            .split(',')
            .map(str::trim)
            .filter(|brand| !brand.is_empty())
            .collect();

        if supported.contains(&pod_brand) {
            FilterResult::pass(node_name)
        } else {
            FilterResult::fail(
                node_name,
                format!(
                    "Node does not support zone brand '{}': available brands are {:?}",
                    pod_brand, supported
                ),
            )
        }
    }

    /// Name under which this predicate is reported.
    fn name(&self) -> &str {
        "ZoneBrandMatch"
    }
}
/// Get default filter predicates
pub fn default_filters() -> Vec<Box<dyn FilterPredicate>> {
vec![
Box::new(ZoneBrandMatch),
Box::new(PodFitsResources),
Box::new(NodeSelectorMatch),
Box::new(TaintToleration),
@ -336,4 +392,82 @@ mod tests {
assert!(!result.passed);
assert!(result.reason.unwrap().contains("Insufficient memory"));
}
/// Builds a test node named `name` via `create_test_node(name, "4", "8Gi")`
/// and, when `brands` is given, stores it verbatim under the
/// `reddwarf.io/zone-brands` label. With `None` the label is not added.
fn create_branded_node(name: &str, brands: Option<&str>) -> Node {
    let mut branded = create_test_node(name, "4", "8Gi");
    match brands {
        Some(brand_list) => {
            // Create the label map on demand if the base node has none.
            let labels = branded.metadata.labels.get_or_insert_with(BTreeMap::new);
            labels.insert(
                String::from("reddwarf.io/zone-brands"),
                String::from(brand_list),
            );
        }
        None => {}
    }
    branded
}
/// Builds a test pod via `create_test_pod("1", "1Gi")` and, when `brand` is
/// given, stores it verbatim under the `reddwarf.io/zone-brand` annotation.
/// With `None` the annotation is not added.
fn create_branded_pod(brand: Option<&str>) -> Pod {
    let mut branded = create_test_pod("1", "1Gi");
    match brand {
        Some(requested) => {
            // Create the annotation map on demand if the base pod has none.
            let annotations = branded
                .metadata
                .annotations
                .get_or_insert_with(BTreeMap::new);
            annotations.insert(
                String::from("reddwarf.io/zone-brand"),
                String::from(requested),
            );
        }
        None => {}
    }
    branded
}
/// A pod requesting `reddwarf` passes on a node advertising `reddwarf`.
#[test]
fn test_zone_brand_match_pass() {
    let candidate = create_branded_node("node1", Some("reddwarf"));
    let requested = create_branded_pod(Some("reddwarf"));
    let context = SchedulingContext::new(requested, vec![candidate.clone()]);

    let outcome = ZoneBrandMatch.filter(&context, &candidate);

    assert!(outcome.passed);
}
/// A pod requesting `lx` is rejected by a node that only advertises
/// `reddwarf`, and the failure reason names the requested brand.
#[test]
fn test_zone_brand_match_fail() {
    let candidate = create_branded_node("node1", Some("reddwarf"));
    let requested = create_branded_pod(Some("lx"));
    let context = SchedulingContext::new(requested, vec![candidate.clone()]);

    let outcome = ZoneBrandMatch.filter(&context, &candidate);

    assert!(!outcome.passed);
    let reason = outcome.reason.unwrap();
    assert!(reason.contains("does not support zone brand 'lx'"));
}
/// A pod without the brand annotation is treated as requesting `reddwarf`
/// and therefore passes on a node advertising `reddwarf`.
#[test]
fn test_zone_brand_match_no_annotation() {
    let candidate = create_branded_node("node1", Some("reddwarf"));
    // No annotation on the pod: the filter falls back to "reddwarf".
    let unannotated = create_branded_pod(None);
    let context = SchedulingContext::new(unannotated, vec![candidate.clone()]);

    let outcome = ZoneBrandMatch.filter(&context, &candidate);

    assert!(outcome.passed);
}
/// A node without the `reddwarf.io/zone-brands` label accepts any pod brand
/// (backward compatibility).
#[test]
fn test_zone_brand_match_no_node_label() {
    // No brand label on the node: the filter must pass regardless of the pod.
    let unlabeled = create_branded_node("node1", None);
    let requested = create_branded_pod(Some("lx"));
    let context = SchedulingContext::new(requested, vec![unlabeled.clone()]);

    let outcome = ZoneBrandMatch.filter(&context, &unlabeled);

    assert!(outcome.passed);
}
/// A pod requesting `lx` passes on a node advertising the comma-separated
/// list `reddwarf,lx`.
#[test]
fn test_zone_brand_match_multi_brand() {
    let candidate = create_branded_node("node1", Some("reddwarf,lx"));
    let requested = create_branded_pod(Some("lx"));
    let context = SchedulingContext::new(requested, vec![candidate.clone()]);

    let outcome = ZoneBrandMatch.filter(&context, &candidate);

    assert!(outcome.passed);
}
}

View file

@ -93,6 +93,9 @@ enum Commands {
/// Maximum number of pods this node will accept
#[arg(long, default_value_t = 110)]
max_pods: u32,
/// Comma-separated list of zone brands this node supports
#[arg(long, default_value = "reddwarf")]
supported_brands: String,
#[command(flatten)]
tls_args: TlsArgs,
},
@ -130,6 +133,7 @@ async fn main() -> miette::Result<()> {
system_reserved_cpu,
system_reserved_memory,
max_pods,
supported_brands,
tls_args,
} => {
let reserved_cpu_millicores =
@ -151,6 +155,12 @@ async fn main() -> miette::Result<()> {
)
})?;
let supported_brands: Vec<String> = supported_brands
.split(',')
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
.collect();
run_agent(
&node_name,
&bind,
@ -165,6 +175,7 @@ async fn main() -> miette::Result<()> {
reserved_cpu_millicores,
reserved_memory_bytes,
max_pods,
&supported_brands,
&tls_args,
)
.await
@ -265,6 +276,7 @@ async fn run_agent(
system_reserved_cpu_millicores: i64,
system_reserved_memory_bytes: i64,
max_pods: u32,
supported_brands: &[String],
tls_args: &TlsArgs,
) -> miette::Result<()> {
info!("Starting reddwarf agent for node '{}'", node_name);
@ -378,6 +390,7 @@ async fn run_agent(
node_agent_config.system_reserved_cpu_millicores = system_reserved_cpu_millicores;
node_agent_config.system_reserved_memory_bytes = system_reserved_memory_bytes;
node_agent_config.max_pods = max_pods;
node_agent_config.supported_brands = supported_brands.to_vec();
let node_agent = NodeAgent::new(api_client.clone(), node_agent_config);
let agent_token = token.clone();
let node_agent_handle = tokio::spawn(async move {