diff --git a/Cargo.lock b/Cargo.lock index d688f58..8cfd8a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1393,6 +1393,7 @@ dependencies = [ "k8s-openapi", "miette", "reddwarf-core", + "reddwarf-storage", "reqwest", "serde", "serde_json", diff --git a/crates/reddwarf-runtime/Cargo.toml b/crates/reddwarf-runtime/Cargo.toml index 150fda4..31d886e 100644 --- a/crates/reddwarf-runtime/Cargo.toml +++ b/crates/reddwarf-runtime/Cargo.toml @@ -9,6 +9,7 @@ rust-version.workspace = true [dependencies] reddwarf-core = { workspace = true } +reddwarf-storage = { workspace = true } k8s-openapi = { workspace = true } tokio = { workspace = true } tokio-stream = { workspace = true } diff --git a/crates/reddwarf-runtime/src/brand/lx.rs b/crates/reddwarf-runtime/src/brand/lx.rs index fefd67e..45d1d68 100644 --- a/crates/reddwarf-runtime/src/brand/lx.rs +++ b/crates/reddwarf-runtime/src/brand/lx.rs @@ -28,6 +28,7 @@ mod tests { vnic_name: "vnic0".to_string(), ip_address: "10.0.0.2".to_string(), gateway: "10.0.0.1".to_string(), + prefix_len: 16, }), zfs: ZfsConfig { parent_dataset: "rpool/zones".to_string(), diff --git a/crates/reddwarf-runtime/src/controller.rs b/crates/reddwarf-runtime/src/controller.rs index 362fd7c..2ec9cdc 100644 --- a/crates/reddwarf-runtime/src/controller.rs +++ b/crates/reddwarf-runtime/src/controller.rs @@ -1,5 +1,6 @@ use crate::api_client::ApiClient; use crate::error::{Result, RuntimeError}; +use crate::network::{vnic_name_for_pod, Ipam}; use crate::traits::ZoneRuntime; use crate::types::*; use k8s_openapi::api::core::v1::{Pod, PodCondition, PodStatus}; @@ -22,8 +23,10 @@ pub struct PodControllerConfig { pub zfs_parent_dataset: String, /// Default zone brand pub default_brand: ZoneBrand, - /// Default network configuration - pub network: NetworkMode, + /// Name of the etherstub for pod networking + pub etherstub_name: String, + /// Pod CIDR (e.g., "10.88.0.0/16") + pub pod_cidr: String, } /// Pod controller that watches for Pod events and drives zone 
lifecycle @@ -32,6 +35,7 @@ pub struct PodController { api_client: Arc, event_tx: broadcast::Sender, config: PodControllerConfig, + ipam: Ipam, } impl PodController { @@ -40,12 +44,14 @@ impl PodController { api_client: Arc, event_tx: broadcast::Sender, config: PodControllerConfig, + ipam: Ipam, ) -> Self { Self { runtime, api_client, event_tx, config, + ipam, } } @@ -205,7 +211,7 @@ impl PodController { "" | "Pending" => { // Pod is assigned to us but has no phase — provision it info!("Provisioning zone for pod {}/{}", namespace, pod_name); - let zone_config = pod_to_zone_config(pod, &self.config)?; + let zone_config = self.pod_to_zone_config(pod)?; match self.runtime.provision(&zone_config).await { Ok(()) => { @@ -328,7 +334,7 @@ impl PodController { Ok(()) } - /// Handle pod deletion — deprovision the zone + /// Handle pod deletion — deprovision the zone and release IP pub async fn handle_delete(&self, pod: &Pod) -> Result<()> { let pod_name = pod .metadata @@ -348,7 +354,7 @@ impl PodController { } } - let zone_config = pod_to_zone_config(pod, &self.config)?; + let zone_config = self.pod_to_zone_config(pod)?; info!( "Deprovisioning zone for deleted pod {}/{}", namespace, pod_name @@ -361,9 +367,96 @@ impl PodController { ); } + // Release the IP allocation + if let Err(e) = self.ipam.release(namespace, pod_name) { + warn!( + "Failed to release IP for pod {}/{}: {}", + namespace, pod_name, e + ); + } + Ok(()) } + /// Convert a Pod spec to a ZoneConfig with per-pod VNIC and IP + fn pod_to_zone_config(&self, pod: &Pod) -> Result { + let pod_name = pod + .metadata + .name + .as_deref() + .ok_or_else(|| RuntimeError::internal_error("Pod has no name"))?; + let namespace = pod.metadata.namespace.as_deref().unwrap_or("default"); + + let spec = pod + .spec + .as_ref() + .ok_or_else(|| RuntimeError::internal_error("Pod has no spec"))?; + + let zone_name = pod_zone_name(namespace, pod_name); + let zonepath = format!("{}/{}", self.config.zonepath_prefix, zone_name); + + // 
Allocate a unique VNIC name and IP for this pod + let vnic_name = vnic_name_for_pod(namespace, pod_name); + let allocation = self.ipam.allocate(namespace, pod_name)?; + + let network = NetworkMode::Etherstub(EtherstubConfig { + etherstub_name: self.config.etherstub_name.clone(), + vnic_name, + ip_address: allocation.ip_address.to_string(), + gateway: allocation.gateway.to_string(), + prefix_len: allocation.prefix_len, + }); + + // Map containers to ContainerProcess entries + let processes: Vec = spec + .containers + .iter() + .map(|c| { + let command = c + .command + .clone() + .unwrap_or_default() + .into_iter() + .chain(c.args.clone().unwrap_or_default()) + .collect::>(); + + let env = c + .env + .as_ref() + .map(|envs| { + envs.iter() + .filter_map(|e| e.value.as_ref().map(|v| (e.name.clone(), v.clone()))) + .collect::>() + }) + .unwrap_or_default(); + + ContainerProcess { + name: c.name.clone(), + command, + working_dir: c.working_dir.clone(), + env, + } + }) + .collect(); + + Ok(ZoneConfig { + zone_name, + brand: self.config.default_brand.clone(), + zonepath, + network, + zfs: ZfsConfig { + parent_dataset: self.config.zfs_parent_dataset.clone(), + clone_from: None, + quota: None, + }, + lx_image_path: None, + processes, + cpu_cap: None, + memory_cap: None, + fs_mounts: vec![], + }) + } + /// Extract IP address from zone config network fn zone_ip(&self, config: &ZoneConfig) -> String { match &config.network { @@ -396,77 +489,38 @@ pub fn pod_zone_name(namespace: &str, pod_name: &str) -> String { } } -/// Convert a Pod spec to a ZoneConfig for the runtime -pub fn pod_to_zone_config(pod: &Pod, config: &PodControllerConfig) -> Result { - let pod_name = pod - .metadata - .name - .as_deref() - .ok_or_else(|| RuntimeError::internal_error("Pod has no name"))?; - let namespace = pod.metadata.namespace.as_deref().unwrap_or("default"); - - let spec = pod - .spec - .as_ref() - .ok_or_else(|| RuntimeError::internal_error("Pod has no spec"))?; - - let zone_name = 
pod_zone_name(namespace, pod_name); - let zonepath = format!("{}/{}", config.zonepath_prefix, zone_name); - - // Map containers to ContainerProcess entries - let processes: Vec = spec - .containers - .iter() - .map(|c| { - let command = c - .command - .clone() - .unwrap_or_default() - .into_iter() - .chain(c.args.clone().unwrap_or_default()) - .collect::>(); - - let env = c - .env - .as_ref() - .map(|envs| { - envs.iter() - .filter_map(|e| e.value.as_ref().map(|v| (e.name.clone(), v.clone()))) - .collect::>() - }) - .unwrap_or_default(); - - ContainerProcess { - name: c.name.clone(), - command, - working_dir: c.working_dir.clone(), - env, - } - }) - .collect(); - - Ok(ZoneConfig { - zone_name, - brand: config.default_brand.clone(), - zonepath, - network: config.network.clone(), - zfs: ZfsConfig { - parent_dataset: config.zfs_parent_dataset.clone(), - clone_from: None, - quota: None, - }, - lx_image_path: None, - processes, - cpu_cap: None, - memory_cap: None, - fs_mounts: vec![], - }) -} - #[cfg(test)] mod tests { use super::*; + use crate::network::Ipam; use k8s_openapi::api::core::v1::{Container, PodSpec}; + use reddwarf_storage::RedbBackend; + use std::net::Ipv4Addr; + use tempfile::tempdir; + + fn make_test_controller() -> (PodController, tempfile::TempDir) { + let dir = tempdir().unwrap(); + let db_path = dir.path().join("test-controller.redb"); + let storage = Arc::new(RedbBackend::new(&db_path).unwrap()); + let ipam = Ipam::new(storage, "10.88.0.0/16").unwrap(); + + let runtime = Arc::new(crate::mock::MockRuntime::new()); + let api_client = Arc::new(ApiClient::new("http://127.0.0.1:6443")); + let (event_tx, _) = broadcast::channel(16); + + let config = PodControllerConfig { + node_name: "node1".to_string(), + api_url: "http://127.0.0.1:6443".to_string(), + zonepath_prefix: "/zones".to_string(), + zfs_parent_dataset: "rpool/zones".to_string(), + default_brand: ZoneBrand::Reddwarf, + etherstub_name: "reddwarf0".to_string(), + pod_cidr: 
"10.88.0.0/16".to_string(), + }; + + let controller = PodController::new(runtime, api_client, event_tx, config, ipam); + (controller, dir) + } #[test] fn test_pod_zone_name_basic() { @@ -491,6 +545,8 @@ mod tests { #[test] fn test_pod_to_zone_config_maps_containers() { + let (controller, _dir) = make_test_controller(); + let mut pod = Pod::default(); pod.metadata.name = Some("test-pod".to_string()); pod.metadata.namespace = Some("default".to_string()); @@ -511,21 +567,7 @@ mod tests { ..Default::default() }); - let config = PodControllerConfig { - node_name: "node1".to_string(), - api_url: "http://127.0.0.1:6443".to_string(), - zonepath_prefix: "/zones".to_string(), - zfs_parent_dataset: "rpool/zones".to_string(), - default_brand: ZoneBrand::Reddwarf, - network: NetworkMode::Etherstub(EtherstubConfig { - etherstub_name: "reddwarf0".to_string(), - vnic_name: "vnic0".to_string(), - ip_address: "10.0.0.2".to_string(), - gateway: "10.0.0.1".to_string(), - }), - }; - - let zone_config = pod_to_zone_config(&pod, &config).unwrap(); + let zone_config = controller.pod_to_zone_config(&pod).unwrap(); assert_eq!(zone_config.zone_name, "reddwarf-default-test-pod"); assert_eq!(zone_config.zonepath, "/zones/reddwarf-default-test-pod"); @@ -539,29 +581,74 @@ mod tests { assert_eq!(zone_config.processes[1].command, vec!["/bin/sh", "-c"]); assert_eq!(zone_config.brand, ZoneBrand::Reddwarf); assert_eq!(zone_config.zfs.parent_dataset, "rpool/zones"); + + // Verify per-pod networking + match &zone_config.network { + NetworkMode::Etherstub(cfg) => { + assert_eq!(cfg.etherstub_name, "reddwarf0"); + assert_eq!(cfg.vnic_name, "vnic_default_test_pod"); + assert_eq!(cfg.ip_address, Ipv4Addr::new(10, 88, 0, 2).to_string()); + assert_eq!(cfg.gateway, Ipv4Addr::new(10, 88, 0, 1).to_string()); + assert_eq!(cfg.prefix_len, 16); + } + _ => panic!("Expected Etherstub network mode"), + } + } + + #[test] + fn test_pod_to_zone_config_unique_ips() { + let (controller, _dir) = make_test_controller(); + 
+ let mut pod_a = Pod::default(); + pod_a.metadata.name = Some("pod-a".to_string()); + pod_a.metadata.namespace = Some("default".to_string()); + pod_a.spec = Some(PodSpec { + containers: vec![Container { + name: "web".to_string(), + command: Some(vec!["/bin/sh".to_string()]), + ..Default::default() + }], + ..Default::default() + }); + + let mut pod_b = Pod::default(); + pod_b.metadata.name = Some("pod-b".to_string()); + pod_b.metadata.namespace = Some("default".to_string()); + pod_b.spec = Some(PodSpec { + containers: vec![Container { + name: "web".to_string(), + command: Some(vec!["/bin/sh".to_string()]), + ..Default::default() + }], + ..Default::default() + }); + + let config_a = controller.pod_to_zone_config(&pod_a).unwrap(); + let config_b = controller.pod_to_zone_config(&pod_b).unwrap(); + + let ip_a = match &config_a.network { + NetworkMode::Etherstub(cfg) => cfg.ip_address.clone(), + _ => panic!("Expected Etherstub"), + }; + let ip_b = match &config_b.network { + NetworkMode::Etherstub(cfg) => cfg.ip_address.clone(), + _ => panic!("Expected Etherstub"), + }; + + assert_ne!(ip_a, ip_b, "Each pod should get a unique IP"); + assert_eq!(ip_a, "10.88.0.2"); + assert_eq!(ip_b, "10.88.0.3"); } #[test] fn test_pod_to_zone_config_no_spec_returns_error() { + let (controller, _dir) = make_test_controller(); + let mut pod = Pod::default(); pod.metadata.name = Some("test-pod".to_string()); // No spec set - let config = PodControllerConfig { - node_name: "node1".to_string(), - api_url: "http://127.0.0.1:6443".to_string(), - zonepath_prefix: "/zones".to_string(), - zfs_parent_dataset: "rpool/zones".to_string(), - default_brand: ZoneBrand::Reddwarf, - network: NetworkMode::Etherstub(EtherstubConfig { - etherstub_name: "reddwarf0".to_string(), - vnic_name: "vnic0".to_string(), - ip_address: "10.0.0.2".to_string(), - gateway: "10.0.0.1".to_string(), - }), - }; - - let result = pod_to_zone_config(&pod, &config); + let result = controller.pod_to_zone_config(&pod); 
assert!(result.is_err()); } } diff --git a/crates/reddwarf-runtime/src/error.rs b/crates/reddwarf-runtime/src/error.rs index cf4df1a..64b1cde 100644 --- a/crates/reddwarf-runtime/src/error.rs +++ b/crates/reddwarf-runtime/src/error.rs @@ -115,6 +115,22 @@ pub enum RuntimeError { #[diagnostic(transparent)] CoreError(#[from] reddwarf_core::ReddwarfError), + /// Storage error + #[error(transparent)] + #[diagnostic(transparent)] + StorageError(#[from] reddwarf_storage::StorageError), + + /// IP address pool exhausted + #[error("IPAM pool exhausted: no free addresses in {cidr}")] + #[diagnostic( + code(reddwarf::runtime::ipam_pool_exhausted), + help("Expand the pod CIDR range or delete unused pods to free addresses") + )] + IpamPoolExhausted { + #[allow(unused)] + cidr: String, + }, + /// Internal error #[error("Internal runtime error: {message}")] #[diagnostic( diff --git a/crates/reddwarf-runtime/src/lib.rs b/crates/reddwarf-runtime/src/lib.rs index 4d1d130..9007338 100644 --- a/crates/reddwarf-runtime/src/lib.rs +++ b/crates/reddwarf-runtime/src/lib.rs @@ -19,6 +19,7 @@ pub mod zone; // Re-export primary types pub use error::{Result, RuntimeError}; pub use mock::MockRuntime; +pub use network::{CidrConfig, IpAllocation, Ipam}; pub use traits::ZoneRuntime; pub use types::{ ContainerProcess, DirectNicConfig, EtherstubConfig, FsMount, NetworkMode, ZfsConfig, ZoneBrand, diff --git a/crates/reddwarf-runtime/src/mock.rs b/crates/reddwarf-runtime/src/mock.rs index 1b1e501..791d015 100644 --- a/crates/reddwarf-runtime/src/mock.rs +++ b/crates/reddwarf-runtime/src/mock.rs @@ -312,6 +312,7 @@ mod tests { vnic_name: format!("vnic_{}", name), ip_address: "10.0.0.2".to_string(), gateway: "10.0.0.1".to_string(), + prefix_len: 16, }), zfs: ZfsConfig { parent_dataset: "rpool/zones".to_string(), diff --git a/crates/reddwarf-runtime/src/network/ipam.rs b/crates/reddwarf-runtime/src/network/ipam.rs new file mode 100644 index 0000000..18eaac9 --- /dev/null +++ 
b/crates/reddwarf-runtime/src/network/ipam.rs @@ -0,0 +1,336 @@ +use crate::error::{Result, RuntimeError}; +use reddwarf_storage::KVStore; +use std::collections::BTreeMap; +use std::net::Ipv4Addr; +use std::sync::Arc; +use tracing::debug; + +/// Parsed CIDR configuration +#[derive(Debug, Clone)] +pub struct CidrConfig { + /// Base network address + pub network: Ipv4Addr, + /// CIDR prefix length + pub prefix_len: u8, + /// Gateway address (network + 1) + pub gateway: Ipv4Addr, + /// First allocatable host address (network + 2) + pub first_host: Ipv4Addr, + /// Broadcast address (last in range) + pub broadcast: Ipv4Addr, +} + +/// An allocated IP for a pod +#[derive(Debug, Clone)] +pub struct IpAllocation { + pub ip_address: Ipv4Addr, + pub gateway: Ipv4Addr, + pub prefix_len: u8, +} + +/// IPAM (IP Address Management) backed by a KVStore +/// +/// Storage keys: +/// - `ipam/_cidr` → the CIDR string (e.g. "10.88.0.0/16") +/// - `ipam/alloc/{ip}` → `"{namespace}/{pod_name}"` +pub struct Ipam { + storage: Arc, + cidr: CidrConfig, +} + +const IPAM_CIDR_KEY: &[u8] = b"ipam/_cidr"; +const IPAM_ALLOC_PREFIX: &[u8] = b"ipam/alloc/"; + +impl Ipam { + /// Create a new IPAM instance, persisting the CIDR config + pub fn new(storage: Arc, cidr_str: &str) -> Result { + let cidr = parse_cidr(cidr_str)?; + + // Persist the CIDR configuration + storage.put(IPAM_CIDR_KEY, cidr_str.as_bytes())?; + + debug!( + "IPAM initialized: network={}, gateway={}, first_host={}, broadcast={}, prefix_len={}", + cidr.network, cidr.gateway, cidr.first_host, cidr.broadcast, cidr.prefix_len + ); + + Ok(Self { storage, cidr }) + } + + /// Allocate an IP for a pod. Idempotent: returns existing allocation if one exists. 
+ pub fn allocate(&self, namespace: &str, pod_name: &str) -> Result<IpAllocation> { + let pod_key = format!("{}/{}", namespace, pod_name); + + // Check if this pod already has an allocation + let allocations = self.storage.scan(IPAM_ALLOC_PREFIX)?; + for (key, value) in &allocations { + let existing_pod = String::from_utf8_lossy(value); + if existing_pod == pod_key { + // Parse the IP from the key: "ipam/alloc/{ip}" + let key_str = String::from_utf8_lossy(key); + let ip_str = &key_str[IPAM_ALLOC_PREFIX.len()..]; + if let Ok(ip) = ip_str.parse::<Ipv4Addr>() { + debug!("IPAM: returning existing allocation {} for {}", ip, pod_key); + return Ok(IpAllocation { + ip_address: ip, + gateway: self.cidr.gateway, + prefix_len: self.cidr.prefix_len, + }); + } + } + } + + // Collect already-allocated IPs + let allocated: std::collections::HashSet<Ipv4Addr> = allocations + .iter() + .filter_map(|(key, _)| { + let key_str = String::from_utf8_lossy(key); + let ip_str = &key_str[IPAM_ALLOC_PREFIX.len()..]; + ip_str.parse::<Ipv4Addr>().ok() + }) + .collect(); + + // Find next free IP starting from first_host + let mut candidate = self.cidr.first_host; + loop { + if candidate >= self.cidr.broadcast { + return Err(RuntimeError::IpamPoolExhausted { + cidr: format!("{}/{}", self.cidr.network, self.cidr.prefix_len), + }); + } + + if !allocated.contains(&candidate) { + // Allocate this IP + let alloc_key = format!("ipam/alloc/{}", candidate); + self.storage.put(alloc_key.as_bytes(), pod_key.as_bytes())?; + + debug!("IPAM: allocated {} for {}", candidate, pod_key); + return Ok(IpAllocation { + ip_address: candidate, + gateway: self.cidr.gateway, + prefix_len: self.cidr.prefix_len, + }); + } + + candidate = next_ip(candidate); + } + } + + /// Release the IP allocated to a pod + pub fn release(&self, namespace: &str, pod_name: &str) -> Result<Option<Ipv4Addr>> { + let pod_key = format!("{}/{}", namespace, pod_name); + + let allocations = self.storage.scan(IPAM_ALLOC_PREFIX)?; + for (key, value) in &allocations { + let existing_pod =
String::from_utf8_lossy(value); + if existing_pod == pod_key { + let key_str = String::from_utf8_lossy(key); + let ip_str = &key_str[IPAM_ALLOC_PREFIX.len()..]; + let ip = ip_str.parse::<Ipv4Addr>().ok(); + + self.storage.delete(key)?; + debug!("IPAM: released {:?} for {}", ip, pod_key); + return Ok(ip); + } + } + + debug!("IPAM: no allocation found for {}", pod_key); + Ok(None) + } + + /// Get all current allocations + pub fn get_all_allocations(&self) -> Result<BTreeMap<Ipv4Addr, String>> { + let allocations = self.storage.scan(IPAM_ALLOC_PREFIX)?; + let mut result = BTreeMap::new(); + + for (key, value) in &allocations { + let key_str = String::from_utf8_lossy(key); + let ip_str = &key_str[IPAM_ALLOC_PREFIX.len()..]; + if let Ok(ip) = ip_str.parse::<Ipv4Addr>() { + result.insert(ip, String::from_utf8_lossy(value).into_owned()); + } + } + + Ok(result) + } +} + +/// Parse a CIDR string like "10.88.0.0/16" into a CidrConfig +pub fn parse_cidr(cidr_str: &str) -> Result<CidrConfig> { + let parts: Vec<&str> = cidr_str.split('/').collect(); + if parts.len() != 2 { + return Err(RuntimeError::invalid_config( + format!("Invalid CIDR format: '{}'", cidr_str), + "Use format like '10.88.0.0/16'", + )); + } + + let network: Ipv4Addr = parts[0].parse().map_err(|_| { + RuntimeError::invalid_config( + format!("Invalid network address: '{}'", parts[0]), + "Use a valid IPv4 address like '10.88.0.0'", + ) + })?; + + let prefix_len: u8 = parts[1].parse().map_err(|_| { + RuntimeError::invalid_config( + format!("Invalid prefix length: '{}'", parts[1]), + "Use a number between 0 and 32", + ) + })?; + + if prefix_len > 32 { + return Err(RuntimeError::invalid_config( + format!("Prefix length {} is out of range", prefix_len), + "Use a number between 0 and 32", + )); + } + + let network_u32 = u32::from(network); + let host_bits = 32 - prefix_len; + let mask = if prefix_len == 0 { + 0u32 + } else { + !((1u32 << host_bits) - 1) + }; + let broadcast_u32 = network_u32 | !mask; + + let gateway = Ipv4Addr::from(network_u32 + 1); + let first_host =
Ipv4Addr::from(network_u32 + 2); + let broadcast = Ipv4Addr::from(broadcast_u32); + + Ok(CidrConfig { + network, + prefix_len, + gateway, + first_host, + broadcast, + }) +} + +/// Increment an IPv4 address by one +fn next_ip(ip: Ipv4Addr) -> Ipv4Addr { + Ipv4Addr::from(u32::from(ip) + 1) +} + +#[cfg(test)] +mod tests { + use super::*; + use reddwarf_storage::RedbBackend; + use tempfile::tempdir; + + fn make_test_ipam(cidr: &str) -> Ipam { + let dir = tempdir().unwrap(); + let db_path = dir.path().join("test-ipam.redb"); + let storage = Arc::new(RedbBackend::new(&db_path).unwrap()); + // We need to keep tempdir alive for the duration, but for tests + // we leak it to avoid dropping the temp dir too early + std::mem::forget(dir); + Ipam::new(storage, cidr).unwrap() + } + + #[test] + fn test_parse_cidr_valid() { + let cidr = parse_cidr("10.88.0.0/16").unwrap(); + assert_eq!(cidr.network, Ipv4Addr::new(10, 88, 0, 0)); + assert_eq!(cidr.prefix_len, 16); + assert_eq!(cidr.gateway, Ipv4Addr::new(10, 88, 0, 1)); + assert_eq!(cidr.first_host, Ipv4Addr::new(10, 88, 0, 2)); + assert_eq!(cidr.broadcast, Ipv4Addr::new(10, 88, 255, 255)); + } + + #[test] + fn test_parse_cidr_slash24() { + let cidr = parse_cidr("192.168.1.0/24").unwrap(); + assert_eq!(cidr.network, Ipv4Addr::new(192, 168, 1, 0)); + assert_eq!(cidr.gateway, Ipv4Addr::new(192, 168, 1, 1)); + assert_eq!(cidr.first_host, Ipv4Addr::new(192, 168, 1, 2)); + assert_eq!(cidr.broadcast, Ipv4Addr::new(192, 168, 1, 255)); + } + + #[test] + fn test_parse_cidr_invalid() { + assert!(parse_cidr("not-a-cidr").is_err()); + assert!(parse_cidr("10.88.0.0").is_err()); + assert!(parse_cidr("10.88.0.0/33").is_err()); + assert!(parse_cidr("bad/16").is_err()); + } + + #[test] + fn test_allocate_sequential() { + let ipam = make_test_ipam("10.88.0.0/16"); + + let alloc1 = ipam.allocate("default", "pod-a").unwrap(); + assert_eq!(alloc1.ip_address, Ipv4Addr::new(10, 88, 0, 2)); + assert_eq!(alloc1.gateway, Ipv4Addr::new(10, 88, 0, 1)); + 
assert_eq!(alloc1.prefix_len, 16); + + let alloc2 = ipam.allocate("default", "pod-b").unwrap(); + assert_eq!(alloc2.ip_address, Ipv4Addr::new(10, 88, 0, 3)); + } + + #[test] + fn test_allocate_idempotent() { + let ipam = make_test_ipam("10.88.0.0/16"); + + let alloc1 = ipam.allocate("default", "pod-a").unwrap(); + let alloc2 = ipam.allocate("default", "pod-a").unwrap(); + assert_eq!(alloc1.ip_address, alloc2.ip_address); + } + + #[test] + fn test_release_and_reallocate() { + let ipam = make_test_ipam("10.88.0.0/16"); + + let alloc1 = ipam.allocate("default", "pod-a").unwrap(); + let first_ip = alloc1.ip_address; + + // Allocate a second pod + let _alloc2 = ipam.allocate("default", "pod-b").unwrap(); + + // Release first pod + let released = ipam.release("default", "pod-a").unwrap(); + assert_eq!(released, Some(first_ip)); + + // New pod should reuse the freed IP + let alloc3 = ipam.allocate("default", "pod-c").unwrap(); + assert_eq!(alloc3.ip_address, first_ip); + } + + #[test] + fn test_pool_exhaustion() { + // /30 gives us network .0, gateway .1, one host .2, broadcast .3 + let ipam = make_test_ipam("10.0.0.0/30"); + + // First allocation should succeed (.2) + let alloc = ipam.allocate("default", "pod-a").unwrap(); + assert_eq!(alloc.ip_address, Ipv4Addr::new(10, 0, 0, 2)); + + // Second allocation should fail (only .2 is usable, .3 is broadcast) + let result = ipam.allocate("default", "pod-b"); + assert!(matches!( + result.unwrap_err(), + RuntimeError::IpamPoolExhausted { .. 
} + )); + } + + #[test] + fn test_get_all_allocations() { + let ipam = make_test_ipam("10.88.0.0/16"); + + ipam.allocate("default", "pod-a").unwrap(); + ipam.allocate("kube-system", "pod-b").unwrap(); + + let allocs = ipam.get_all_allocations().unwrap(); + assert_eq!(allocs.len(), 2); + assert_eq!(allocs[&Ipv4Addr::new(10, 88, 0, 2)], "default/pod-a"); + assert_eq!(allocs[&Ipv4Addr::new(10, 88, 0, 3)], "kube-system/pod-b"); + } + + #[test] + fn test_release_nonexistent() { + let ipam = make_test_ipam("10.88.0.0/16"); + let released = ipam.release("default", "nonexistent").unwrap(); + assert_eq!(released, None); + } +} diff --git a/crates/reddwarf-runtime/src/network/mod.rs b/crates/reddwarf-runtime/src/network/mod.rs index ce27e82..baf26f5 100644 --- a/crates/reddwarf-runtime/src/network/mod.rs +++ b/crates/reddwarf-runtime/src/network/mod.rs @@ -1,6 +1,8 @@ +pub mod ipam; pub mod types; pub use crate::types::{DirectNicConfig, EtherstubConfig, NetworkMode}; +pub use ipam::{CidrConfig, IpAllocation, Ipam}; /// Generate a VNIC name from pod namespace and name pub fn vnic_name_for_pod(namespace: &str, pod_name: &str) -> String { diff --git a/crates/reddwarf-runtime/src/types.rs b/crates/reddwarf-runtime/src/types.rs index 32f6527..91cb3b6 100644 --- a/crates/reddwarf-runtime/src/types.rs +++ b/crates/reddwarf-runtime/src/types.rs @@ -104,6 +104,8 @@ pub struct EtherstubConfig { pub ip_address: String, /// Gateway address pub gateway: String, + /// CIDR prefix length (e.g., 16 for /16) + pub prefix_len: u8, } /// Direct NIC-based network configuration @@ -117,6 +119,8 @@ pub struct DirectNicConfig { pub ip_address: String, /// Gateway address pub gateway: String, + /// CIDR prefix length (e.g., 16 for /16) + pub prefix_len: u8, } /// ZFS dataset configuration for zone storage diff --git a/crates/reddwarf-runtime/src/zone/config.rs b/crates/reddwarf-runtime/src/zone/config.rs index 4619cd1..c592be5 100644 --- a/crates/reddwarf-runtime/src/zone/config.rs +++ 
b/crates/reddwarf-runtime/src/zone/config.rs @@ -11,12 +11,24 @@ pub fn generate_zonecfg(config: &ZoneConfig) -> Result { lines.push("set ip-type=exclusive".to_string()); // Network resource - let vnic_name = match &config.network { - NetworkMode::Etherstub(cfg) => &cfg.vnic_name, - NetworkMode::Direct(cfg) => &cfg.vnic_name, + let (vnic_name, ip_address, gateway, prefix_len) = match &config.network { + NetworkMode::Etherstub(cfg) => ( + &cfg.vnic_name, + &cfg.ip_address, + &cfg.gateway, + cfg.prefix_len, + ), + NetworkMode::Direct(cfg) => ( + &cfg.vnic_name, + &cfg.ip_address, + &cfg.gateway, + cfg.prefix_len, + ), }; lines.push("add net".to_string()); lines.push(format!("set physical={}", vnic_name)); + lines.push(format!("set allowed-address={}/{}", ip_address, prefix_len)); + lines.push(format!("set defrouter={}", gateway)); lines.push("end".to_string()); // CPU cap @@ -67,6 +79,7 @@ mod tests { vnic_name: "vnic0".to_string(), ip_address: "10.0.0.2".to_string(), gateway: "10.0.0.1".to_string(), + prefix_len: 16, }), zfs: ZfsConfig { parent_dataset: "rpool/zones".to_string(), @@ -85,6 +98,8 @@ mod tests { assert!(result.contains("set zonepath=/zones/test-zone")); assert!(result.contains("set ip-type=exclusive")); assert!(result.contains("set physical=vnic0")); + assert!(result.contains("set allowed-address=10.0.0.2/16")); + assert!(result.contains("set defrouter=10.0.0.1")); assert!(result.contains("set ncpus=2.0")); assert!(result.contains("set physical=1G")); assert!(result.contains("verify")); @@ -102,6 +117,7 @@ mod tests { vnic_name: "vnic1".to_string(), ip_address: "192.168.1.10".to_string(), gateway: "192.168.1.1".to_string(), + prefix_len: 24, }), zfs: ZfsConfig { parent_dataset: "rpool/zones".to_string(), @@ -128,6 +144,8 @@ mod tests { let result = generate_zonecfg(&config).unwrap(); assert!(result.contains("set brand=reddwarf")); assert!(result.contains("set physical=vnic1")); + assert!(result.contains("set allowed-address=192.168.1.10/24")); + 
assert!(result.contains("set defrouter=192.168.1.1")); assert!(result.contains("set physical=512M")); assert!(result.contains("add fs")); assert!(result.contains("set dir=/etc/app")); diff --git a/crates/reddwarf/src/main.rs b/crates/reddwarf/src/main.rs index 5bf85f1..78a0a63 100644 --- a/crates/reddwarf/src/main.rs +++ b/crates/reddwarf/src/main.rs @@ -2,8 +2,8 @@ use clap::{Parser, Subcommand}; use reddwarf_apiserver::{ApiError, ApiServer, AppState, Config as ApiConfig}; use reddwarf_core::Namespace; use reddwarf_runtime::{ - ApiClient, EtherstubConfig, MockRuntime, NetworkMode, NodeAgent, NodeAgentConfig, - PodController, PodControllerConfig, ZoneBrand, + ApiClient, Ipam, MockRuntime, NodeAgent, NodeAgentConfig, PodController, PodControllerConfig, + ZoneBrand, }; use reddwarf_scheduler::scheduler::SchedulerConfig; use reddwarf_scheduler::Scheduler; @@ -48,6 +48,12 @@ enum Commands { /// Parent ZFS dataset for zone storage #[arg(long, default_value = "rpool/zones")] zfs_parent: String, + /// Pod network CIDR for IPAM allocation + #[arg(long, default_value = "10.88.0.0/16")] + pod_cidr: String, + /// Etherstub name for pod networking + #[arg(long, default_value = "reddwarf0")] + etherstub_name: String, }, } @@ -71,7 +77,20 @@ async fn main() -> miette::Result<()> { data_dir, zonepath_prefix, zfs_parent, - } => run_agent(&node_name, &bind, &data_dir, &zonepath_prefix, &zfs_parent).await, + pod_cidr, + etherstub_name, + } => { + run_agent( + &node_name, + &bind, + &data_dir, + &zonepath_prefix, + &zfs_parent, + &pod_cidr, + &etherstub_name, + ) + .await + } } } @@ -105,6 +124,8 @@ async fn run_agent( data_dir: &str, zonepath_prefix: &str, zfs_parent: &str, + pod_cidr: &str, + etherstub_name: &str, ) -> miette::Result<()> { info!("Starting reddwarf agent for node '{}'", node_name); @@ -158,7 +179,12 @@ async fn run_agent( // 3. Create runtime (MockRuntime on non-illumos, IllumosRuntime on illumos) let runtime: Arc = create_runtime(); - // 4.
Spawn pod controller + // 4. Create IPAM for per-pod IP allocation + let ipam = Ipam::new(state.storage.clone(), pod_cidr).map_err(|e| { + miette::miette!("Failed to initialize IPAM with CIDR '{}': {}", pod_cidr, e) + })?; + + // 5. Spawn pod controller let api_client = Arc::new(ApiClient::new(&api_url)); let controller_config = PodControllerConfig { node_name: node_name.to_string(), @@ -166,12 +192,8 @@ async fn run_agent( zonepath_prefix: zonepath_prefix.to_string(), zfs_parent_dataset: zfs_parent.to_string(), default_brand: ZoneBrand::Reddwarf, - network: NetworkMode::Etherstub(EtherstubConfig { - etherstub_name: "reddwarf0".to_string(), - vnic_name: "reddwarf_vnic0".to_string(), - ip_address: "10.88.0.2".to_string(), - gateway: "10.88.0.1".to_string(), - }), + etherstub_name: etherstub_name.to_string(), + pod_cidr: pod_cidr.to_string(), }; let controller = PodController::new( @@ -179,6 +201,7 @@ async fn run_agent( api_client.clone(), state.event_tx.clone(), controller_config, + ipam, ); let controller_token = token.clone(); let controller_handle = tokio::spawn(async move { @@ -187,7 +210,7 @@ async fn run_agent( } }); - // 5. Spawn node agent + // 6. Spawn node agent let node_agent_config = NodeAgentConfig::new(node_name.to_string(), api_url); let node_agent = NodeAgent::new(api_client, node_agent_config); let agent_token = token.clone(); @@ -198,8 +221,8 @@ async fn run_agent( }); info!( - "All components started. API server on {}, node name: {}", - bind, node_name + "All components started. API server on {}, node name: {}, pod CIDR: {}", + bind, node_name, pod_cidr ); // Wait for shutdown signal