Add pod networking: IPAM, per-pod VNICs, and zone IP configuration

Each pod now gets a unique VNIC name and IP address from a configurable
CIDR pool, with IPs released on pod deletion. This replaces the
hardcoded single VNIC/IP that prevented multiple pods from running.

- Add redb-backed IPAM module with allocate/release/idempotent semantics
- Add prefix_len to EtherstubConfig and DirectNicConfig
- Generate allowed-address and defrouter in zonecfg net blocks
- Wire vnic_name_for_pod() into controller for unique VNIC names
- Add --pod-cidr and --etherstub-name CLI flags to agent subcommand
- Add StorageError and IpamPoolExhausted error variants with diagnostics

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Till Wegmueller 2026-02-09 00:17:45 +01:00
parent c50ecb2664
commit 57186ebe68
No known key found for this signature in database
12 changed files with 609 additions and 118 deletions

1
Cargo.lock generated
View file

@ -1393,6 +1393,7 @@ dependencies = [
"k8s-openapi", "k8s-openapi",
"miette", "miette",
"reddwarf-core", "reddwarf-core",
"reddwarf-storage",
"reqwest", "reqwest",
"serde", "serde",
"serde_json", "serde_json",

View file

@ -9,6 +9,7 @@ rust-version.workspace = true
[dependencies] [dependencies]
reddwarf-core = { workspace = true } reddwarf-core = { workspace = true }
reddwarf-storage = { workspace = true }
k8s-openapi = { workspace = true } k8s-openapi = { workspace = true }
tokio = { workspace = true } tokio = { workspace = true }
tokio-stream = { workspace = true } tokio-stream = { workspace = true }

View file

@ -28,6 +28,7 @@ mod tests {
vnic_name: "vnic0".to_string(), vnic_name: "vnic0".to_string(),
ip_address: "10.0.0.2".to_string(), ip_address: "10.0.0.2".to_string(),
gateway: "10.0.0.1".to_string(), gateway: "10.0.0.1".to_string(),
prefix_len: 16,
}), }),
zfs: ZfsConfig { zfs: ZfsConfig {
parent_dataset: "rpool/zones".to_string(), parent_dataset: "rpool/zones".to_string(),

View file

@ -1,5 +1,6 @@
use crate::api_client::ApiClient; use crate::api_client::ApiClient;
use crate::error::{Result, RuntimeError}; use crate::error::{Result, RuntimeError};
use crate::network::{vnic_name_for_pod, Ipam};
use crate::traits::ZoneRuntime; use crate::traits::ZoneRuntime;
use crate::types::*; use crate::types::*;
use k8s_openapi::api::core::v1::{Pod, PodCondition, PodStatus}; use k8s_openapi::api::core::v1::{Pod, PodCondition, PodStatus};
@ -22,8 +23,10 @@ pub struct PodControllerConfig {
pub zfs_parent_dataset: String, pub zfs_parent_dataset: String,
/// Default zone brand /// Default zone brand
pub default_brand: ZoneBrand, pub default_brand: ZoneBrand,
/// Default network configuration /// Name of the etherstub for pod networking
pub network: NetworkMode, pub etherstub_name: String,
/// Pod CIDR (e.g., "10.88.0.0/16")
pub pod_cidr: String,
} }
/// Pod controller that watches for Pod events and drives zone lifecycle /// Pod controller that watches for Pod events and drives zone lifecycle
@ -32,6 +35,7 @@ pub struct PodController {
api_client: Arc<ApiClient>, api_client: Arc<ApiClient>,
event_tx: broadcast::Sender<ResourceEvent>, event_tx: broadcast::Sender<ResourceEvent>,
config: PodControllerConfig, config: PodControllerConfig,
ipam: Ipam,
} }
impl PodController { impl PodController {
@ -40,12 +44,14 @@ impl PodController {
api_client: Arc<ApiClient>, api_client: Arc<ApiClient>,
event_tx: broadcast::Sender<ResourceEvent>, event_tx: broadcast::Sender<ResourceEvent>,
config: PodControllerConfig, config: PodControllerConfig,
ipam: Ipam,
) -> Self { ) -> Self {
Self { Self {
runtime, runtime,
api_client, api_client,
event_tx, event_tx,
config, config,
ipam,
} }
} }
@ -205,7 +211,7 @@ impl PodController {
"" | "Pending" => { "" | "Pending" => {
// Pod is assigned to us but has no phase — provision it // Pod is assigned to us but has no phase — provision it
info!("Provisioning zone for pod {}/{}", namespace, pod_name); info!("Provisioning zone for pod {}/{}", namespace, pod_name);
let zone_config = pod_to_zone_config(pod, &self.config)?; let zone_config = self.pod_to_zone_config(pod)?;
match self.runtime.provision(&zone_config).await { match self.runtime.provision(&zone_config).await {
Ok(()) => { Ok(()) => {
@ -328,7 +334,7 @@ impl PodController {
Ok(()) Ok(())
} }
/// Handle pod deletion — deprovision the zone /// Handle pod deletion — deprovision the zone and release IP
pub async fn handle_delete(&self, pod: &Pod) -> Result<()> { pub async fn handle_delete(&self, pod: &Pod) -> Result<()> {
let pod_name = pod let pod_name = pod
.metadata .metadata
@ -348,7 +354,7 @@ impl PodController {
} }
} }
let zone_config = pod_to_zone_config(pod, &self.config)?; let zone_config = self.pod_to_zone_config(pod)?;
info!( info!(
"Deprovisioning zone for deleted pod {}/{}", "Deprovisioning zone for deleted pod {}/{}",
namespace, pod_name namespace, pod_name
@ -361,9 +367,96 @@ impl PodController {
); );
} }
// Release the IP allocation
if let Err(e) = self.ipam.release(namespace, pod_name) {
warn!(
"Failed to release IP for pod {}/{}: {}",
namespace, pod_name, e
);
}
Ok(()) Ok(())
} }
/// Convert a Pod spec to a ZoneConfig with per-pod VNIC and IP.
///
/// Derives the zone name and zonepath from the pod's namespace/name,
/// allocates a unique VNIC name plus an IP from the IPAM pool, and maps
/// each container to a ContainerProcess entry.
///
/// Errors if the pod has no name or no spec, or if IPAM allocation fails
/// (e.g. the pod CIDR is exhausted).
fn pod_to_zone_config(&self, pod: &Pod) -> Result<ZoneConfig> {
// A pod without a name cannot be mapped to a zone.
let pod_name = pod
.metadata
.name
.as_deref()
.ok_or_else(|| RuntimeError::internal_error("Pod has no name"))?;
// Kubernetes convention: a missing namespace means "default".
let namespace = pod.metadata.namespace.as_deref().unwrap_or("default");
let spec = pod
.spec
.as_ref()
.ok_or_else(|| RuntimeError::internal_error("Pod has no spec"))?;
let zone_name = pod_zone_name(namespace, pod_name);
let zonepath = format!("{}/{}", self.config.zonepath_prefix, zone_name);
// Allocate a unique VNIC name and IP for this pod.
// Ipam::allocate is idempotent, so re-running provisioning for the same
// pod returns the existing address rather than leaking a new one.
let vnic_name = vnic_name_for_pod(namespace, pod_name);
let allocation = self.ipam.allocate(namespace, pod_name)?;
let network = NetworkMode::Etherstub(EtherstubConfig {
etherstub_name: self.config.etherstub_name.clone(),
vnic_name,
ip_address: allocation.ip_address.to_string(),
gateway: allocation.gateway.to_string(),
prefix_len: allocation.prefix_len,
});
// Map containers to ContainerProcess entries. `command` and `args` are
// concatenated into a single argv; only env vars with literal values are
// carried over (entries using `valueFrom` sources are dropped here).
let processes: Vec<ContainerProcess> = spec
.containers
.iter()
.map(|c| {
let command = c
.command
.clone()
.unwrap_or_default()
.into_iter()
.chain(c.args.clone().unwrap_or_default())
.collect::<Vec<_>>();
let env = c
.env
.as_ref()
.map(|envs| {
envs.iter()
.filter_map(|e| e.value.as_ref().map(|v| (e.name.clone(), v.clone())))
.collect::<Vec<_>>()
})
.unwrap_or_default();
ContainerProcess {
name: c.name.clone(),
command,
working_dir: c.working_dir.clone(),
env,
}
})
.collect();
// Caps and mounts are not derived from the pod spec yet; defaults apply.
Ok(ZoneConfig {
zone_name,
brand: self.config.default_brand.clone(),
zonepath,
network,
zfs: ZfsConfig {
parent_dataset: self.config.zfs_parent_dataset.clone(),
clone_from: None,
quota: None,
},
lx_image_path: None,
processes,
cpu_cap: None,
memory_cap: None,
fs_mounts: vec![],
})
}
/// Extract IP address from zone config network /// Extract IP address from zone config network
fn zone_ip(&self, config: &ZoneConfig) -> String { fn zone_ip(&self, config: &ZoneConfig) -> String {
match &config.network { match &config.network {
@ -396,77 +489,38 @@ pub fn pod_zone_name(namespace: &str, pod_name: &str) -> String {
} }
} }
/// Convert a Pod spec to a ZoneConfig for the runtime
pub fn pod_to_zone_config(pod: &Pod, config: &PodControllerConfig) -> Result<ZoneConfig> {
let pod_name = pod
.metadata
.name
.as_deref()
.ok_or_else(|| RuntimeError::internal_error("Pod has no name"))?;
let namespace = pod.metadata.namespace.as_deref().unwrap_or("default");
let spec = pod
.spec
.as_ref()
.ok_or_else(|| RuntimeError::internal_error("Pod has no spec"))?;
let zone_name = pod_zone_name(namespace, pod_name);
let zonepath = format!("{}/{}", config.zonepath_prefix, zone_name);
// Map containers to ContainerProcess entries
let processes: Vec<ContainerProcess> = spec
.containers
.iter()
.map(|c| {
let command = c
.command
.clone()
.unwrap_or_default()
.into_iter()
.chain(c.args.clone().unwrap_or_default())
.collect::<Vec<_>>();
let env = c
.env
.as_ref()
.map(|envs| {
envs.iter()
.filter_map(|e| e.value.as_ref().map(|v| (e.name.clone(), v.clone())))
.collect::<Vec<_>>()
})
.unwrap_or_default();
ContainerProcess {
name: c.name.clone(),
command,
working_dir: c.working_dir.clone(),
env,
}
})
.collect();
Ok(ZoneConfig {
zone_name,
brand: config.default_brand.clone(),
zonepath,
network: config.network.clone(),
zfs: ZfsConfig {
parent_dataset: config.zfs_parent_dataset.clone(),
clone_from: None,
quota: None,
},
lx_image_path: None,
processes,
cpu_cap: None,
memory_cap: None,
fs_mounts: vec![],
})
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::network::Ipam;
use k8s_openapi::api::core::v1::{Container, PodSpec}; use k8s_openapi::api::core::v1::{Container, PodSpec};
use reddwarf_storage::RedbBackend;
use std::net::Ipv4Addr;
use tempfile::tempdir;
// Build a PodController wired to mock/in-memory dependencies for tests.
//
// Returns the controller together with the TempDir backing the redb store;
// the caller must keep the TempDir alive so the database file is not
// removed while the controller is still using it.
fn make_test_controller() -> (PodController, tempfile::TempDir) {
let dir = tempdir().unwrap();
let db_path = dir.path().join("test-controller.redb");
let storage = Arc::new(RedbBackend::new(&db_path).unwrap());
// Same CIDR as the config below so IPAM and controller config agree.
let ipam = Ipam::new(storage, "10.88.0.0/16").unwrap();
let runtime = Arc::new(crate::mock::MockRuntime::new());
// The API client never actually connects in these tests.
let api_client = Arc::new(ApiClient::new("http://127.0.0.1:6443"));
let (event_tx, _) = broadcast::channel(16);
let config = PodControllerConfig {
node_name: "node1".to_string(),
api_url: "http://127.0.0.1:6443".to_string(),
zonepath_prefix: "/zones".to_string(),
zfs_parent_dataset: "rpool/zones".to_string(),
default_brand: ZoneBrand::Reddwarf,
etherstub_name: "reddwarf0".to_string(),
pod_cidr: "10.88.0.0/16".to_string(),
};
let controller = PodController::new(runtime, api_client, event_tx, config, ipam);
(controller, dir)
}
#[test] #[test]
fn test_pod_zone_name_basic() { fn test_pod_zone_name_basic() {
@ -491,6 +545,8 @@ mod tests {
#[test] #[test]
fn test_pod_to_zone_config_maps_containers() { fn test_pod_to_zone_config_maps_containers() {
let (controller, _dir) = make_test_controller();
let mut pod = Pod::default(); let mut pod = Pod::default();
pod.metadata.name = Some("test-pod".to_string()); pod.metadata.name = Some("test-pod".to_string());
pod.metadata.namespace = Some("default".to_string()); pod.metadata.namespace = Some("default".to_string());
@ -511,21 +567,7 @@ mod tests {
..Default::default() ..Default::default()
}); });
let config = PodControllerConfig { let zone_config = controller.pod_to_zone_config(&pod).unwrap();
node_name: "node1".to_string(),
api_url: "http://127.0.0.1:6443".to_string(),
zonepath_prefix: "/zones".to_string(),
zfs_parent_dataset: "rpool/zones".to_string(),
default_brand: ZoneBrand::Reddwarf,
network: NetworkMode::Etherstub(EtherstubConfig {
etherstub_name: "reddwarf0".to_string(),
vnic_name: "vnic0".to_string(),
ip_address: "10.0.0.2".to_string(),
gateway: "10.0.0.1".to_string(),
}),
};
let zone_config = pod_to_zone_config(&pod, &config).unwrap();
assert_eq!(zone_config.zone_name, "reddwarf-default-test-pod"); assert_eq!(zone_config.zone_name, "reddwarf-default-test-pod");
assert_eq!(zone_config.zonepath, "/zones/reddwarf-default-test-pod"); assert_eq!(zone_config.zonepath, "/zones/reddwarf-default-test-pod");
@ -539,29 +581,74 @@ mod tests {
assert_eq!(zone_config.processes[1].command, vec!["/bin/sh", "-c"]); assert_eq!(zone_config.processes[1].command, vec!["/bin/sh", "-c"]);
assert_eq!(zone_config.brand, ZoneBrand::Reddwarf); assert_eq!(zone_config.brand, ZoneBrand::Reddwarf);
assert_eq!(zone_config.zfs.parent_dataset, "rpool/zones"); assert_eq!(zone_config.zfs.parent_dataset, "rpool/zones");
// Verify per-pod networking
match &zone_config.network {
NetworkMode::Etherstub(cfg) => {
assert_eq!(cfg.etherstub_name, "reddwarf0");
assert_eq!(cfg.vnic_name, "vnic_default_test_pod");
assert_eq!(cfg.ip_address, Ipv4Addr::new(10, 88, 0, 2).to_string());
assert_eq!(cfg.gateway, Ipv4Addr::new(10, 88, 0, 1).to_string());
assert_eq!(cfg.prefix_len, 16);
}
_ => panic!("Expected Etherstub network mode"),
}
}
#[test]
fn test_pod_to_zone_config_unique_ips() {
    let (controller, _dir) = make_test_controller();

    // Build a minimal pod with the given name. Both pods are identical
    // apart from the name, so any difference in assigned IPs must come
    // from the IPAM allocator rather than the pod spec.
    let make_pod = |name: &str| {
        let mut pod = Pod::default();
        pod.metadata.name = Some(name.to_string());
        pod.metadata.namespace = Some("default".to_string());
        pod.spec = Some(PodSpec {
            containers: vec![Container {
                name: "web".to_string(),
                command: Some(vec!["/bin/sh".to_string()]),
                ..Default::default()
            }],
            ..Default::default()
        });
        pod
    };

    let config_a = controller.pod_to_zone_config(&make_pod("pod-a")).unwrap();
    let config_b = controller.pod_to_zone_config(&make_pod("pod-b")).unwrap();

    // Extract the allocated address from the etherstub network config.
    let pod_ip = |config: &ZoneConfig| match &config.network {
        NetworkMode::Etherstub(cfg) => cfg.ip_address.clone(),
        _ => panic!("Expected Etherstub"),
    };

    let ip_a = pod_ip(&config_a);
    let ip_b = pod_ip(&config_b);
    assert_ne!(ip_a, ip_b, "Each pod should get a unique IP");
    // Allocation is sequential, starting at network + 2 (after the gateway).
    assert_eq!(ip_a, "10.88.0.2");
    assert_eq!(ip_b, "10.88.0.3");
}
#[test] #[test]
fn test_pod_to_zone_config_no_spec_returns_error() { fn test_pod_to_zone_config_no_spec_returns_error() {
let (controller, _dir) = make_test_controller();
let mut pod = Pod::default(); let mut pod = Pod::default();
pod.metadata.name = Some("test-pod".to_string()); pod.metadata.name = Some("test-pod".to_string());
// No spec set // No spec set
let config = PodControllerConfig { let result = controller.pod_to_zone_config(&pod);
node_name: "node1".to_string(),
api_url: "http://127.0.0.1:6443".to_string(),
zonepath_prefix: "/zones".to_string(),
zfs_parent_dataset: "rpool/zones".to_string(),
default_brand: ZoneBrand::Reddwarf,
network: NetworkMode::Etherstub(EtherstubConfig {
etherstub_name: "reddwarf0".to_string(),
vnic_name: "vnic0".to_string(),
ip_address: "10.0.0.2".to_string(),
gateway: "10.0.0.1".to_string(),
}),
};
let result = pod_to_zone_config(&pod, &config);
assert!(result.is_err()); assert!(result.is_err());
} }
} }

View file

@ -115,6 +115,22 @@ pub enum RuntimeError {
#[diagnostic(transparent)] #[diagnostic(transparent)]
CoreError(#[from] reddwarf_core::ReddwarfError), CoreError(#[from] reddwarf_core::ReddwarfError),
/// Storage error propagated from the reddwarf-storage backend.
#[error(transparent)]
#[diagnostic(transparent)]
StorageError(#[from] reddwarf_storage::StorageError),
/// IP address pool exhausted: every usable host address in the pod CIDR
/// is already allocated to a pod.
#[error("IPAM pool exhausted: no free addresses in {cidr}")]
#[diagnostic(
code(reddwarf::runtime::ipam_pool_exhausted),
help("Expand the pod CIDR range or delete unused pods to free addresses")
)]
IpamPoolExhausted {
// The CIDR of the exhausted pool, shown in the error message above.
// NOTE(review): `{cidr}` is interpolated by thiserror's #[error], so the
// field is read — the #[allow(unused)] looks redundant; confirm before
// removing it.
#[allow(unused)]
cidr: String,
},
/// Internal error /// Internal error
#[error("Internal runtime error: {message}")] #[error("Internal runtime error: {message}")]
#[diagnostic( #[diagnostic(

View file

@ -19,6 +19,7 @@ pub mod zone;
// Re-export primary types // Re-export primary types
pub use error::{Result, RuntimeError}; pub use error::{Result, RuntimeError};
pub use mock::MockRuntime; pub use mock::MockRuntime;
pub use network::{CidrConfig, IpAllocation, Ipam};
pub use traits::ZoneRuntime; pub use traits::ZoneRuntime;
pub use types::{ pub use types::{
ContainerProcess, DirectNicConfig, EtherstubConfig, FsMount, NetworkMode, ZfsConfig, ZoneBrand, ContainerProcess, DirectNicConfig, EtherstubConfig, FsMount, NetworkMode, ZfsConfig, ZoneBrand,

View file

@ -312,6 +312,7 @@ mod tests {
vnic_name: format!("vnic_{}", name), vnic_name: format!("vnic_{}", name),
ip_address: "10.0.0.2".to_string(), ip_address: "10.0.0.2".to_string(),
gateway: "10.0.0.1".to_string(), gateway: "10.0.0.1".to_string(),
prefix_len: 16,
}), }),
zfs: ZfsConfig { zfs: ZfsConfig {
parent_dataset: "rpool/zones".to_string(), parent_dataset: "rpool/zones".to_string(),

View file

@ -0,0 +1,336 @@
use crate::error::{Result, RuntimeError};
use reddwarf_storage::KVStore;
use std::collections::BTreeMap;
use std::net::Ipv4Addr;
use std::sync::Arc;
use tracing::debug;
/// Parsed CIDR configuration
///
/// Produced once by `parse_cidr` from a string like "10.88.0.0/16". By
/// convention the gateway is the first address after the network and pod
/// allocation starts at the second (`first_host`); the broadcast address
/// is never handed out.
#[derive(Debug, Clone)]
pub struct CidrConfig {
/// Base network address
pub network: Ipv4Addr,
/// CIDR prefix length
pub prefix_len: u8,
/// Gateway address (network + 1)
pub gateway: Ipv4Addr,
/// First allocatable host address (network + 2)
pub first_host: Ipv4Addr,
/// Broadcast address (last in range)
pub broadcast: Ipv4Addr,
}
/// An allocated IP for a pod
#[derive(Debug, Clone)]
pub struct IpAllocation {
/// Address assigned to the pod's VNIC
pub ip_address: Ipv4Addr,
/// Default router for the pod (gateway of the pool)
pub gateway: Ipv4Addr,
/// CIDR prefix length of the pool (used for allowed-address config)
pub prefix_len: u8,
}
/// IPAM (IP Address Management) backed by a KVStore
///
/// Storage keys:
/// - `ipam/_cidr` → the CIDR string (e.g. "10.88.0.0/16")
/// - `ipam/alloc/{ip}` → `"{namespace}/{pod_name}"`
///
/// Allocations persist across restarts via the KVStore; the pool bounds
/// are re-derived from the CIDR string each time `Ipam::new` runs.
pub struct Ipam {
// Persistent backend shared with the rest of the agent.
storage: Arc<dyn KVStore>,
// Parsed pool bounds derived from the configured CIDR string.
cidr: CidrConfig,
}
// Key under which the configured CIDR string is persisted.
const IPAM_CIDR_KEY: &[u8] = b"ipam/_cidr";
// Key prefix for per-IP allocation entries ("ipam/alloc/{ip}").
const IPAM_ALLOC_PREFIX: &[u8] = b"ipam/alloc/";
impl Ipam {
    /// Create a new IPAM instance, persisting the CIDR config.
    ///
    /// NOTE(review): the stored CIDR is overwritten unconditionally. If the
    /// pool is changed between runs, allocations made under the old CIDR
    /// remain in the store and may fall outside the new range — confirm
    /// whether a validation/migration pass is wanted here.
    pub fn new(storage: Arc<dyn KVStore>, cidr_str: &str) -> Result<Self> {
        let cidr = parse_cidr(cidr_str)?;
        // Persist the CIDR configuration
        storage.put(IPAM_CIDR_KEY, cidr_str.as_bytes())?;
        debug!(
            "IPAM initialized: network={}, gateway={}, first_host={}, broadcast={}, prefix_len={}",
            cidr.network, cidr.gateway, cidr.first_host, cidr.broadcast, cidr.prefix_len
        );
        Ok(Self { storage, cidr })
    }

    /// Parse the IPv4 address out of an `ipam/alloc/{ip}` storage key.
    ///
    /// Returns `None` for keys that are too short or whose suffix is not a
    /// valid dotted-quad address. Centralizes parsing that was previously
    /// duplicated in `allocate`, `release`, and `get_all_allocations`, and
    /// uses a checked slice (`get`) instead of a panicking index.
    fn ip_from_alloc_key(key: &[u8]) -> Option<Ipv4Addr> {
        let key_str = String::from_utf8_lossy(key);
        key_str.get(IPAM_ALLOC_PREFIX.len()..)?.parse().ok()
    }

    /// Allocate an IP for a pod. Idempotent: returns existing allocation if one exists.
    pub fn allocate(&self, namespace: &str, pod_name: &str) -> Result<IpAllocation> {
        let pod_key = format!("{}/{}", namespace, pod_name);
        // Check if this pod already has an allocation (idempotency): re-running
        // provisioning must not leak a second address.
        let allocations = self.storage.scan(IPAM_ALLOC_PREFIX)?;
        for (key, value) in &allocations {
            if String::from_utf8_lossy(value) == pod_key {
                if let Some(ip) = Self::ip_from_alloc_key(key) {
                    debug!("IPAM: returning existing allocation {} for {}", ip, pod_key);
                    return Ok(IpAllocation {
                        ip_address: ip,
                        gateway: self.cidr.gateway,
                        prefix_len: self.cidr.prefix_len,
                    });
                }
            }
        }
        // Collect already-allocated IPs so the linear scan below can skip them.
        let allocated: std::collections::HashSet<Ipv4Addr> = allocations
            .iter()
            .filter_map(|(key, _)| Self::ip_from_alloc_key(key))
            .collect();
        // Find the next free IP starting from first_host. The broadcast
        // address (and anything at or beyond it) is never handed out.
        let mut candidate = self.cidr.first_host;
        loop {
            if candidate >= self.cidr.broadcast {
                return Err(RuntimeError::IpamPoolExhausted {
                    cidr: format!("{}/{}", self.cidr.network, self.cidr.prefix_len),
                });
            }
            if !allocated.contains(&candidate) {
                // Claim this IP by recording which pod owns it.
                let alloc_key = format!("ipam/alloc/{}", candidate);
                self.storage.put(alloc_key.as_bytes(), pod_key.as_bytes())?;
                debug!("IPAM: allocated {} for {}", candidate, pod_key);
                return Ok(IpAllocation {
                    ip_address: candidate,
                    gateway: self.cidr.gateway,
                    prefix_len: self.cidr.prefix_len,
                });
            }
            candidate = next_ip(candidate);
        }
    }

    /// Release the IP allocated to a pod.
    ///
    /// Returns the released address, or `None` if the pod had no allocation
    /// (releasing is a no-op in that case).
    pub fn release(&self, namespace: &str, pod_name: &str) -> Result<Option<Ipv4Addr>> {
        let pod_key = format!("{}/{}", namespace, pod_name);
        let allocations = self.storage.scan(IPAM_ALLOC_PREFIX)?;
        for (key, value) in &allocations {
            if String::from_utf8_lossy(value) == pod_key {
                let ip = Self::ip_from_alloc_key(key);
                self.storage.delete(key)?;
                debug!("IPAM: released {:?} for {}", ip, pod_key);
                return Ok(ip);
            }
        }
        debug!("IPAM: no allocation found for {}", pod_key);
        Ok(None)
    }

    /// Get all current allocations, keyed by IP with `"{namespace}/{pod}"` values.
    pub fn get_all_allocations(&self) -> Result<BTreeMap<Ipv4Addr, String>> {
        let allocations = self.storage.scan(IPAM_ALLOC_PREFIX)?;
        let mut result = BTreeMap::new();
        for (key, value) in &allocations {
            if let Some(ip) = Self::ip_from_alloc_key(key) {
                result.insert(ip, String::from_utf8_lossy(value).into_owned());
            }
        }
        Ok(result)
    }
}
/// Parse a CIDR string like "10.88.0.0/16" into a CidrConfig.
///
/// The address part is normalized to the true network address (host bits
/// are masked off), so "10.88.0.5/16" yields network 10.88.0.0 rather than
/// a misaligned base. The gateway is network + 1 and the first allocatable
/// host is network + 2; saturating arithmetic keeps degenerate pools at the
/// very top of the address space from overflowing (such pools simply report
/// exhaustion on allocation instead of panicking in debug builds).
///
/// # Errors
/// Returns an invalid-config error when the string is not `addr/len`, the
/// address is not a valid IPv4 address, or the prefix length exceeds 32.
pub fn parse_cidr(cidr_str: &str) -> Result<CidrConfig> {
    let parts: Vec<&str> = cidr_str.split('/').collect();
    if parts.len() != 2 {
        return Err(RuntimeError::invalid_config(
            format!("Invalid CIDR format: '{}'", cidr_str),
            "Use format like '10.88.0.0/16'",
        ));
    }
    let addr: Ipv4Addr = parts[0].parse().map_err(|_| {
        RuntimeError::invalid_config(
            format!("Invalid network address: '{}'", parts[0]),
            "Use a valid IPv4 address like '10.88.0.0'",
        )
    })?;
    let prefix_len: u8 = parts[1].parse().map_err(|_| {
        RuntimeError::invalid_config(
            format!("Invalid prefix length: '{}'", parts[1]),
            "Use a number between 0 and 32",
        )
    })?;
    if prefix_len > 32 {
        return Err(RuntimeError::invalid_config(
            format!("Prefix length {} is out of range", prefix_len),
            "Use a number between 0 and 32",
        ));
    }
    // /0 needs special-casing: shifting a u32 by 32 bits would overflow.
    let host_bits = 32 - prefix_len;
    let mask = if prefix_len == 0 {
        0u32
    } else {
        !((1u32 << host_bits) - 1)
    };
    // Mask off any host bits so a misaligned input still produces the
    // correct network/broadcast pair.
    let network_u32 = u32::from(addr) & mask;
    let broadcast_u32 = network_u32 | !mask;
    Ok(CidrConfig {
        network: Ipv4Addr::from(network_u32),
        prefix_len,
        // Saturating adds: for /31, /32 pools at 255.255.255.x these would
        // otherwise overflow; allocation then fails with pool exhaustion.
        gateway: Ipv4Addr::from(network_u32.saturating_add(1)),
        first_host: Ipv4Addr::from(network_u32.saturating_add(2)),
        broadcast: Ipv4Addr::from(broadcast_u32),
    })
}
/// Return the IPv4 address immediately following `ip` in numeric order.
///
/// Works via the address's u32 representation, so the increment carries
/// across octet boundaries (10.0.0.255 → 10.0.1.0).
fn next_ip(ip: Ipv4Addr) -> Ipv4Addr {
    let raw: u32 = ip.into();
    Ipv4Addr::from(raw + 1)
}
#[cfg(test)]
mod tests {
    use super::*;
    use reddwarf_storage::RedbBackend;
    use tempfile::tempdir;

    /// Build an Ipam over a fresh redb database in a temp directory.
    ///
    /// The TempDir is returned alongside the Ipam so it stays alive exactly
    /// as long as the test needs it and is cleaned up on drop. (Previously
    /// the directory was leaked with `std::mem::forget`, leaving a stray
    /// temp directory behind after every test run.)
    fn make_test_ipam(cidr: &str) -> (Ipam, tempfile::TempDir) {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("test-ipam.redb");
        let storage = Arc::new(RedbBackend::new(&db_path).unwrap());
        let ipam = Ipam::new(storage, cidr).unwrap();
        (ipam, dir)
    }

    #[test]
    fn test_parse_cidr_valid() {
        let cidr = parse_cidr("10.88.0.0/16").unwrap();
        assert_eq!(cidr.network, Ipv4Addr::new(10, 88, 0, 0));
        assert_eq!(cidr.prefix_len, 16);
        assert_eq!(cidr.gateway, Ipv4Addr::new(10, 88, 0, 1));
        assert_eq!(cidr.first_host, Ipv4Addr::new(10, 88, 0, 2));
        assert_eq!(cidr.broadcast, Ipv4Addr::new(10, 88, 255, 255));
    }

    #[test]
    fn test_parse_cidr_slash24() {
        let cidr = parse_cidr("192.168.1.0/24").unwrap();
        assert_eq!(cidr.network, Ipv4Addr::new(192, 168, 1, 0));
        assert_eq!(cidr.gateway, Ipv4Addr::new(192, 168, 1, 1));
        assert_eq!(cidr.first_host, Ipv4Addr::new(192, 168, 1, 2));
        assert_eq!(cidr.broadcast, Ipv4Addr::new(192, 168, 1, 255));
    }

    #[test]
    fn test_parse_cidr_invalid() {
        assert!(parse_cidr("not-a-cidr").is_err());
        assert!(parse_cidr("10.88.0.0").is_err());
        assert!(parse_cidr("10.88.0.0/33").is_err());
        assert!(parse_cidr("bad/16").is_err());
    }

    #[test]
    fn test_allocate_sequential() {
        let (ipam, _dir) = make_test_ipam("10.88.0.0/16");
        let alloc1 = ipam.allocate("default", "pod-a").unwrap();
        assert_eq!(alloc1.ip_address, Ipv4Addr::new(10, 88, 0, 2));
        assert_eq!(alloc1.gateway, Ipv4Addr::new(10, 88, 0, 1));
        assert_eq!(alloc1.prefix_len, 16);
        let alloc2 = ipam.allocate("default", "pod-b").unwrap();
        assert_eq!(alloc2.ip_address, Ipv4Addr::new(10, 88, 0, 3));
    }

    #[test]
    fn test_allocate_idempotent() {
        let (ipam, _dir) = make_test_ipam("10.88.0.0/16");
        let alloc1 = ipam.allocate("default", "pod-a").unwrap();
        let alloc2 = ipam.allocate("default", "pod-a").unwrap();
        assert_eq!(alloc1.ip_address, alloc2.ip_address);
    }

    #[test]
    fn test_release_and_reallocate() {
        let (ipam, _dir) = make_test_ipam("10.88.0.0/16");
        let alloc1 = ipam.allocate("default", "pod-a").unwrap();
        let first_ip = alloc1.ip_address;
        // Allocate a second pod
        let _alloc2 = ipam.allocate("default", "pod-b").unwrap();
        // Release first pod
        let released = ipam.release("default", "pod-a").unwrap();
        assert_eq!(released, Some(first_ip));
        // New pod should reuse the freed IP
        let alloc3 = ipam.allocate("default", "pod-c").unwrap();
        assert_eq!(alloc3.ip_address, first_ip);
    }

    #[test]
    fn test_pool_exhaustion() {
        // /30 gives us network .0, gateway .1, one host .2, broadcast .3
        let (ipam, _dir) = make_test_ipam("10.0.0.0/30");
        // First allocation should succeed (.2)
        let alloc = ipam.allocate("default", "pod-a").unwrap();
        assert_eq!(alloc.ip_address, Ipv4Addr::new(10, 0, 0, 2));
        // Second allocation should fail (only .2 is usable, .3 is broadcast)
        let result = ipam.allocate("default", "pod-b");
        assert!(matches!(
            result.unwrap_err(),
            RuntimeError::IpamPoolExhausted { .. }
        ));
    }

    #[test]
    fn test_get_all_allocations() {
        let (ipam, _dir) = make_test_ipam("10.88.0.0/16");
        ipam.allocate("default", "pod-a").unwrap();
        ipam.allocate("kube-system", "pod-b").unwrap();
        let allocs = ipam.get_all_allocations().unwrap();
        assert_eq!(allocs.len(), 2);
        assert_eq!(allocs[&Ipv4Addr::new(10, 88, 0, 2)], "default/pod-a");
        assert_eq!(allocs[&Ipv4Addr::new(10, 88, 0, 3)], "kube-system/pod-b");
    }

    #[test]
    fn test_release_nonexistent() {
        let (ipam, _dir) = make_test_ipam("10.88.0.0/16");
        let released = ipam.release("default", "nonexistent").unwrap();
        assert_eq!(released, None);
    }
}

View file

@ -1,6 +1,8 @@
pub mod ipam;
pub mod types; pub mod types;
pub use crate::types::{DirectNicConfig, EtherstubConfig, NetworkMode}; pub use crate::types::{DirectNicConfig, EtherstubConfig, NetworkMode};
pub use ipam::{CidrConfig, IpAllocation, Ipam};
/// Generate a VNIC name from pod namespace and name /// Generate a VNIC name from pod namespace and name
pub fn vnic_name_for_pod(namespace: &str, pod_name: &str) -> String { pub fn vnic_name_for_pod(namespace: &str, pod_name: &str) -> String {

View file

@ -104,6 +104,8 @@ pub struct EtherstubConfig {
pub ip_address: String, pub ip_address: String,
/// Gateway address /// Gateway address
pub gateway: String, pub gateway: String,
/// CIDR prefix length (e.g., 16 for /16)
pub prefix_len: u8,
} }
/// Direct NIC-based network configuration /// Direct NIC-based network configuration
@ -117,6 +119,8 @@ pub struct DirectNicConfig {
pub ip_address: String, pub ip_address: String,
/// Gateway address /// Gateway address
pub gateway: String, pub gateway: String,
/// CIDR prefix length (e.g., 16 for /16)
pub prefix_len: u8,
} }
/// ZFS dataset configuration for zone storage /// ZFS dataset configuration for zone storage

View file

@ -11,12 +11,24 @@ pub fn generate_zonecfg(config: &ZoneConfig) -> Result<String> {
lines.push("set ip-type=exclusive".to_string()); lines.push("set ip-type=exclusive".to_string());
// Network resource // Network resource
let vnic_name = match &config.network { let (vnic_name, ip_address, gateway, prefix_len) = match &config.network {
NetworkMode::Etherstub(cfg) => &cfg.vnic_name, NetworkMode::Etherstub(cfg) => (
NetworkMode::Direct(cfg) => &cfg.vnic_name, &cfg.vnic_name,
&cfg.ip_address,
&cfg.gateway,
cfg.prefix_len,
),
NetworkMode::Direct(cfg) => (
&cfg.vnic_name,
&cfg.ip_address,
&cfg.gateway,
cfg.prefix_len,
),
}; };
lines.push("add net".to_string()); lines.push("add net".to_string());
lines.push(format!("set physical={}", vnic_name)); lines.push(format!("set physical={}", vnic_name));
lines.push(format!("set allowed-address={}/{}", ip_address, prefix_len));
lines.push(format!("set defrouter={}", gateway));
lines.push("end".to_string()); lines.push("end".to_string());
// CPU cap // CPU cap
@ -67,6 +79,7 @@ mod tests {
vnic_name: "vnic0".to_string(), vnic_name: "vnic0".to_string(),
ip_address: "10.0.0.2".to_string(), ip_address: "10.0.0.2".to_string(),
gateway: "10.0.0.1".to_string(), gateway: "10.0.0.1".to_string(),
prefix_len: 16,
}), }),
zfs: ZfsConfig { zfs: ZfsConfig {
parent_dataset: "rpool/zones".to_string(), parent_dataset: "rpool/zones".to_string(),
@ -85,6 +98,8 @@ mod tests {
assert!(result.contains("set zonepath=/zones/test-zone")); assert!(result.contains("set zonepath=/zones/test-zone"));
assert!(result.contains("set ip-type=exclusive")); assert!(result.contains("set ip-type=exclusive"));
assert!(result.contains("set physical=vnic0")); assert!(result.contains("set physical=vnic0"));
assert!(result.contains("set allowed-address=10.0.0.2/16"));
assert!(result.contains("set defrouter=10.0.0.1"));
assert!(result.contains("set ncpus=2.0")); assert!(result.contains("set ncpus=2.0"));
assert!(result.contains("set physical=1G")); assert!(result.contains("set physical=1G"));
assert!(result.contains("verify")); assert!(result.contains("verify"));
@ -102,6 +117,7 @@ mod tests {
vnic_name: "vnic1".to_string(), vnic_name: "vnic1".to_string(),
ip_address: "192.168.1.10".to_string(), ip_address: "192.168.1.10".to_string(),
gateway: "192.168.1.1".to_string(), gateway: "192.168.1.1".to_string(),
prefix_len: 24,
}), }),
zfs: ZfsConfig { zfs: ZfsConfig {
parent_dataset: "rpool/zones".to_string(), parent_dataset: "rpool/zones".to_string(),
@ -128,6 +144,8 @@ mod tests {
let result = generate_zonecfg(&config).unwrap(); let result = generate_zonecfg(&config).unwrap();
assert!(result.contains("set brand=reddwarf")); assert!(result.contains("set brand=reddwarf"));
assert!(result.contains("set physical=vnic1")); assert!(result.contains("set physical=vnic1"));
assert!(result.contains("set allowed-address=192.168.1.10/24"));
assert!(result.contains("set defrouter=192.168.1.1"));
assert!(result.contains("set physical=512M")); assert!(result.contains("set physical=512M"));
assert!(result.contains("add fs")); assert!(result.contains("add fs"));
assert!(result.contains("set dir=/etc/app")); assert!(result.contains("set dir=/etc/app"));

View file

@ -2,8 +2,8 @@ use clap::{Parser, Subcommand};
use reddwarf_apiserver::{ApiError, ApiServer, AppState, Config as ApiConfig}; use reddwarf_apiserver::{ApiError, ApiServer, AppState, Config as ApiConfig};
use reddwarf_core::Namespace; use reddwarf_core::Namespace;
use reddwarf_runtime::{ use reddwarf_runtime::{
ApiClient, EtherstubConfig, MockRuntime, NetworkMode, NodeAgent, NodeAgentConfig, ApiClient, Ipam, MockRuntime, NodeAgent, NodeAgentConfig, PodController, PodControllerConfig,
PodController, PodControllerConfig, ZoneBrand, ZoneBrand,
}; };
use reddwarf_scheduler::scheduler::SchedulerConfig; use reddwarf_scheduler::scheduler::SchedulerConfig;
use reddwarf_scheduler::Scheduler; use reddwarf_scheduler::Scheduler;
@ -48,6 +48,12 @@ enum Commands {
/// Parent ZFS dataset for zone storage /// Parent ZFS dataset for zone storage
#[arg(long, default_value = "rpool/zones")] #[arg(long, default_value = "rpool/zones")]
zfs_parent: String, zfs_parent: String,
/// Pod network CIDR for IPAM allocation
#[arg(long, default_value = "10.88.0.0/16")]
pod_cidr: String,
/// Etherstub name for pod networking
#[arg(long, default_value = "reddwarf0")]
etherstub_name: String,
}, },
} }
@ -71,7 +77,20 @@ async fn main() -> miette::Result<()> {
data_dir, data_dir,
zonepath_prefix, zonepath_prefix,
zfs_parent, zfs_parent,
} => run_agent(&node_name, &bind, &data_dir, &zonepath_prefix, &zfs_parent).await, pod_cidr,
etherstub_name,
} => {
run_agent(
&node_name,
&bind,
&data_dir,
&zonepath_prefix,
&zfs_parent,
&pod_cidr,
&etherstub_name,
)
.await
}
} }
} }
@ -105,6 +124,8 @@ async fn run_agent(
data_dir: &str, data_dir: &str,
zonepath_prefix: &str, zonepath_prefix: &str,
zfs_parent: &str, zfs_parent: &str,
pod_cidr: &str,
etherstub_name: &str,
) -> miette::Result<()> { ) -> miette::Result<()> {
info!("Starting reddwarf agent for node '{}'", node_name); info!("Starting reddwarf agent for node '{}'", node_name);
@ -158,7 +179,12 @@ async fn run_agent(
// 3. Create runtime (MockRuntime on non-illumos, IllumosRuntime on illumos) // 3. Create runtime (MockRuntime on non-illumos, IllumosRuntime on illumos)
let runtime: Arc<dyn reddwarf_runtime::ZoneRuntime> = create_runtime(); let runtime: Arc<dyn reddwarf_runtime::ZoneRuntime> = create_runtime();
// 4. Spawn pod controller // 4. Create IPAM for per-pod IP allocation
let ipam = Ipam::new(state.storage.clone(), pod_cidr).map_err(|e| {
miette::miette!("Failed to initialize IPAM with CIDR '{}': {}", pod_cidr, e)
})?;
// 5. Spawn pod controller
let api_client = Arc::new(ApiClient::new(&api_url)); let api_client = Arc::new(ApiClient::new(&api_url));
let controller_config = PodControllerConfig { let controller_config = PodControllerConfig {
node_name: node_name.to_string(), node_name: node_name.to_string(),
@ -166,12 +192,8 @@ async fn run_agent(
zonepath_prefix: zonepath_prefix.to_string(), zonepath_prefix: zonepath_prefix.to_string(),
zfs_parent_dataset: zfs_parent.to_string(), zfs_parent_dataset: zfs_parent.to_string(),
default_brand: ZoneBrand::Reddwarf, default_brand: ZoneBrand::Reddwarf,
network: NetworkMode::Etherstub(EtherstubConfig { etherstub_name: etherstub_name.to_string(),
etherstub_name: "reddwarf0".to_string(), pod_cidr: pod_cidr.to_string(),
vnic_name: "reddwarf_vnic0".to_string(),
ip_address: "10.88.0.2".to_string(),
gateway: "10.88.0.1".to_string(),
}),
}; };
let controller = PodController::new( let controller = PodController::new(
@ -179,6 +201,7 @@ async fn run_agent(
api_client.clone(), api_client.clone(),
state.event_tx.clone(), state.event_tx.clone(),
controller_config, controller_config,
ipam,
); );
let controller_token = token.clone(); let controller_token = token.clone();
let controller_handle = tokio::spawn(async move { let controller_handle = tokio::spawn(async move {
@ -187,7 +210,7 @@ async fn run_agent(
} }
}); });
// 5. Spawn node agent // 6. Spawn node agent
let node_agent_config = NodeAgentConfig::new(node_name.to_string(), api_url); let node_agent_config = NodeAgentConfig::new(node_name.to_string(), api_url);
let node_agent = NodeAgent::new(api_client, node_agent_config); let node_agent = NodeAgent::new(api_client, node_agent_config);
let agent_token = token.clone(); let agent_token = token.clone();
@ -198,8 +221,8 @@ async fn run_agent(
}); });
info!( info!(
"All components started. API server on {}, node name: {}", "All components started. API server on {}, node name: {}, pod CIDR: {}",
bind, node_name bind, node_name, pod_cidr
); );
// Wait for shutdown signal // Wait for shutdown signal