mirror of
https://github.com/CloudNebulaProject/reddwarf.git
synced 2026-04-10 13:20:40 +00:00
Add pod networking: IPAM, per-pod VNICs, and zone IP configuration
Each pod now gets a unique VNIC name and IP address from a configurable CIDR pool, with IPs released on pod deletion. This replaces the hardcoded single VNIC/IP that prevented multiple pods from running.
- Add redb-backed IPAM module with allocate/release/idempotent semantics
- Add prefix_len to EtherstubConfig and DirectNicConfig
- Generate allowed-address and defrouter in zonecfg net blocks
- Wire vnic_name_for_pod() into controller for unique VNIC names
- Add --pod-cidr and --etherstub-name CLI flags to agent subcommand
- Add StorageError and IpamPoolExhausted error variants with diagnostics
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
c50ecb2664
commit
57186ebe68
12 changed files with 609 additions and 118 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -1393,6 +1393,7 @@ dependencies = [
|
|||
"k8s-openapi",
|
||||
"miette",
|
||||
"reddwarf-core",
|
||||
"reddwarf-storage",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ rust-version.workspace = true
|
|||
|
||||
[dependencies]
|
||||
reddwarf-core = { workspace = true }
|
||||
reddwarf-storage = { workspace = true }
|
||||
k8s-openapi = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
tokio-stream = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ mod tests {
|
|||
vnic_name: "vnic0".to_string(),
|
||||
ip_address: "10.0.0.2".to_string(),
|
||||
gateway: "10.0.0.1".to_string(),
|
||||
prefix_len: 16,
|
||||
}),
|
||||
zfs: ZfsConfig {
|
||||
parent_dataset: "rpool/zones".to_string(),
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
use crate::api_client::ApiClient;
|
||||
use crate::error::{Result, RuntimeError};
|
||||
use crate::network::{vnic_name_for_pod, Ipam};
|
||||
use crate::traits::ZoneRuntime;
|
||||
use crate::types::*;
|
||||
use k8s_openapi::api::core::v1::{Pod, PodCondition, PodStatus};
|
||||
|
|
@ -22,8 +23,10 @@ pub struct PodControllerConfig {
|
|||
pub zfs_parent_dataset: String,
|
||||
/// Default zone brand
|
||||
pub default_brand: ZoneBrand,
|
||||
/// Default network configuration
|
||||
pub network: NetworkMode,
|
||||
/// Name of the etherstub for pod networking
|
||||
pub etherstub_name: String,
|
||||
/// Pod CIDR (e.g., "10.88.0.0/16")
|
||||
pub pod_cidr: String,
|
||||
}
|
||||
|
||||
/// Pod controller that watches for Pod events and drives zone lifecycle
|
||||
|
|
@ -32,6 +35,7 @@ pub struct PodController {
|
|||
api_client: Arc<ApiClient>,
|
||||
event_tx: broadcast::Sender<ResourceEvent>,
|
||||
config: PodControllerConfig,
|
||||
ipam: Ipam,
|
||||
}
|
||||
|
||||
impl PodController {
|
||||
|
|
@ -40,12 +44,14 @@ impl PodController {
|
|||
api_client: Arc<ApiClient>,
|
||||
event_tx: broadcast::Sender<ResourceEvent>,
|
||||
config: PodControllerConfig,
|
||||
ipam: Ipam,
|
||||
) -> Self {
|
||||
Self {
|
||||
runtime,
|
||||
api_client,
|
||||
event_tx,
|
||||
config,
|
||||
ipam,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -205,7 +211,7 @@ impl PodController {
|
|||
"" | "Pending" => {
|
||||
// Pod is assigned to us but has no phase — provision it
|
||||
info!("Provisioning zone for pod {}/{}", namespace, pod_name);
|
||||
let zone_config = pod_to_zone_config(pod, &self.config)?;
|
||||
let zone_config = self.pod_to_zone_config(pod)?;
|
||||
|
||||
match self.runtime.provision(&zone_config).await {
|
||||
Ok(()) => {
|
||||
|
|
@ -328,7 +334,7 @@ impl PodController {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Handle pod deletion — deprovision the zone
|
||||
/// Handle pod deletion — deprovision the zone and release IP
|
||||
pub async fn handle_delete(&self, pod: &Pod) -> Result<()> {
|
||||
let pod_name = pod
|
||||
.metadata
|
||||
|
|
@ -348,7 +354,7 @@ impl PodController {
|
|||
}
|
||||
}
|
||||
|
||||
let zone_config = pod_to_zone_config(pod, &self.config)?;
|
||||
let zone_config = self.pod_to_zone_config(pod)?;
|
||||
info!(
|
||||
"Deprovisioning zone for deleted pod {}/{}",
|
||||
namespace, pod_name
|
||||
|
|
@ -361,9 +367,96 @@ impl PodController {
|
|||
);
|
||||
}
|
||||
|
||||
// Release the IP allocation
|
||||
if let Err(e) = self.ipam.release(namespace, pod_name) {
|
||||
warn!(
|
||||
"Failed to release IP for pod {}/{}: {}",
|
||||
namespace, pod_name, e
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Convert a Pod spec to a ZoneConfig with per-pod VNIC and IP
|
||||
fn pod_to_zone_config(&self, pod: &Pod) -> Result<ZoneConfig> {
|
||||
let pod_name = pod
|
||||
.metadata
|
||||
.name
|
||||
.as_deref()
|
||||
.ok_or_else(|| RuntimeError::internal_error("Pod has no name"))?;
|
||||
let namespace = pod.metadata.namespace.as_deref().unwrap_or("default");
|
||||
|
||||
let spec = pod
|
||||
.spec
|
||||
.as_ref()
|
||||
.ok_or_else(|| RuntimeError::internal_error("Pod has no spec"))?;
|
||||
|
||||
let zone_name = pod_zone_name(namespace, pod_name);
|
||||
let zonepath = format!("{}/{}", self.config.zonepath_prefix, zone_name);
|
||||
|
||||
// Allocate a unique VNIC name and IP for this pod
|
||||
let vnic_name = vnic_name_for_pod(namespace, pod_name);
|
||||
let allocation = self.ipam.allocate(namespace, pod_name)?;
|
||||
|
||||
let network = NetworkMode::Etherstub(EtherstubConfig {
|
||||
etherstub_name: self.config.etherstub_name.clone(),
|
||||
vnic_name,
|
||||
ip_address: allocation.ip_address.to_string(),
|
||||
gateway: allocation.gateway.to_string(),
|
||||
prefix_len: allocation.prefix_len,
|
||||
});
|
||||
|
||||
// Map containers to ContainerProcess entries
|
||||
let processes: Vec<ContainerProcess> = spec
|
||||
.containers
|
||||
.iter()
|
||||
.map(|c| {
|
||||
let command = c
|
||||
.command
|
||||
.clone()
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.chain(c.args.clone().unwrap_or_default())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let env = c
|
||||
.env
|
||||
.as_ref()
|
||||
.map(|envs| {
|
||||
envs.iter()
|
||||
.filter_map(|e| e.value.as_ref().map(|v| (e.name.clone(), v.clone())))
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
ContainerProcess {
|
||||
name: c.name.clone(),
|
||||
command,
|
||||
working_dir: c.working_dir.clone(),
|
||||
env,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(ZoneConfig {
|
||||
zone_name,
|
||||
brand: self.config.default_brand.clone(),
|
||||
zonepath,
|
||||
network,
|
||||
zfs: ZfsConfig {
|
||||
parent_dataset: self.config.zfs_parent_dataset.clone(),
|
||||
clone_from: None,
|
||||
quota: None,
|
||||
},
|
||||
lx_image_path: None,
|
||||
processes,
|
||||
cpu_cap: None,
|
||||
memory_cap: None,
|
||||
fs_mounts: vec![],
|
||||
})
|
||||
}
|
||||
|
||||
/// Extract IP address from zone config network
|
||||
fn zone_ip(&self, config: &ZoneConfig) -> String {
|
||||
match &config.network {
|
||||
|
|
@ -396,77 +489,38 @@ pub fn pod_zone_name(namespace: &str, pod_name: &str) -> String {
|
|||
}
|
||||
}
|
||||
|
||||
/// Convert a Pod spec to a ZoneConfig for the runtime
|
||||
pub fn pod_to_zone_config(pod: &Pod, config: &PodControllerConfig) -> Result<ZoneConfig> {
|
||||
let pod_name = pod
|
||||
.metadata
|
||||
.name
|
||||
.as_deref()
|
||||
.ok_or_else(|| RuntimeError::internal_error("Pod has no name"))?;
|
||||
let namespace = pod.metadata.namespace.as_deref().unwrap_or("default");
|
||||
|
||||
let spec = pod
|
||||
.spec
|
||||
.as_ref()
|
||||
.ok_or_else(|| RuntimeError::internal_error("Pod has no spec"))?;
|
||||
|
||||
let zone_name = pod_zone_name(namespace, pod_name);
|
||||
let zonepath = format!("{}/{}", config.zonepath_prefix, zone_name);
|
||||
|
||||
// Map containers to ContainerProcess entries
|
||||
let processes: Vec<ContainerProcess> = spec
|
||||
.containers
|
||||
.iter()
|
||||
.map(|c| {
|
||||
let command = c
|
||||
.command
|
||||
.clone()
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.chain(c.args.clone().unwrap_or_default())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let env = c
|
||||
.env
|
||||
.as_ref()
|
||||
.map(|envs| {
|
||||
envs.iter()
|
||||
.filter_map(|e| e.value.as_ref().map(|v| (e.name.clone(), v.clone())))
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
ContainerProcess {
|
||||
name: c.name.clone(),
|
||||
command,
|
||||
working_dir: c.working_dir.clone(),
|
||||
env,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(ZoneConfig {
|
||||
zone_name,
|
||||
brand: config.default_brand.clone(),
|
||||
zonepath,
|
||||
network: config.network.clone(),
|
||||
zfs: ZfsConfig {
|
||||
parent_dataset: config.zfs_parent_dataset.clone(),
|
||||
clone_from: None,
|
||||
quota: None,
|
||||
},
|
||||
lx_image_path: None,
|
||||
processes,
|
||||
cpu_cap: None,
|
||||
memory_cap: None,
|
||||
fs_mounts: vec![],
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::network::Ipam;
|
||||
use k8s_openapi::api::core::v1::{Container, PodSpec};
|
||||
use reddwarf_storage::RedbBackend;
|
||||
use std::net::Ipv4Addr;
|
||||
use tempfile::tempdir;
|
||||
|
||||
fn make_test_controller() -> (PodController, tempfile::TempDir) {
|
||||
let dir = tempdir().unwrap();
|
||||
let db_path = dir.path().join("test-controller.redb");
|
||||
let storage = Arc::new(RedbBackend::new(&db_path).unwrap());
|
||||
let ipam = Ipam::new(storage, "10.88.0.0/16").unwrap();
|
||||
|
||||
let runtime = Arc::new(crate::mock::MockRuntime::new());
|
||||
let api_client = Arc::new(ApiClient::new("http://127.0.0.1:6443"));
|
||||
let (event_tx, _) = broadcast::channel(16);
|
||||
|
||||
let config = PodControllerConfig {
|
||||
node_name: "node1".to_string(),
|
||||
api_url: "http://127.0.0.1:6443".to_string(),
|
||||
zonepath_prefix: "/zones".to_string(),
|
||||
zfs_parent_dataset: "rpool/zones".to_string(),
|
||||
default_brand: ZoneBrand::Reddwarf,
|
||||
etherstub_name: "reddwarf0".to_string(),
|
||||
pod_cidr: "10.88.0.0/16".to_string(),
|
||||
};
|
||||
|
||||
let controller = PodController::new(runtime, api_client, event_tx, config, ipam);
|
||||
(controller, dir)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_pod_zone_name_basic() {
|
||||
|
|
@ -491,6 +545,8 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_pod_to_zone_config_maps_containers() {
|
||||
let (controller, _dir) = make_test_controller();
|
||||
|
||||
let mut pod = Pod::default();
|
||||
pod.metadata.name = Some("test-pod".to_string());
|
||||
pod.metadata.namespace = Some("default".to_string());
|
||||
|
|
@ -511,21 +567,7 @@ mod tests {
|
|||
..Default::default()
|
||||
});
|
||||
|
||||
let config = PodControllerConfig {
|
||||
node_name: "node1".to_string(),
|
||||
api_url: "http://127.0.0.1:6443".to_string(),
|
||||
zonepath_prefix: "/zones".to_string(),
|
||||
zfs_parent_dataset: "rpool/zones".to_string(),
|
||||
default_brand: ZoneBrand::Reddwarf,
|
||||
network: NetworkMode::Etherstub(EtherstubConfig {
|
||||
etherstub_name: "reddwarf0".to_string(),
|
||||
vnic_name: "vnic0".to_string(),
|
||||
ip_address: "10.0.0.2".to_string(),
|
||||
gateway: "10.0.0.1".to_string(),
|
||||
}),
|
||||
};
|
||||
|
||||
let zone_config = pod_to_zone_config(&pod, &config).unwrap();
|
||||
let zone_config = controller.pod_to_zone_config(&pod).unwrap();
|
||||
|
||||
assert_eq!(zone_config.zone_name, "reddwarf-default-test-pod");
|
||||
assert_eq!(zone_config.zonepath, "/zones/reddwarf-default-test-pod");
|
||||
|
|
@ -539,29 +581,74 @@ mod tests {
|
|||
assert_eq!(zone_config.processes[1].command, vec!["/bin/sh", "-c"]);
|
||||
assert_eq!(zone_config.brand, ZoneBrand::Reddwarf);
|
||||
assert_eq!(zone_config.zfs.parent_dataset, "rpool/zones");
|
||||
|
||||
// Verify per-pod networking
|
||||
match &zone_config.network {
|
||||
NetworkMode::Etherstub(cfg) => {
|
||||
assert_eq!(cfg.etherstub_name, "reddwarf0");
|
||||
assert_eq!(cfg.vnic_name, "vnic_default_test_pod");
|
||||
assert_eq!(cfg.ip_address, Ipv4Addr::new(10, 88, 0, 2).to_string());
|
||||
assert_eq!(cfg.gateway, Ipv4Addr::new(10, 88, 0, 1).to_string());
|
||||
assert_eq!(cfg.prefix_len, 16);
|
||||
}
|
||||
_ => panic!("Expected Etherstub network mode"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Two different pods in the same namespace must receive distinct,
/// sequentially assigned addresses from the pool.
#[test]
fn test_pod_to_zone_config_unique_ips() {
    let (controller, _dir) = make_test_controller();

    // Both pods are identical apart from their name.
    let make_pod = |name: &str| {
        let mut pod = Pod::default();
        pod.metadata.name = Some(name.to_string());
        pod.metadata.namespace = Some("default".to_string());
        pod.spec = Some(PodSpec {
            containers: vec![Container {
                name: "web".to_string(),
                command: Some(vec!["/bin/sh".to_string()]),
                ..Default::default()
            }],
            ..Default::default()
        });
        pod
    };

    // Convert a pod and pull the assigned address out of its network config.
    let assigned_ip = |pod: &Pod| -> String {
        match controller.pod_to_zone_config(pod).unwrap().network {
            NetworkMode::Etherstub(cfg) => cfg.ip_address,
            _ => panic!("Expected Etherstub"),
        }
    };

    let pod_a = make_pod("pod-a");
    let pod_b = make_pod("pod-b");

    let ip_a = assigned_ip(&pod_a);
    let ip_b = assigned_ip(&pod_b);

    assert_ne!(ip_a, ip_b, "Each pod should get a unique IP");
    assert_eq!(ip_a, "10.88.0.2");
    assert_eq!(ip_b, "10.88.0.3");
}
|
||||
|
||||
#[test]
|
||||
fn test_pod_to_zone_config_no_spec_returns_error() {
|
||||
let (controller, _dir) = make_test_controller();
|
||||
|
||||
let mut pod = Pod::default();
|
||||
pod.metadata.name = Some("test-pod".to_string());
|
||||
// No spec set
|
||||
|
||||
let config = PodControllerConfig {
|
||||
node_name: "node1".to_string(),
|
||||
api_url: "http://127.0.0.1:6443".to_string(),
|
||||
zonepath_prefix: "/zones".to_string(),
|
||||
zfs_parent_dataset: "rpool/zones".to_string(),
|
||||
default_brand: ZoneBrand::Reddwarf,
|
||||
network: NetworkMode::Etherstub(EtherstubConfig {
|
||||
etherstub_name: "reddwarf0".to_string(),
|
||||
vnic_name: "vnic0".to_string(),
|
||||
ip_address: "10.0.0.2".to_string(),
|
||||
gateway: "10.0.0.1".to_string(),
|
||||
}),
|
||||
};
|
||||
|
||||
let result = pod_to_zone_config(&pod, &config);
|
||||
let result = controller.pod_to_zone_config(&pod);
|
||||
assert!(result.is_err());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -115,6 +115,22 @@ pub enum RuntimeError {
|
|||
#[diagnostic(transparent)]
|
||||
CoreError(#[from] reddwarf_core::ReddwarfError),
|
||||
|
||||
/// Storage error
|
||||
#[error(transparent)]
|
||||
#[diagnostic(transparent)]
|
||||
StorageError(#[from] reddwarf_storage::StorageError),
|
||||
|
||||
/// IP address pool exhausted
|
||||
#[error("IPAM pool exhausted: no free addresses in {cidr}")]
|
||||
#[diagnostic(
|
||||
code(reddwarf::runtime::ipam_pool_exhausted),
|
||||
help("Expand the pod CIDR range or delete unused pods to free addresses")
|
||||
)]
|
||||
IpamPoolExhausted {
|
||||
#[allow(unused)]
|
||||
cidr: String,
|
||||
},
|
||||
|
||||
/// Internal error
|
||||
#[error("Internal runtime error: {message}")]
|
||||
#[diagnostic(
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ pub mod zone;
|
|||
// Re-export primary types
|
||||
pub use error::{Result, RuntimeError};
|
||||
pub use mock::MockRuntime;
|
||||
pub use network::{CidrConfig, IpAllocation, Ipam};
|
||||
pub use traits::ZoneRuntime;
|
||||
pub use types::{
|
||||
ContainerProcess, DirectNicConfig, EtherstubConfig, FsMount, NetworkMode, ZfsConfig, ZoneBrand,
|
||||
|
|
|
|||
|
|
@ -312,6 +312,7 @@ mod tests {
|
|||
vnic_name: format!("vnic_{}", name),
|
||||
ip_address: "10.0.0.2".to_string(),
|
||||
gateway: "10.0.0.1".to_string(),
|
||||
prefix_len: 16,
|
||||
}),
|
||||
zfs: ZfsConfig {
|
||||
parent_dataset: "rpool/zones".to_string(),
|
||||
|
|
|
|||
336
crates/reddwarf-runtime/src/network/ipam.rs
Normal file
336
crates/reddwarf-runtime/src/network/ipam.rs
Normal file
|
|
@ -0,0 +1,336 @@
|
|||
use crate::error::{Result, RuntimeError};
|
||||
use reddwarf_storage::KVStore;
|
||||
use std::collections::BTreeMap;
|
||||
use std::net::Ipv4Addr;
|
||||
use std::sync::Arc;
|
||||
use tracing::debug;
|
||||
|
||||
/// Parsed CIDR configuration.
///
/// All addresses are plain IPv4 values derived from one CIDR string; the type
/// is comparable so parsed configurations can be checked for equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CidrConfig {
    /// Base network address
    pub network: Ipv4Addr,
    /// CIDR prefix length
    pub prefix_len: u8,
    /// Gateway address (network + 1)
    pub gateway: Ipv4Addr,
    /// First allocatable host address (network + 2)
    pub first_host: Ipv4Addr,
    /// Broadcast address (last in range)
    pub broadcast: Ipv4Addr,
}
|
||||
|
||||
/// An allocated IP for a pod, together with the network parameters needed to
/// configure it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IpAllocation {
    /// Address assigned to the pod
    pub ip_address: Ipv4Addr,
    /// Gateway address of the pod network
    pub gateway: Ipv4Addr,
    /// CIDR prefix length of the pod network
    pub prefix_len: u8,
}
|
||||
|
||||
/// IPAM (IP Address Management) backed by a KVStore
|
||||
///
|
||||
/// Storage keys:
|
||||
/// - `ipam/_cidr` → the CIDR string (e.g. "10.88.0.0/16")
|
||||
/// - `ipam/alloc/{ip}` → `"{namespace}/{pod_name}"`
|
||||
pub struct Ipam {
|
||||
storage: Arc<dyn KVStore>,
|
||||
cidr: CidrConfig,
|
||||
}
|
||||
|
||||
const IPAM_CIDR_KEY: &[u8] = b"ipam/_cidr";
|
||||
const IPAM_ALLOC_PREFIX: &[u8] = b"ipam/alloc/";
|
||||
|
||||
impl Ipam {
|
||||
/// Create a new IPAM instance, persisting the CIDR config
|
||||
pub fn new(storage: Arc<dyn KVStore>, cidr_str: &str) -> Result<Self> {
|
||||
let cidr = parse_cidr(cidr_str)?;
|
||||
|
||||
// Persist the CIDR configuration
|
||||
storage.put(IPAM_CIDR_KEY, cidr_str.as_bytes())?;
|
||||
|
||||
debug!(
|
||||
"IPAM initialized: network={}, gateway={}, first_host={}, broadcast={}, prefix_len={}",
|
||||
cidr.network, cidr.gateway, cidr.first_host, cidr.broadcast, cidr.prefix_len
|
||||
);
|
||||
|
||||
Ok(Self { storage, cidr })
|
||||
}
|
||||
|
||||
/// Allocate an IP for a pod. Idempotent: returns existing allocation if one exists.
|
||||
pub fn allocate(&self, namespace: &str, pod_name: &str) -> Result<IpAllocation> {
|
||||
let pod_key = format!("{}/{}", namespace, pod_name);
|
||||
|
||||
// Check if this pod already has an allocation
|
||||
let allocations = self.storage.scan(IPAM_ALLOC_PREFIX)?;
|
||||
for (key, value) in &allocations {
|
||||
let existing_pod = String::from_utf8_lossy(value);
|
||||
if existing_pod == pod_key {
|
||||
// Parse the IP from the key: "ipam/alloc/{ip}"
|
||||
let key_str = String::from_utf8_lossy(key);
|
||||
let ip_str = &key_str[IPAM_ALLOC_PREFIX.len()..];
|
||||
if let Ok(ip) = ip_str.parse::<Ipv4Addr>() {
|
||||
debug!("IPAM: returning existing allocation {} for {}", ip, pod_key);
|
||||
return Ok(IpAllocation {
|
||||
ip_address: ip,
|
||||
gateway: self.cidr.gateway,
|
||||
prefix_len: self.cidr.prefix_len,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Collect already-allocated IPs
|
||||
let allocated: std::collections::HashSet<Ipv4Addr> = allocations
|
||||
.iter()
|
||||
.filter_map(|(key, _)| {
|
||||
let key_str = String::from_utf8_lossy(key);
|
||||
let ip_str = &key_str[IPAM_ALLOC_PREFIX.len()..];
|
||||
ip_str.parse::<Ipv4Addr>().ok()
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Find next free IP starting from first_host
|
||||
let mut candidate = self.cidr.first_host;
|
||||
loop {
|
||||
if candidate >= self.cidr.broadcast {
|
||||
return Err(RuntimeError::IpamPoolExhausted {
|
||||
cidr: format!("{}/{}", self.cidr.network, self.cidr.prefix_len),
|
||||
});
|
||||
}
|
||||
|
||||
if !allocated.contains(&candidate) {
|
||||
// Allocate this IP
|
||||
let alloc_key = format!("ipam/alloc/{}", candidate);
|
||||
self.storage.put(alloc_key.as_bytes(), pod_key.as_bytes())?;
|
||||
|
||||
debug!("IPAM: allocated {} for {}", candidate, pod_key);
|
||||
return Ok(IpAllocation {
|
||||
ip_address: candidate,
|
||||
gateway: self.cidr.gateway,
|
||||
prefix_len: self.cidr.prefix_len,
|
||||
});
|
||||
}
|
||||
|
||||
candidate = next_ip(candidate);
|
||||
}
|
||||
}
|
||||
|
||||
/// Release the IP allocated to a pod
|
||||
pub fn release(&self, namespace: &str, pod_name: &str) -> Result<Option<Ipv4Addr>> {
|
||||
let pod_key = format!("{}/{}", namespace, pod_name);
|
||||
|
||||
let allocations = self.storage.scan(IPAM_ALLOC_PREFIX)?;
|
||||
for (key, value) in &allocations {
|
||||
let existing_pod = String::from_utf8_lossy(value);
|
||||
if existing_pod == pod_key {
|
||||
let key_str = String::from_utf8_lossy(key);
|
||||
let ip_str = &key_str[IPAM_ALLOC_PREFIX.len()..];
|
||||
let ip = ip_str.parse::<Ipv4Addr>().ok();
|
||||
|
||||
self.storage.delete(key)?;
|
||||
debug!("IPAM: released {:?} for {}", ip, pod_key);
|
||||
return Ok(ip);
|
||||
}
|
||||
}
|
||||
|
||||
debug!("IPAM: no allocation found for {}", pod_key);
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Get all current allocations
|
||||
pub fn get_all_allocations(&self) -> Result<BTreeMap<Ipv4Addr, String>> {
|
||||
let allocations = self.storage.scan(IPAM_ALLOC_PREFIX)?;
|
||||
let mut result = BTreeMap::new();
|
||||
|
||||
for (key, value) in &allocations {
|
||||
let key_str = String::from_utf8_lossy(key);
|
||||
let ip_str = &key_str[IPAM_ALLOC_PREFIX.len()..];
|
||||
if let Ok(ip) = ip_str.parse::<Ipv4Addr>() {
|
||||
result.insert(ip, String::from_utf8_lossy(value).into_owned());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a CIDR string like "10.88.0.0/16" into a CidrConfig
|
||||
pub fn parse_cidr(cidr_str: &str) -> Result<CidrConfig> {
|
||||
let parts: Vec<&str> = cidr_str.split('/').collect();
|
||||
if parts.len() != 2 {
|
||||
return Err(RuntimeError::invalid_config(
|
||||
format!("Invalid CIDR format: '{}'", cidr_str),
|
||||
"Use format like '10.88.0.0/16'",
|
||||
));
|
||||
}
|
||||
|
||||
let network: Ipv4Addr = parts[0].parse().map_err(|_| {
|
||||
RuntimeError::invalid_config(
|
||||
format!("Invalid network address: '{}'", parts[0]),
|
||||
"Use a valid IPv4 address like '10.88.0.0'",
|
||||
)
|
||||
})?;
|
||||
|
||||
let prefix_len: u8 = parts[1].parse().map_err(|_| {
|
||||
RuntimeError::invalid_config(
|
||||
format!("Invalid prefix length: '{}'", parts[1]),
|
||||
"Use a number between 0 and 32",
|
||||
)
|
||||
})?;
|
||||
|
||||
if prefix_len > 32 {
|
||||
return Err(RuntimeError::invalid_config(
|
||||
format!("Prefix length {} is out of range", prefix_len),
|
||||
"Use a number between 0 and 32",
|
||||
));
|
||||
}
|
||||
|
||||
let network_u32 = u32::from(network);
|
||||
let host_bits = 32 - prefix_len;
|
||||
let mask = if prefix_len == 0 {
|
||||
0u32
|
||||
} else {
|
||||
!((1u32 << host_bits) - 1)
|
||||
};
|
||||
let broadcast_u32 = network_u32 | !mask;
|
||||
|
||||
let gateway = Ipv4Addr::from(network_u32 + 1);
|
||||
let first_host = Ipv4Addr::from(network_u32 + 2);
|
||||
let broadcast = Ipv4Addr::from(broadcast_u32);
|
||||
|
||||
Ok(CidrConfig {
|
||||
network,
|
||||
prefix_len,
|
||||
gateway,
|
||||
first_host,
|
||||
broadcast,
|
||||
})
|
||||
}
|
||||
|
||||
/// Increment an IPv4 address by one.
///
/// Saturates at `255.255.255.255` rather than overflowing, so the result is
/// never smaller than the input.
fn next_ip(ip: Ipv4Addr) -> Ipv4Addr {
    Ipv4Addr::from(u32::from(ip).saturating_add(1))
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use reddwarf_storage::RedbBackend;
    use tempfile::{tempdir, TempDir};

    /// Build an IPAM over a redb database in a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned so each test keeps the directory alive
    /// for its own duration and lets it be cleaned up afterwards, instead of
    /// leaking it.
    fn make_test_ipam(cidr: &str) -> (Ipam, TempDir) {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("test-ipam.redb");
        let storage = Arc::new(RedbBackend::new(&db_path).unwrap());
        (Ipam::new(storage, cidr).unwrap(), dir)
    }

    #[test]
    fn test_parse_cidr_valid() {
        let cidr = parse_cidr("10.88.0.0/16").unwrap();
        assert_eq!(cidr.network, Ipv4Addr::new(10, 88, 0, 0));
        assert_eq!(cidr.prefix_len, 16);
        assert_eq!(cidr.gateway, Ipv4Addr::new(10, 88, 0, 1));
        assert_eq!(cidr.first_host, Ipv4Addr::new(10, 88, 0, 2));
        assert_eq!(cidr.broadcast, Ipv4Addr::new(10, 88, 255, 255));
    }

    #[test]
    fn test_parse_cidr_slash24() {
        let cidr = parse_cidr("192.168.1.0/24").unwrap();
        assert_eq!(cidr.network, Ipv4Addr::new(192, 168, 1, 0));
        assert_eq!(cidr.gateway, Ipv4Addr::new(192, 168, 1, 1));
        assert_eq!(cidr.first_host, Ipv4Addr::new(192, 168, 1, 2));
        assert_eq!(cidr.broadcast, Ipv4Addr::new(192, 168, 1, 255));
    }

    #[test]
    fn test_parse_cidr_invalid() {
        assert!(parse_cidr("not-a-cidr").is_err());
        assert!(parse_cidr("10.88.0.0").is_err());
        assert!(parse_cidr("10.88.0.0/33").is_err());
        assert!(parse_cidr("bad/16").is_err());
    }

    #[test]
    fn test_allocate_sequential() {
        let (ipam, _dir) = make_test_ipam("10.88.0.0/16");

        let alloc1 = ipam.allocate("default", "pod-a").unwrap();
        assert_eq!(alloc1.ip_address, Ipv4Addr::new(10, 88, 0, 2));
        assert_eq!(alloc1.gateway, Ipv4Addr::new(10, 88, 0, 1));
        assert_eq!(alloc1.prefix_len, 16);

        let alloc2 = ipam.allocate("default", "pod-b").unwrap();
        assert_eq!(alloc2.ip_address, Ipv4Addr::new(10, 88, 0, 3));
    }

    #[test]
    fn test_allocate_idempotent() {
        let (ipam, _dir) = make_test_ipam("10.88.0.0/16");

        let alloc1 = ipam.allocate("default", "pod-a").unwrap();
        let alloc2 = ipam.allocate("default", "pod-a").unwrap();
        assert_eq!(alloc1.ip_address, alloc2.ip_address);
    }

    #[test]
    fn test_release_and_reallocate() {
        let (ipam, _dir) = make_test_ipam("10.88.0.0/16");

        let alloc1 = ipam.allocate("default", "pod-a").unwrap();
        let first_ip = alloc1.ip_address;

        // Allocate a second pod
        let _alloc2 = ipam.allocate("default", "pod-b").unwrap();

        // Release first pod
        let released = ipam.release("default", "pod-a").unwrap();
        assert_eq!(released, Some(first_ip));

        // New pod should reuse the freed IP
        let alloc3 = ipam.allocate("default", "pod-c").unwrap();
        assert_eq!(alloc3.ip_address, first_ip);
    }

    #[test]
    fn test_pool_exhaustion() {
        // /30 gives us network .0, gateway .1, one host .2, broadcast .3
        let (ipam, _dir) = make_test_ipam("10.0.0.0/30");

        // First allocation should succeed (.2)
        let alloc = ipam.allocate("default", "pod-a").unwrap();
        assert_eq!(alloc.ip_address, Ipv4Addr::new(10, 0, 0, 2));

        // Second allocation should fail (only .2 is usable, .3 is broadcast)
        let result = ipam.allocate("default", "pod-b");
        assert!(matches!(
            result.unwrap_err(),
            RuntimeError::IpamPoolExhausted { .. }
        ));
    }

    #[test]
    fn test_get_all_allocations() {
        let (ipam, _dir) = make_test_ipam("10.88.0.0/16");

        ipam.allocate("default", "pod-a").unwrap();
        ipam.allocate("kube-system", "pod-b").unwrap();

        let allocs = ipam.get_all_allocations().unwrap();
        assert_eq!(allocs.len(), 2);
        assert_eq!(allocs[&Ipv4Addr::new(10, 88, 0, 2)], "default/pod-a");
        assert_eq!(allocs[&Ipv4Addr::new(10, 88, 0, 3)], "kube-system/pod-b");
    }

    #[test]
    fn test_release_nonexistent() {
        let (ipam, _dir) = make_test_ipam("10.88.0.0/16");
        let released = ipam.release("default", "nonexistent").unwrap();
        assert_eq!(released, None);
    }
}
|
||||
|
|
@ -1,6 +1,8 @@
|
|||
pub mod ipam;
|
||||
pub mod types;
|
||||
|
||||
pub use crate::types::{DirectNicConfig, EtherstubConfig, NetworkMode};
|
||||
pub use ipam::{CidrConfig, IpAllocation, Ipam};
|
||||
|
||||
/// Generate a VNIC name from pod namespace and name
|
||||
pub fn vnic_name_for_pod(namespace: &str, pod_name: &str) -> String {
|
||||
|
|
|
|||
|
|
@ -104,6 +104,8 @@ pub struct EtherstubConfig {
|
|||
pub ip_address: String,
|
||||
/// Gateway address
|
||||
pub gateway: String,
|
||||
/// CIDR prefix length (e.g., 16 for /16)
|
||||
pub prefix_len: u8,
|
||||
}
|
||||
|
||||
/// Direct NIC-based network configuration
|
||||
|
|
@ -117,6 +119,8 @@ pub struct DirectNicConfig {
|
|||
pub ip_address: String,
|
||||
/// Gateway address
|
||||
pub gateway: String,
|
||||
/// CIDR prefix length (e.g., 16 for /16)
|
||||
pub prefix_len: u8,
|
||||
}
|
||||
|
||||
/// ZFS dataset configuration for zone storage
|
||||
|
|
|
|||
|
|
@ -11,12 +11,24 @@ pub fn generate_zonecfg(config: &ZoneConfig) -> Result<String> {
|
|||
lines.push("set ip-type=exclusive".to_string());
|
||||
|
||||
// Network resource
|
||||
let vnic_name = match &config.network {
|
||||
NetworkMode::Etherstub(cfg) => &cfg.vnic_name,
|
||||
NetworkMode::Direct(cfg) => &cfg.vnic_name,
|
||||
let (vnic_name, ip_address, gateway, prefix_len) = match &config.network {
|
||||
NetworkMode::Etherstub(cfg) => (
|
||||
&cfg.vnic_name,
|
||||
&cfg.ip_address,
|
||||
&cfg.gateway,
|
||||
cfg.prefix_len,
|
||||
),
|
||||
NetworkMode::Direct(cfg) => (
|
||||
&cfg.vnic_name,
|
||||
&cfg.ip_address,
|
||||
&cfg.gateway,
|
||||
cfg.prefix_len,
|
||||
),
|
||||
};
|
||||
lines.push("add net".to_string());
|
||||
lines.push(format!("set physical={}", vnic_name));
|
||||
lines.push(format!("set allowed-address={}/{}", ip_address, prefix_len));
|
||||
lines.push(format!("set defrouter={}", gateway));
|
||||
lines.push("end".to_string());
|
||||
|
||||
// CPU cap
|
||||
|
|
@ -67,6 +79,7 @@ mod tests {
|
|||
vnic_name: "vnic0".to_string(),
|
||||
ip_address: "10.0.0.2".to_string(),
|
||||
gateway: "10.0.0.1".to_string(),
|
||||
prefix_len: 16,
|
||||
}),
|
||||
zfs: ZfsConfig {
|
||||
parent_dataset: "rpool/zones".to_string(),
|
||||
|
|
@ -85,6 +98,8 @@ mod tests {
|
|||
assert!(result.contains("set zonepath=/zones/test-zone"));
|
||||
assert!(result.contains("set ip-type=exclusive"));
|
||||
assert!(result.contains("set physical=vnic0"));
|
||||
assert!(result.contains("set allowed-address=10.0.0.2/16"));
|
||||
assert!(result.contains("set defrouter=10.0.0.1"));
|
||||
assert!(result.contains("set ncpus=2.0"));
|
||||
assert!(result.contains("set physical=1G"));
|
||||
assert!(result.contains("verify"));
|
||||
|
|
@ -102,6 +117,7 @@ mod tests {
|
|||
vnic_name: "vnic1".to_string(),
|
||||
ip_address: "192.168.1.10".to_string(),
|
||||
gateway: "192.168.1.1".to_string(),
|
||||
prefix_len: 24,
|
||||
}),
|
||||
zfs: ZfsConfig {
|
||||
parent_dataset: "rpool/zones".to_string(),
|
||||
|
|
@ -128,6 +144,8 @@ mod tests {
|
|||
let result = generate_zonecfg(&config).unwrap();
|
||||
assert!(result.contains("set brand=reddwarf"));
|
||||
assert!(result.contains("set physical=vnic1"));
|
||||
assert!(result.contains("set allowed-address=192.168.1.10/24"));
|
||||
assert!(result.contains("set defrouter=192.168.1.1"));
|
||||
assert!(result.contains("set physical=512M"));
|
||||
assert!(result.contains("add fs"));
|
||||
assert!(result.contains("set dir=/etc/app"));
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@ use clap::{Parser, Subcommand};
|
|||
use reddwarf_apiserver::{ApiError, ApiServer, AppState, Config as ApiConfig};
|
||||
use reddwarf_core::Namespace;
|
||||
use reddwarf_runtime::{
|
||||
ApiClient, EtherstubConfig, MockRuntime, NetworkMode, NodeAgent, NodeAgentConfig,
|
||||
PodController, PodControllerConfig, ZoneBrand,
|
||||
ApiClient, Ipam, MockRuntime, NodeAgent, NodeAgentConfig, PodController, PodControllerConfig,
|
||||
ZoneBrand,
|
||||
};
|
||||
use reddwarf_scheduler::scheduler::SchedulerConfig;
|
||||
use reddwarf_scheduler::Scheduler;
|
||||
|
|
@ -48,6 +48,12 @@ enum Commands {
|
|||
/// Parent ZFS dataset for zone storage
|
||||
#[arg(long, default_value = "rpool/zones")]
|
||||
zfs_parent: String,
|
||||
/// Pod network CIDR for IPAM allocation
|
||||
#[arg(long, default_value = "10.88.0.0/16")]
|
||||
pod_cidr: String,
|
||||
/// Etherstub name for pod networking
|
||||
#[arg(long, default_value = "reddwarf0")]
|
||||
etherstub_name: String,
|
||||
},
|
||||
}
|
||||
|
||||
|
|
@ -71,7 +77,20 @@ async fn main() -> miette::Result<()> {
|
|||
data_dir,
|
||||
zonepath_prefix,
|
||||
zfs_parent,
|
||||
} => run_agent(&node_name, &bind, &data_dir, &zonepath_prefix, &zfs_parent).await,
|
||||
pod_cidr,
|
||||
etherstub_name,
|
||||
} => {
|
||||
run_agent(
|
||||
&node_name,
|
||||
&bind,
|
||||
&data_dir,
|
||||
&zonepath_prefix,
|
||||
&zfs_parent,
|
||||
&pod_cidr,
|
||||
&etherstub_name,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -105,6 +124,8 @@ async fn run_agent(
|
|||
data_dir: &str,
|
||||
zonepath_prefix: &str,
|
||||
zfs_parent: &str,
|
||||
pod_cidr: &str,
|
||||
etherstub_name: &str,
|
||||
) -> miette::Result<()> {
|
||||
info!("Starting reddwarf agent for node '{}'", node_name);
|
||||
|
||||
|
|
@ -158,7 +179,12 @@ async fn run_agent(
|
|||
// 3. Create runtime (MockRuntime on non-illumos, IllumosRuntime on illumos)
|
||||
let runtime: Arc<dyn reddwarf_runtime::ZoneRuntime> = create_runtime();
|
||||
|
||||
// 4. Spawn pod controller
|
||||
// 4. Create IPAM for per-pod IP allocation
|
||||
let ipam = Ipam::new(state.storage.clone(), pod_cidr).map_err(|e| {
|
||||
miette::miette!("Failed to initialize IPAM with CIDR '{}': {}", pod_cidr, e)
|
||||
})?;
|
||||
|
||||
// 5. Spawn pod controller
|
||||
let api_client = Arc::new(ApiClient::new(&api_url));
|
||||
let controller_config = PodControllerConfig {
|
||||
node_name: node_name.to_string(),
|
||||
|
|
@ -166,12 +192,8 @@ async fn run_agent(
|
|||
zonepath_prefix: zonepath_prefix.to_string(),
|
||||
zfs_parent_dataset: zfs_parent.to_string(),
|
||||
default_brand: ZoneBrand::Reddwarf,
|
||||
network: NetworkMode::Etherstub(EtherstubConfig {
|
||||
etherstub_name: "reddwarf0".to_string(),
|
||||
vnic_name: "reddwarf_vnic0".to_string(),
|
||||
ip_address: "10.88.0.2".to_string(),
|
||||
gateway: "10.88.0.1".to_string(),
|
||||
}),
|
||||
etherstub_name: etherstub_name.to_string(),
|
||||
pod_cidr: pod_cidr.to_string(),
|
||||
};
|
||||
|
||||
let controller = PodController::new(
|
||||
|
|
@ -179,6 +201,7 @@ async fn run_agent(
|
|||
api_client.clone(),
|
||||
state.event_tx.clone(),
|
||||
controller_config,
|
||||
ipam,
|
||||
);
|
||||
let controller_token = token.clone();
|
||||
let controller_handle = tokio::spawn(async move {
|
||||
|
|
@ -187,7 +210,7 @@ async fn run_agent(
|
|||
}
|
||||
});
|
||||
|
||||
// 5. Spawn node agent
|
||||
// 6. Spawn node agent
|
||||
let node_agent_config = NodeAgentConfig::new(node_name.to_string(), api_url);
|
||||
let node_agent = NodeAgent::new(api_client, node_agent_config);
|
||||
let agent_token = token.clone();
|
||||
|
|
@ -198,8 +221,8 @@ async fn run_agent(
|
|||
});
|
||||
|
||||
info!(
|
||||
"All components started. API server on {}, node name: {}",
|
||||
bind, node_name
|
||||
"All components started. API server on {}, node name: {}, pod CIDR: {}",
|
||||
bind, node_name, pod_cidr
|
||||
);
|
||||
|
||||
// Wait for shutdown signal
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue