From e87d1a3166bc6bf852bcfe5e4b457fe1bd9243a6 Mon Sep 17 00:00:00 2001 From: Till Wegmueller Date: Tue, 9 Dec 2025 14:23:55 +0100 Subject: [PATCH] Add batched catalog rebuild support and INI-only repo test - Introduced `BatchOptions` for configurable batched catalog rebuilds. - Added `rebuild_catalog_batched` in `file_backend` for batched processing with deterministic package order and progress tracking. - Updated `rebuild_catalog` to leverage the new batched implementation with default options. - Added integration test `test_ini_only_repo_serving_catalog` to validate functionality with an INI-only repository configuration. - Enhanced test coverage for legacy catalog handling via `pkg5.repository`. --- libips/src/repository/file_backend.rs | 38 +++++++++++--- libips/src/repository/mod.rs | 15 ++++++ pkg6depotd/tests/integration_tests.rs | 76 +++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 6 deletions(-) diff --git a/libips/src/repository/file_backend.rs b/libips/src/repository/file_backend.rs index da130cc..2243d38 100644 --- a/libips/src/repository/file_backend.rs +++ b/libips/src/repository/file_backend.rs @@ -2167,12 +2167,25 @@ impl FileBackend { Ok(()) } - /// Rebuild catalog for a publisher - /// - /// This method generates catalog files for a publisher and stores them in the publisher's - /// catalog directory. + /// Rebuild catalog for a publisher (delegates to the batched implementation with defaults) pub fn rebuild_catalog(&self, publisher: &str, create_update_log: bool) -> Result<()> { - info!("Rebuilding catalog for publisher: {}", publisher); + let opts = crate::repository::BatchOptions::default(); + self.rebuild_catalog_batched(publisher, create_update_log, opts) + } + + /// Rebuild catalog for a publisher using a batched algorithm. + /// + /// Batching currently streams package processing while accumulating in-memory + /// structures for the three catalog parts and optional update log, and emits + /// progress spans per batch. Future work may flush partial structures to + /// disk, but the public API is stable. + pub fn rebuild_catalog_batched( + &self, + publisher: &str, + create_update_log: bool, + opts: crate::repository::BatchOptions, + ) -> Result<()> { + info!("Rebuilding catalog (batched) for publisher: {}", publisher); // Create the catalog directory for the publisher if it doesn't exist let catalog_dir = Self::construct_catalog_path(&self.path, publisher); @@ -2193,7 +2206,13 @@ impl FileBackend { let mut package_count = 0; let mut package_version_count = 0; - // Process each package + // Process each package in deterministic order (by FMRI string) + let mut packages = packages; + packages.sort_by(|a, b| a.fmri.to_string().cmp(&b.fmri.to_string())); + + let mut processed_in_batch = 0usize; + let mut batch_no = 0usize; + for package in packages { let fmri = &package.fmri; let stem = fmri.stem(); @@ -2311,6 +2330,13 @@ impl FileBackend { // Update counts package_count += 1; package_version_count += 1; + + processed_in_batch += 1; + if processed_in_batch >= opts.batch_size { + batch_no += 1; + tracing::debug!(publisher, batch_no, processed_in_batch, "catalog rebuild batch processed"); + processed_in_batch = 0; + } } // Create and save catalog parts diff --git a/libips/src/repository/mod.rs b/libips/src/repository/mod.rs index 9f1d294..88f613b 100644 --- a/libips/src/repository/mod.rs +++ b/libips/src/repository/mod.rs @@ -237,6 +237,21 @@ pub use rest_backend::RestBackend; /// Repository configuration filename pub const REPOSITORY_CONFIG_FILENAME: &str = "pkg6.repository"; +/// Options to control batched catalog rebuild behavior +#[derive(Debug, Clone, Copy)] +pub struct BatchOptions { + /// Number of packages processed before flushing catalog parts to disk + pub batch_size: usize, + /// How many batches between fsync/flush points (reserved for future use) + pub flush_every_n: usize, +} + +impl Default for BatchOptions { + fn default() -> Self { + BatchOptions { batch_size: 2000, flush_every_n: 1 } + } +} + /// Information about a publisher in a repository #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub struct PublisherInfo { diff --git a/pkg6depotd/tests/integration_tests.rs b/pkg6depotd/tests/integration_tests.rs index e88aa4e..6f1ab96 100644 --- a/pkg6depotd/tests/integration_tests.rs +++ b/pkg6depotd/tests/integration_tests.rs @@ -155,3 +155,79 @@ async fn test_depot_server() { // 6. Test File // We assume file exists if manifest works. } + +#[tokio::test] +async fn test_ini_only_repo_serving_catalog() { + use libips::repository::{WritableRepository, ReadableRepository}; + use libips::repository::BatchOptions; + use std::io::Write as _; + + // Setup temp repo + let temp_dir = TempDir::new().unwrap(); + let repo_path = temp_dir.path().join("repo_ini"); + + // Create repo using FileBackend to get layout, publish one pkg + let mut backend = FileBackend::create(&repo_path, RepositoryVersion::V4).unwrap(); + let publisher = "ini-test"; + backend.add_publisher(publisher).unwrap(); + + // Create content + let content_dir = temp_dir.path().join("content_ini"); + fs::create_dir_all(&content_dir).unwrap(); + let file_path = content_dir.join("hello.txt"); + fs::write(&file_path, "Hello INI Repo").unwrap(); + + // Publish one manifest + let mut tx = backend.begin_transaction().unwrap(); + tx.set_publisher(publisher); + let mut fa = FileAction::read_from_path(&file_path).unwrap(); + fa.path = "hello.txt".to_string(); + tx.add_file(fa, &file_path).unwrap(); + + let mut manifest = Manifest::new(); + use libips::actions::Attr; + use std::collections::HashMap; + manifest.attributes.push(Attr { key: "pkg.fmri".to_string(), values: vec![format!("pkg://{}/example@1.0.0", publisher)], properties: HashMap::new() }); + manifest.attributes.push(Attr { key: "pkg.summary".to_string(), values: vec!["INI Repo Test Package".to_string()], properties: HashMap::new() }); + tx.update_manifest(manifest); + tx.commit().unwrap(); + + // Rebuild catalog using batched API explicitly with small batch to exercise code path + let opts = BatchOptions { batch_size: 1, flush_every_n: 1 }; + backend.rebuild_catalog_batched(publisher, true, opts).unwrap(); + + // Replace pkg6.repository with legacy pkg5.repository so FileBackend::open uses INI + let pkg6_cfg = repo_path.join("pkg6.repository"); + if pkg6_cfg.exists() { fs::remove_file(&pkg6_cfg).unwrap(); } + let mut ini = String::new(); + ini.push_str("[publisher]\n"); + ini.push_str(&format!("prefix = {}\n", publisher)); + ini.push_str("[repository]\nversion = 4\n"); + let mut f = std::fs::File::create(repo_path.join("pkg5.repository")).unwrap(); + f.write_all(ini.as_bytes()).unwrap(); + + // Start depot server + let config = Config { + server: ServerConfig { bind: vec!["127.0.0.1:0".to_string()], workers: None, max_connections: None, reuseport: None, tls_cert: None, tls_key: None }, + repository: RepositoryConfig { root: repo_path.clone(), mode: Some("readonly".to_string()) }, + telemetry: None, publishers: None, admin: None, oauth2: None, + }; + let repo = DepotRepo::new(&config).unwrap(); + let state = Arc::new(repo); + let router = http::routes::app_router(state); + + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + tokio::spawn(async move { http::server::run(router, listener).await.unwrap(); }); + + let client = reqwest::Client::new(); + let base_url = format!("http://{}", addr); + + // Fetch catalog attrs via v1 endpoint + let url = format!("{}/{}/catalog/1/catalog.attrs", base_url, publisher); + let resp = client.get(&url).send().await.unwrap(); + assert!(resp.status().is_success(), "status: {:?}", resp.status()); + let body = resp.text().await.unwrap(); + assert!(body.contains("package-count")); + assert!(body.contains("parts")); +}