Add batched catalog rebuild support and INI-only repo test

- Introduced `BatchOptions` for configurable batched catalog rebuilds.
- Added `rebuild_catalog_batched` in `file_backend` for batched processing with deterministic package order and progress tracking.
- Updated `rebuild_catalog` to leverage the new batched implementation with default options.
- Added integration test `test_ini_only_repo_serving_catalog` to validate functionality with an INI-only repository configuration.
- Enhanced test coverage for legacy catalog handling via `pkg5.repository`.
This commit is contained in:
Till Wegmueller 2025-12-09 14:23:55 +01:00
parent bd67e06012
commit e87d1a3166
No known key found for this signature in database
3 changed files with 123 additions and 6 deletions

View file

@ -2167,12 +2167,25 @@ impl FileBackend {
Ok(()) Ok(())
} }
/// Rebuild catalog for a publisher /// Rebuild catalog for a publisher (delegates to the batched implementation with defaults)
///
/// This method generates catalog files for a publisher and stores them in the publisher's
/// catalog directory.
pub fn rebuild_catalog(&self, publisher: &str, create_update_log: bool) -> Result<()> { pub fn rebuild_catalog(&self, publisher: &str, create_update_log: bool) -> Result<()> {
info!("Rebuilding catalog for publisher: {}", publisher); let opts = crate::repository::BatchOptions::default();
self.rebuild_catalog_batched(publisher, create_update_log, opts)
}
/// Rebuild catalog for a publisher using a batched algorithm.
///
/// Batching currently streams package processing while accumulating in-memory
/// structures for the three catalog parts and optional update log, and emits
/// progress spans per batch. Future work may flush partial structures to
/// disk, but the public API is stable.
pub fn rebuild_catalog_batched(
&self,
publisher: &str,
create_update_log: bool,
opts: crate::repository::BatchOptions,
) -> Result<()> {
info!("Rebuilding catalog (batched) for publisher: {}", publisher);
// Create the catalog directory for the publisher if it doesn't exist // Create the catalog directory for the publisher if it doesn't exist
let catalog_dir = Self::construct_catalog_path(&self.path, publisher); let catalog_dir = Self::construct_catalog_path(&self.path, publisher);
@ -2193,7 +2206,13 @@ impl FileBackend {
let mut package_count = 0; let mut package_count = 0;
let mut package_version_count = 0; let mut package_version_count = 0;
// Process each package // Process each package in deterministic order (by FMRI string)
let mut packages = packages;
packages.sort_by(|a, b| a.fmri.to_string().cmp(&b.fmri.to_string()));
let mut processed_in_batch = 0usize;
let mut batch_no = 0usize;
for package in packages { for package in packages {
let fmri = &package.fmri; let fmri = &package.fmri;
let stem = fmri.stem(); let stem = fmri.stem();
@ -2311,6 +2330,13 @@ impl FileBackend {
// Update counts // Update counts
package_count += 1; package_count += 1;
package_version_count += 1; package_version_count += 1;
processed_in_batch += 1;
if processed_in_batch >= opts.batch_size {
batch_no += 1;
tracing::debug!(publisher, batch_no, processed_in_batch, "catalog rebuild batch processed");
processed_in_batch = 0;
}
} }
// Create and save catalog parts // Create and save catalog parts

View file

@ -237,6 +237,21 @@ pub use rest_backend::RestBackend;
/// Repository configuration filename /// Repository configuration filename
pub const REPOSITORY_CONFIG_FILENAME: &str = "pkg6.repository"; pub const REPOSITORY_CONFIG_FILENAME: &str = "pkg6.repository";
/// Options to control batched catalog rebuild behavior
#[derive(Debug, Clone, Copy)]
pub struct BatchOptions {
/// Number of packages processed before flushing catalog parts to disk
pub batch_size: usize,
/// How many batches between fsync/flush points (reserved for future use)
pub flush_every_n: usize,
}
impl Default for BatchOptions {
fn default() -> Self {
BatchOptions { batch_size: 2000, flush_every_n: 1 }
}
}
/// Information about a publisher in a repository /// Information about a publisher in a repository
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct PublisherInfo { pub struct PublisherInfo {

View file

@ -155,3 +155,79 @@ async fn test_depot_server() {
// 6. Test File // 6. Test File
// We assume file exists if manifest works. // We assume file exists if manifest works.
} }
#[tokio::test]
async fn test_ini_only_repo_serving_catalog() {
use libips::repository::{WritableRepository, ReadableRepository};
use libips::repository::BatchOptions;
use std::io::Write as _;
// Setup temp repo
let temp_dir = TempDir::new().unwrap();
let repo_path = temp_dir.path().join("repo_ini");
// Create repo using FileBackend to get layout, publish one pkg
let mut backend = FileBackend::create(&repo_path, RepositoryVersion::V4).unwrap();
let publisher = "ini-test";
backend.add_publisher(publisher).unwrap();
// Create content
let content_dir = temp_dir.path().join("content_ini");
fs::create_dir_all(&content_dir).unwrap();
let file_path = content_dir.join("hello.txt");
fs::write(&file_path, "Hello INI Repo").unwrap();
// Publish one manifest
let mut tx = backend.begin_transaction().unwrap();
tx.set_publisher(publisher);
let mut fa = FileAction::read_from_path(&file_path).unwrap();
fa.path = "hello.txt".to_string();
tx.add_file(fa, &file_path).unwrap();
let mut manifest = Manifest::new();
use libips::actions::Attr;
use std::collections::HashMap;
manifest.attributes.push(Attr { key: "pkg.fmri".to_string(), values: vec![format!("pkg://{}/example@1.0.0", publisher)], properties: HashMap::new() });
manifest.attributes.push(Attr { key: "pkg.summary".to_string(), values: vec!["INI Repo Test Package".to_string()], properties: HashMap::new() });
tx.update_manifest(manifest);
tx.commit().unwrap();
// Rebuild catalog using batched API explicitly with small batch to exercise code path
let opts = BatchOptions { batch_size: 1, flush_every_n: 1 };
backend.rebuild_catalog_batched(publisher, true, opts).unwrap();
// Replace pkg6.repository with legacy pkg5.repository so FileBackend::open uses INI
let pkg6_cfg = repo_path.join("pkg6.repository");
if pkg6_cfg.exists() { fs::remove_file(&pkg6_cfg).unwrap(); }
let mut ini = String::new();
ini.push_str("[publisher]\n");
ini.push_str(&format!("prefix = {}\n", publisher));
ini.push_str("[repository]\nversion = 4\n");
let mut f = std::fs::File::create(repo_path.join("pkg5.repository")).unwrap();
f.write_all(ini.as_bytes()).unwrap();
// Start depot server
let config = Config {
server: ServerConfig { bind: vec!["127.0.0.1:0".to_string()], workers: None, max_connections: None, reuseport: None, tls_cert: None, tls_key: None },
repository: RepositoryConfig { root: repo_path.clone(), mode: Some("readonly".to_string()) },
telemetry: None, publishers: None, admin: None, oauth2: None,
};
let repo = DepotRepo::new(&config).unwrap();
let state = Arc::new(repo);
let router = http::routes::app_router(state);
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
let addr = listener.local_addr().unwrap();
tokio::spawn(async move { http::server::run(router, listener).await.unwrap(); });
let client = reqwest::Client::new();
let base_url = format!("http://{}", addr);
// Fetch catalog attrs via v1 endpoint
let url = format!("{}/{}/catalog/1/catalog.attrs", base_url, publisher);
let resp = client.get(&url).send().await.unwrap();
assert!(resp.status().is_success(), "status: {:?}", resp.status());
let body = resp.text().await.unwrap();
assert!(body.contains("package-count"));
assert!(body.contains("parts"));
}