Organize repository files into hierarchical directory structure for improved scalability

- Update `FileBackend` to store files in a multi-level directory structure (`xx/yy/hash`) based on hash prefixes.
- Adjust method signatures and logic across `FileBackend`, `pkg6dev`, and `pkg6repo` to support mutable repository operations.
- Add test cases and scripts (`test_file_structure.rs`, `test_repo_operations.sh`) to validate the new file structure.
- Refactor catalog manager to use `RefCell` for interior mutability to enable lazy initialization.
- Clean up redundant and dead code, enhance comments, and replace outdated logic for file handling and metadata operations.
This commit is contained in:
Till Wegmueller 2025-07-27 13:15:52 +02:00
parent 633e742528
commit 7f4ecf6346
No known key found for this signature in database
8 changed files with 235 additions and 37 deletions

View file

@ -197,7 +197,8 @@ pub struct FileBackend {
pub path: PathBuf,
pub config: RepositoryConfig,
/// Catalog manager for handling catalog operations
catalog_manager: Option<crate::repository::catalog::CatalogManager>,
/// Uses RefCell for interior mutability to allow mutation through immutable references
catalog_manager: Option<std::cell::RefCell<crate::repository::catalog::CatalogManager>>,
}
/// Format a SystemTime as an ISO 8601 timestamp string
@ -415,8 +416,23 @@ impl Transaction {
// Copy files to their final location
for (source_path, hash) in self.files {
// Create the destination path in the files directory
let dest_path = self.repo.join("file").join(&hash);
// Create the destination path using the new directory structure
let dest_path = if hash.len() < 4 {
// Fallback for very short hashes (shouldn't happen with SHA256)
self.repo.join("file").join(&hash)
} else {
// Extract the first two and next two characters from the hash
let first_two = &hash[0..2];
let next_two = &hash[2..4];
// Create the path: $REPO/file/XX/YY/XXYY...
self.repo.join("file").join(first_two).join(next_two).join(&hash)
};
// Create parent directories if they don't exist
if let Some(parent) = dest_path.parent() {
fs::create_dir_all(parent)?;
}
// Copy the file if it doesn't already exist
if !dest_path.exists() {
@ -1211,7 +1227,7 @@ impl WritableRepository for FileBackend {
}
/// Rebuild repository metadata
fn rebuild(&self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()> {
fn rebuild(&mut self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()> {
// Filter publishers if specified
let publishers = if let Some(pub_name) = publisher {
if !self.config.publishers.contains(&pub_name.to_string()) {
@ -1228,7 +1244,7 @@ impl WritableRepository for FileBackend {
if !no_catalog {
info!("Rebuilding catalog...");
// In a real implementation, we would rebuild the catalog
self.generate_catalog_parts(&pub_name, true)?;
}
if !no_index {
@ -1241,7 +1257,7 @@ impl WritableRepository for FileBackend {
}
/// Refresh repository metadata
fn refresh(&self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()> {
fn refresh(&mut self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()> {
// Filter publishers if specified
let publishers = if let Some(pub_name) = publisher {
if !self.config.publishers.contains(&pub_name.to_string()) {
@ -1258,22 +1274,12 @@ impl WritableRepository for FileBackend {
if !no_catalog {
info!("Refreshing catalog...");
// In a real implementation, we would refresh the catalog
self.generate_catalog_parts(&pub_name, true)?;
}
if !no_index {
info!("Refreshing search index...");
// Check if the index exists
let index_path = self.path.join("index").join(&pub_name).join("search.json");
if !index_path.exists() {
// If the index doesn't exist, build it
self.build_search_index(&pub_name)?;
} else {
// If the index exists, update it
// For simplicity, we'll just rebuild it
self.build_search_index(&pub_name)?;
}
self.build_search_index(&pub_name)?;
}
}
@ -1310,22 +1316,41 @@ impl FileBackend {
Ok(())
}
/// Get or initialize the catalog manager
pub fn get_catalog_manager(
&mut self,
) -> Result<&mut crate::repository::catalog::CatalogManager> {
if self.catalog_manager.is_none() {
let catalog_dir = self.path.join("catalog");
self.catalog_manager = Some(crate::repository::catalog::CatalogManager::new(
&catalog_dir,
)?);
/// Generate the file path for a given hash using the new directory structure
/// The path will be $REPO/file/XX/YY/XXYY... where XX and YY are the first four letters of the hash
fn generate_file_path(&self, hash: &str) -> PathBuf {
if hash.len() < 4 {
// Fallback for very short hashes (shouldn't happen with SHA256)
return self.path.join("file").join(hash);
}
Ok(self.catalog_manager.as_mut().unwrap())
// Extract the first two and next two characters from the hash
let first_two = &hash[0..2];
let next_two = &hash[2..4];
// Create the path: $REPO/file/XX/YY/XXYY...
self.path.join("file").join(first_two).join(next_two).join(hash)
}
/// Get or initialize the catalog manager, creating it lazily on first use.
///
/// Returns a `RefMut` guard over the `CatalogManager` stored in this backend.
/// The manager lives at `<repo>/catalog` and is constructed on the first call.
///
/// NOTE(review): the manager is wrapped in a `RefCell` — presumably so it
/// could later be borrowed mutably through `&self` — yet this method still
/// takes `&mut self`, which the lazy `Option` initialization requires. As
/// written, the `RefCell` adds runtime borrow tracking without enabling
/// `&self` access; confirm whether callers actually need the interior
/// mutability before relying on it.
///
/// # Errors
/// Propagates any error from `CatalogManager::new` when the manager is
/// first constructed.
pub fn get_catalog_manager(
    &mut self,
) -> Result<std::cell::RefMut<crate::repository::catalog::CatalogManager>> {
    if self.catalog_manager.is_none() {
        let catalog_dir = self.path.join("catalog");
        let manager = crate::repository::catalog::CatalogManager::new(&catalog_dir)?;
        let refcell = std::cell::RefCell::new(manager);
        self.catalog_manager = Some(refcell);
    }
    // unwrap() cannot fail: the branch above just ensured catalog_manager is Some
    Ok(self.catalog_manager.as_ref().unwrap().borrow_mut())
}
/// URL encode a string for use in a filename
#[allow(dead_code)]
fn url_encode(s: &str) -> String {
let mut result = String::new();
for c in s.chars() {
@ -1342,7 +1367,6 @@ impl FileBackend {
}
/// Generate catalog parts for a publisher
#[allow(dead_code)]
fn generate_catalog_parts(&mut self, publisher: &str, create_update_log: bool) -> Result<()> {
info!("Generating catalog parts for publisher: {}", publisher);
@ -1471,7 +1495,7 @@ impl FileBackend {
}
// Now get the catalog manager and create the catalog parts
let catalog_manager = self.get_catalog_manager()?;
let mut catalog_manager = self.get_catalog_manager()?;
// Create and populate the base part
let base_part_name = "catalog.base.C".to_string();
@ -1796,7 +1820,7 @@ impl FileBackend {
// Verify the file was stored
let hash = Transaction::calculate_file_hash(&test_file_path)?;
let stored_file_path = self.path.join("file").join(&hash);
let stored_file_path = self.generate_file_path(&hash);
if !stored_file_path.exists() {
return Err(RepositoryError::Other("File was not stored correctly".to_string()));
@ -1827,7 +1851,7 @@ impl FileBackend {
}
/// Publish files from a prototype directory
pub fn publish_files<P: AsRef<Path>>(&self, proto_dir: P, publisher: &str) -> Result<()> {
pub fn publish_files<P: AsRef<Path>>(&mut self, proto_dir: P, publisher: &str) -> Result<()> {
let proto_dir = proto_dir.as_ref();
// Check if the prototype directory exists
@ -1901,8 +1925,13 @@ impl FileBackend {
// Calculate the SHA256 hash of the file
let hash = Transaction::calculate_file_hash(file_path)?;
// Create the destination path in the files directory
let dest_path = self.path.join("file").join(&hash);
// Create the destination path using the new directory structure
let dest_path = self.generate_file_path(&hash);
// Create parent directories if they don't exist
if let Some(parent) = dest_path.parent() {
fs::create_dir_all(parent)?;
}
// Copy the file if it doesn't already exist
if !dest_path.exists() {

View file

@ -330,10 +330,10 @@ pub trait WritableRepository {
) -> Result<()>;
/// Rebuild repository metadata
fn rebuild(&self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()>;
fn rebuild(&mut self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()>;
/// Refresh repository metadata
fn refresh(&self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()>;
fn refresh(&mut self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()>;
/// Set the default publisher for the repository
fn set_default_publisher(&mut self, publisher: &str) -> Result<()>;

View file

@ -89,7 +89,7 @@ impl WritableRepository for RestBackend {
}
/// Rebuild repository metadata
fn rebuild(&self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()> {
fn rebuild(&mut self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()> {
// This is a stub implementation
// In a real implementation, we would make a REST API call to rebuild metadata
@ -122,7 +122,7 @@ impl WritableRepository for RestBackend {
}
/// Refresh repository metadata
fn refresh(&self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()> {
fn refresh(&mut self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()> {
// This is a stub implementation
// In a real implementation, we would make a REST API call to refresh metadata

View file

@ -393,4 +393,39 @@ mod tests {
// Clean up
cleanup_test_dir(&test_dir);
}
/// Verify that a stored file lands in the sharded `file/XX/YY/<hash>`
/// layout and is absent from the legacy flat `file/<hash>` location.
#[test]
fn test_file_structure() {
    // Set up an isolated repository with one publisher.
    let test_dir = create_test_dir("file_structure");
    let repo_path = test_dir.join("repo");
    let mut repo = FileBackend::create(&repo_path, RepositoryVersion::V4).unwrap();
    repo.add_publisher("test").unwrap();

    // Write a small fixture file and store it in the repository.
    let test_file_path = test_dir.join("test_file.txt");
    fs::write(&test_file_path, "This is a test file").unwrap();
    let hash = repo.store_file(&test_file_path).unwrap();

    // The sharded destination is derived from the hash prefix.
    let (shard_a, shard_b) = (&hash[0..2], &hash[2..4]);
    let expected_path = repo_path
        .join("file")
        .join(shard_a)
        .join(shard_b)
        .join(&hash);
    assert!(expected_path.exists(), "File was not stored at the expected path: {}", expected_path.display());

    // The flat layout must no longer receive files.
    let old_path = repo_path.join("file").join(&hash);
    assert!(!old_path.exists(), "File was stored at the old path: {}", old_path.display());

    cleanup_test_dir(&test_dir);
}
}

View file

@ -426,7 +426,7 @@ fn publish_package(
// Open the repository
info!("Opening repository at: {}", repo_path.display());
let repo = match FileBackend::open(repo_path) {
let mut repo = match FileBackend::open(repo_path) {
Ok(repo) => repo,
Err(_) => {
info!("Repository does not exist, creating a new one...");

View file

@ -868,7 +868,7 @@ fn main() -> Result<()> {
// Open the repository
// In a real implementation with RestBackend, the key and cert parameters would be used for SSL authentication
// For now, we're using FileBackend, which doesn't use these parameters
let repo = FileBackend::open(repo_uri_or_path)?;
let mut repo = FileBackend::open(repo_uri_or_path)?;
// Get the publisher if specified
let pub_option = if let Some(publishers) = publisher {
@ -899,7 +899,7 @@ fn main() -> Result<()> {
// Open the repository
// In a real implementation with RestBackend, the key and cert parameters would be used for SSL authentication
// For now, we're using FileBackend, which doesn't use these parameters
let repo = FileBackend::open(repo_uri_or_path)?;
let mut repo = FileBackend::open(repo_uri_or_path)?;
// Get the publisher if specified
let pub_option = if let Some(publishers) = publisher {

57
test_file_structure.rs Normal file
View file

@ -0,0 +1,57 @@
use std::fs;
use std::path::Path;

use libips::repository::{FileBackend, WritableRepository, ReadableRepository, RepositoryVersion};

/// Smoke test: a file stored through `FileBackend` must land in the
/// sharded `file/XX/YY/<hash>` directory layout.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Start from a clean scratch directory.
    let scratch = Path::new("/tmp/pkg6_file_structure_test");
    if scratch.exists() {
        fs::remove_dir_all(scratch)?;
    }
    fs::create_dir_all(scratch)?;
    println!("Created test directory: {}", scratch.display());

    // Build a fresh repository with a single publisher.
    let mut repo = FileBackend::create(scratch, RepositoryVersion::V1)?;
    repo.add_publisher("test")?;
    println!("Created repository with publisher 'test'");

    // Produce a small fixture file and store it.
    let fixture = scratch.join("test_file.txt");
    fs::write(&fixture, "This is a test file")?;
    println!("Created test file: {}", fixture.display());
    let digest = repo.store_file(&fixture)?;
    println!("Stored file with hash: {}", digest);

    // Derive the sharded destination from the hash prefix and check it.
    let expected = scratch
        .join("file")
        .join(&digest[0..2])
        .join(&digest[2..4])
        .join(&digest);
    if expected.exists() {
        println!("SUCCESS: File was stored at the correct path: {}", expected.display());
    } else {
        println!("ERROR: File was not stored at the expected path: {}", expected.display());
        // Diagnose: did the file end up in the legacy flat layout instead?
        let legacy = scratch.join("file").join(&digest);
        if legacy.exists() {
            println!("File was stored at the old path: {}", legacy.display());
        } else {
            println!("File was not stored at the old path either");
        }
    }

    // Clean up
    fs::remove_dir_all(scratch)?;
    Ok(())
}

77
test_repo_operations.sh Executable file
View file

@ -0,0 +1,77 @@
#!/bin/bash
# Test script to verify that repository operations work with the new directory structure
#
# Fail fast: abort on the first failing command, on use of unset variables,
# and on failures inside pipelines, so a broken build or publish step cannot
# produce misleading "SUCCESS" output later on.
set -euo pipefail

# Create a temporary directory for the test
TEST_DIR="/tmp/pkg6_repo_operations_test"
rm -rf "$TEST_DIR"
mkdir -p "$TEST_DIR"

echo "Created test directory: $TEST_DIR"

# Create a repository
echo "Creating repository..."
cargo run --bin pkg6repo -- create "$TEST_DIR/repo"

# Add a publisher
echo "Adding publisher..."
cargo run --bin pkg6repo -- add-publisher -r "$TEST_DIR/repo" test

# Create a test file
echo "Creating test file..."
echo "This is a test file" > "$TEST_DIR/test_file.txt"

# Create a simple manifest
echo "Creating manifest..."
cat > "$TEST_DIR/test.p5m" << EOF
{
  "attributes": [
    {
      "key": "pkg.fmri",
      "values": [
        "pkg://test/example@1.0,5.11-0:20250727T123000Z"
      ],
      "properties": {}
    }
  ],
  "files": [
    {
      "path": "usr/share/doc/example/test.txt",
      "mode": "0644",
      "owner": "root",
      "group": "root"
    }
  ]
}
EOF

# Create a prototype directory
echo "Creating prototype directory..."
mkdir -p "$TEST_DIR/prototype/usr/share/doc/example"
cp "$TEST_DIR/test_file.txt" "$TEST_DIR/prototype/usr/share/doc/example/test.txt"

# Publish the package
echo "Publishing package..."
cargo run --bin pkg6repo -- publish -r "$TEST_DIR/repo" -p test -m "$TEST_DIR/test.p5m" "$TEST_DIR/prototype"

# Check that every stored file sits at file/XX/YY/<hash>.
# Use process substitution (not `find | while`) so the loop runs in the
# current shell and FAILURES survives past the loop; a pipeline would run
# the loop body in a subshell and silently discard the counter.
echo "Checking file structure..."
FAILURES=0
while read -r file; do
    hash=$(basename "$file")
    first_two=${hash:0:2}
    next_two=${hash:2:2}
    expected_path="$TEST_DIR/repo/file/$first_two/$next_two/$hash"
    if [ "$file" = "$expected_path" ]; then
        echo "SUCCESS: File was stored at the correct path: $file"
    else
        echo "ERROR: File was stored at an unexpected path: $file"
        echo "Expected: $expected_path"
        FAILURES=$((FAILURES + 1))
    fi
done < <(find "$TEST_DIR/repo/file" -type f)

# Clean up
echo "Cleaning up..."
rm -rf "$TEST_DIR"

echo "Test completed."
# Propagate a non-zero exit status if any file was misplaced, so CI can
# detect the failure instead of always seeing exit code 0.
exit "$FAILURES"