Use walkdir for efficient recursive traversal in FileBackend::build_search_index

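- Replace the hand-written, nested `fs::read_dir` scan (which descended only one directory level below the publisher package directory) with `walkdir` for simpler and more efficient file traversal; the walk now recurses to any depth and follows symbolic links (`follow_links(true)`).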
- Update dependencies in `Cargo.toml` and `Cargo.lock` to include `walkdir`.
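- Enhance code maintainability by collapsing the two near-identical manifest-processing branches (one for files directly in the publisher directory, one for files inside its subdirectories) into a single loop body that handles every file yielded by the traversal.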
This commit is contained in:
Till Wegmueller 2025-07-27 19:48:52 +02:00
parent 1286db23fd
commit 81eb4a7447
No known key found for this signature in database
3 changed files with 168 additions and 323 deletions

20
Cargo.lock generated
View file

@ -973,6 +973,7 @@ dependencies = [
"tempfile", "tempfile",
"thiserror 1.0.69", "thiserror 1.0.69",
"tracing", "tracing",
"walkdir",
] ]
[[package]] [[package]]
@ -1691,6 +1692,15 @@ version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]] [[package]]
name = "schannel" name = "schannel"
version = "0.1.27" version = "0.1.27"
@ -2336,6 +2346,16 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]] [[package]]
name = "want" name = "want"
version = "0.3.1" version = "0.3.1"

View file

@ -35,3 +35,4 @@ semver = { version = "1.0.20", features = ["serde"] }
diff-struct = "0.5.3" diff-struct = "0.5.3"
chrono = "0.4.41" chrono = "0.4.41"
tempfile = "3.20.0" tempfile = "3.20.0"
walkdir = "2.4.0"

View file

@ -18,6 +18,7 @@ use std::path::{Path, PathBuf};
use std::str::FromStr; use std::str::FromStr;
use std::time::{SystemTime, UNIX_EPOCH}; use std::time::{SystemTime, UNIX_EPOCH};
use tracing::{debug, error, info}; use tracing::{debug, error, info};
use walkdir::WalkDir;
use crate::actions::{File as FileAction, Manifest}; use crate::actions::{File as FileAction, Manifest};
use crate::digest::Digest; use crate::digest::Digest;
@ -1988,318 +1989,120 @@ impl FileBackend {
// Check if the publisher directory exists // Check if the publisher directory exists
if publisher_pkg_dir.exists() { if publisher_pkg_dir.exists() {
// Walk through the directory and process package manifests // Use walkdir to recursively walk through the directory and process package manifests
if let Ok(entries) = fs::read_dir(&publisher_pkg_dir) { for entry in WalkDir::new(&publisher_pkg_dir)
for entry in entries.flatten() { .follow_links(true)
let path = entry.path(); .into_iter()
.filter_map(|e| e.ok())
if path.is_dir() { {
// Recursively search subdirectories let path = entry.path();
if let Ok(subentries) = fs::read_dir(&path) {
for subentry in subentries.flatten() { if path.is_file() {
let subpath = subentry.path(); // Try to read the first few bytes of the file to check if it's a manifest file
if subpath.is_file() { let mut file = match fs::File::open(&path) {
// Try to read the first few bytes of the file to check if it's a manifest file Ok(file) => file,
let mut file = match fs::File::open(&subpath) { Err(err) => {
Ok(file) => file, error!(
Err(err) => { "FileBackend::build_search_index: Error opening file {}: {}",
error!( path.display(),
"FileBackend::build_search_index: Error opening file {}: {}", err
subpath.display(), );
err
);
continue;
}
};
let mut buffer = [0; 1024];
let bytes_read = match file.read(&mut buffer) {
Ok(bytes) => bytes,
Err(err) => {
error!(
"FileBackend::build_search_index: Error reading file {}: {}",
subpath.display(),
err
);
continue;
}
};
// Check if the file starts with a valid manifest marker
if bytes_read == 0
|| (buffer[0] != b'{' && buffer[0] != b'<' && buffer[0] != b's')
{
continue;
}
// Parse the manifest file to get package information
match Manifest::parse_file(&subpath) {
Ok(manifest) => {
// Look for the pkg.fmri attribute
for attr in &manifest.attributes {
if attr.key == "pkg.fmri" && !attr.values.is_empty() {
let fmri_str = &attr.values[0];
// Parse the FMRI using our Fmri type
match Fmri::parse(fmri_str) {
Ok(parsed_fmri) => {
// Create a PackageInfo struct
let package_info = PackageInfo {
fmri: parsed_fmri.clone(),
};
// Create a PackageContents struct
let version = parsed_fmri.version();
let package_id = if !version.is_empty() {
format!("{}@{}", parsed_fmri.stem(), version)
} else {
parsed_fmri.stem().to_string()
};
// Extract content information
let files = if !manifest.files.is_empty() {
Some(
manifest
.files
.iter()
.map(|f| f.path.clone())
.collect(),
)
} else {
None
};
let directories =
if !manifest.directories.is_empty() {
Some(
manifest
.directories
.iter()
.map(|d| d.path.clone())
.collect(),
)
} else {
None
};
let links = if !manifest.links.is_empty() {
Some(
manifest
.links
.iter()
.map(|l| l.path.clone())
.collect(),
)
} else {
None
};
let dependencies =
if !manifest.dependencies.is_empty() {
Some(
manifest
.dependencies
.iter()
.filter_map(|d| {
d.fmri
.as_ref()
.map(|f| f.to_string())
})
.collect(),
)
} else {
None
};
let licenses = if !manifest.licenses.is_empty() {
Some(
manifest
.licenses
.iter()
.map(|l| {
if let Some(path_prop) =
l.properties.get("path")
{
path_prop.value.clone()
} else if let Some(license_prop) =
l.properties.get("license")
{
license_prop.value.clone()
} else {
l.payload.clone()
}
})
.collect(),
)
} else {
None
};
// Create a PackageContents struct
let package_contents = PackageContents {
package_id,
files,
directories,
links,
dependencies,
licenses,
};
// Add the package to the index
index.add_package(&package_info, Some(&package_contents));
}
Err(err) => {
// Log the error but continue processing
error!(
"FileBackend::build_search_index: Error parsing FMRI '{}': {}",
fmri_str, err
);
}
}
}
}
}
Err(err) => {
// Log the error but continue processing other files
error!(
"FileBackend::build_search_index: Error parsing manifest file {}: {}",
subpath.display(),
err
);
}
}
}
}
}
} else if path.is_file() {
// Try to read the first few bytes of the file to check if it's a manifest file
let mut file = match fs::File::open(&path) {
Ok(file) => file,
Err(err) => {
error!(
"FileBackend::build_search_index: Error opening file {}: {}",
path.display(),
err
);
continue;
}
};
let mut buffer = [0; 1024];
let bytes_read = match file.read(&mut buffer) {
Ok(bytes) => bytes,
Err(err) => {
error!(
"FileBackend::build_search_index: Error reading file {}: {}",
path.display(),
err
);
continue;
}
};
// Check if the file starts with a valid manifest marker
if bytes_read == 0
|| (buffer[0] != b'{' && buffer[0] != b'<' && buffer[0] != b's')
{
continue; continue;
} }
// Parse the manifest file to get package information };
match Manifest::parse_file(&path) {
Ok(manifest) => {
// Look for the pkg.fmri attribute
for attr in &manifest.attributes {
if attr.key == "pkg.fmri" && !attr.values.is_empty() {
let fmri_str = &attr.values[0];
// Parse the FMRI using our Fmri type let mut buffer = [0; 1024];
match Fmri::parse(fmri_str) { let bytes_read = match file.read(&mut buffer) {
Ok(parsed_fmri) => { Ok(bytes) => bytes,
// Create a PackageInfo struct Err(err) => {
let package_info = PackageInfo { error!(
fmri: parsed_fmri.clone(), "FileBackend::build_search_index: Error reading file {}: {}",
}; path.display(),
err
);
continue;
}
};
// Create a PackageContents struct // Check if the file starts with a valid manifest marker
let version = parsed_fmri.version(); if bytes_read == 0
let package_id = if !version.is_empty() { || (buffer[0] != b'{' && buffer[0] != b'<' && buffer[0] != b's')
format!("{}@{}", parsed_fmri.stem(), version) {
} else { continue;
parsed_fmri.stem().to_string() }
};
// Extract content information // Parse the manifest file to get package information
let files = if !manifest.files.is_empty() { match Manifest::parse_file(&path) {
Ok(manifest) => {
// Look for the pkg.fmri attribute
for attr in &manifest.attributes {
if attr.key == "pkg.fmri" && !attr.values.is_empty() {
let fmri_str = &attr.values[0];
// Parse the FMRI using our Fmri type
match Fmri::parse(fmri_str) {
Ok(parsed_fmri) => {
// Create a PackageInfo struct
let package_info = PackageInfo {
fmri: parsed_fmri.clone(),
};
// Create a PackageContents struct
let version = parsed_fmri.version();
let package_id = if !version.is_empty() {
format!("{}@{}", parsed_fmri.stem(), version)
} else {
parsed_fmri.stem().to_string()
};
// Extract content information
let files = if !manifest.files.is_empty() {
Some(
manifest
.files
.iter()
.map(|f| f.path.clone())
.collect(),
)
} else {
None
};
let directories =
if !manifest.directories.is_empty() {
Some( Some(
manifest manifest
.files .directories
.iter() .iter()
.map(|f| f.path.clone()) .map(|d| d.path.clone())
.collect(), .collect(),
) )
} else { } else {
None None
}; };
let directories = let links = if !manifest.links.is_empty() {
if !manifest.directories.is_empty() { Some(
Some( manifest
manifest .links
.directories .iter()
.iter() .map(|l| l.path.clone())
.map(|d| d.path.clone()) .collect(),
.collect(), )
) } else {
} else { None
None };
};
let links = if !manifest.links.is_empty() { let dependencies =
if !manifest.dependencies.is_empty() {
Some( Some(
manifest manifest
.links .dependencies
.iter() .iter()
.map(|l| l.path.clone()) .filter_map(|d| {
.collect(), d.fmri
) .as_ref()
} else { .map(|f| f.to_string())
None
};
let dependencies =
if !manifest.dependencies.is_empty() {
Some(
manifest
.dependencies
.iter()
.filter_map(|d| {
d.fmri
.as_ref()
.map(|f| f.to_string())
})
.collect(),
)
} else {
None
};
let licenses = if !manifest.licenses.is_empty() {
Some(
manifest
.licenses
.iter()
.map(|l| {
if let Some(path_prop) =
l.properties.get("path")
{
path_prop.value.clone()
} else if let Some(license_prop) =
l.properties.get("license")
{
license_prop.value.clone()
} else {
l.payload.clone()
}
}) })
.collect(), .collect(),
) )
@ -2307,43 +2110,64 @@ impl FileBackend {
None None
}; };
let package_contents = PackageContents { let licenses = if !manifest.licenses.is_empty() {
package_id, Some(
files, manifest
directories, .licenses
links, .iter()
dependencies, .map(|l| {
licenses, if let Some(path_prop) =
}; l.properties.get("path")
{
path_prop.value.clone()
} else if let Some(license_prop) =
l.properties.get("license")
{
license_prop.value.clone()
} else {
l.payload.clone()
}
})
.collect(),
)
} else {
None
};
// Add the package to the index // Create a PackageContents struct
index.add_package( let package_contents = PackageContents {
&package_info, package_id,
Some(&package_contents), files,
); directories,
links,
dependencies,
licenses,
};
// Found the package info, no need to check other attributes // Add the package to the index
break; index.add_package(&package_info, Some(&package_contents));
}
Err(err) => { // Found the package info, no need to check other attributes
// Log the error but continue processing break;
error!( }
Err(err) => {
// Log the error but continue processing
error!(
"FileBackend::build_search_index: Error parsing FMRI '{}': {}", "FileBackend::build_search_index: Error parsing FMRI '{}': {}",
fmri_str, err fmri_str, err
); );
}
} }
} }
} }
} }
Err(err) => { }
// Log the error but continue processing other files Err(err) => {
error!( // Log the error but continue processing other files
error!(
"FileBackend::build_search_index: Error parsing manifest file {}: {}", "FileBackend::build_search_index: Error parsing manifest file {}: {}",
path.display(), path.display(),
err err
); );
}
} }
} }
} }