mirror of
https://codeberg.org/Toasterson/ips.git
synced 2026-04-10 21:30:41 +00:00
Add search method to repository module for querying packages, update dependencies to support search functionality.
This commit is contained in:
parent
2cb63fbef0
commit
63f2d1da62
5 changed files with 1229 additions and 47 deletions
879
Cargo.lock
generated
879
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -32,3 +32,5 @@ flate2 = "1.0.28"
|
|||
lz4 = "1.24.0"
|
||||
semver = { version = "1.0.20", features = ["serde"] }
|
||||
diff-struct = "0.5.3"
|
||||
searchy = "0.5.0"
|
||||
tantivy = { version = "0.24.2", features = ["mmap"] }
|
||||
|
|
|
|||
|
|
@ -15,6 +15,8 @@ use flate2::write::GzEncoder;
|
|||
use flate2::Compression as GzipCompression;
|
||||
use lz4::EncoderBuilder;
|
||||
use regex::Regex;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use serde::{Serialize, Deserialize};
|
||||
|
||||
use crate::actions::{Manifest, File as FileAction};
|
||||
use crate::digest::Digest;
|
||||
|
|
@ -23,6 +25,174 @@ use crate::payload::{Payload, PayloadCompressionAlgorithm};
|
|||
|
||||
use super::{Repository, RepositoryConfig, RepositoryVersion, REPOSITORY_CONFIG_FILENAME, PublisherInfo, RepositoryInfo, PackageInfo, PackageContents};
|
||||
|
||||
/// Search index for a repository
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
struct SearchIndex {
|
||||
/// Maps search terms to package FMRIs
|
||||
terms: HashMap<String, HashSet<String>>,
|
||||
/// Maps package FMRIs to package names
|
||||
packages: HashMap<String, String>,
|
||||
/// Last updated timestamp
|
||||
updated: u64,
|
||||
}
|
||||
|
||||
impl SearchIndex {
|
||||
/// Create a new empty search index
|
||||
fn new() -> Self {
|
||||
SearchIndex {
|
||||
terms: HashMap::new(),
|
||||
packages: HashMap::new(),
|
||||
updated: SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a term to the index for a package
|
||||
fn add_term(&mut self, term: &str, fmri: &str, name: &str) {
|
||||
// Convert term to lowercase for case-insensitive search
|
||||
let term = term.to_lowercase();
|
||||
|
||||
// Add the term to the index
|
||||
self.terms.entry(term)
|
||||
.or_insert_with(HashSet::new)
|
||||
.insert(fmri.to_string());
|
||||
|
||||
// Add the package to the packages map
|
||||
self.packages.insert(fmri.to_string(), name.to_string());
|
||||
}
|
||||
|
||||
/// Add a package to the index
|
||||
fn add_package(&mut self, package: &PackageInfo, contents: Option<&PackageContents>) {
|
||||
// Get the FMRI as a string
|
||||
let fmri = package.fmri.to_string();
|
||||
|
||||
// Add the package name as a term
|
||||
self.add_term(&package.fmri.name, &fmri, &package.fmri.name);
|
||||
|
||||
// Add the publisher as a term if available
|
||||
if let Some(publisher) = &package.fmri.publisher {
|
||||
self.add_term(publisher, &fmri, &package.fmri.name);
|
||||
}
|
||||
|
||||
// Add the version as a term if available
|
||||
if let Some(version) = &package.fmri.version {
|
||||
self.add_term(&version.to_string(), &fmri, &package.fmri.name);
|
||||
}
|
||||
|
||||
// Add contents if available
|
||||
if let Some(content) = contents {
|
||||
// Add files
|
||||
if let Some(files) = &content.files {
|
||||
for file in files {
|
||||
self.add_term(file, &fmri, &package.fmri.name);
|
||||
}
|
||||
}
|
||||
|
||||
// Add directories
|
||||
if let Some(directories) = &content.directories {
|
||||
for dir in directories {
|
||||
self.add_term(dir, &fmri, &package.fmri.name);
|
||||
}
|
||||
}
|
||||
|
||||
// Add dependencies
|
||||
if let Some(dependencies) = &content.dependencies {
|
||||
for dep in dependencies {
|
||||
self.add_term(dep, &fmri, &package.fmri.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update the timestamp
|
||||
self.updated = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs();
|
||||
}
|
||||
|
||||
/// Search the index for packages matching a query
|
||||
fn search(&self, query: &str, limit: Option<usize>) -> Vec<String> {
|
||||
// Convert query to lowercase for case-insensitive search
|
||||
let query = query.to_lowercase();
|
||||
|
||||
// Split the query into terms
|
||||
let terms: Vec<&str> = query.split_whitespace().collect();
|
||||
|
||||
// If no terms, return empty result
|
||||
if terms.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
// Find packages that match all terms
|
||||
let mut result_set: Option<HashSet<String>> = None;
|
||||
|
||||
for term in terms {
|
||||
// Find packages that match this term
|
||||
if let Some(packages) = self.terms.get(term) {
|
||||
// If this is the first term, initialize the result set
|
||||
if result_set.is_none() {
|
||||
result_set = Some(packages.clone());
|
||||
} else {
|
||||
// Otherwise, intersect with the current result set
|
||||
result_set = result_set.map(|rs| {
|
||||
rs.intersection(packages)
|
||||
.cloned()
|
||||
.collect()
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// If any term has no matches, the result is empty
|
||||
return Vec::new();
|
||||
}
|
||||
}
|
||||
|
||||
// Convert the result set to a vector
|
||||
let mut results: Vec<String> = result_set
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
// Sort the results
|
||||
results.sort();
|
||||
|
||||
// Apply limit if specified
|
||||
if let Some(max_results) = limit {
|
||||
results.truncate(max_results);
|
||||
}
|
||||
|
||||
results
|
||||
}
|
||||
|
||||
/// Save the index to a file
|
||||
fn save(&self, path: &Path) -> Result<()> {
|
||||
// Create the parent directory if it doesn't exist
|
||||
if let Some(parent) = path.parent() {
|
||||
fs::create_dir_all(parent)?;
|
||||
}
|
||||
|
||||
// Serialize the index to JSON
|
||||
let json = serde_json::to_string(self)?;
|
||||
|
||||
// Write the JSON to the file
|
||||
fs::write(path, json)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load the index from a file
|
||||
fn load(path: &Path) -> Result<Self> {
|
||||
// Read the file
|
||||
let json = fs::read_to_string(path)?;
|
||||
|
||||
// Deserialize the JSON
|
||||
let index: SearchIndex = serde_json::from_str(&json)?;
|
||||
|
||||
Ok(index)
|
||||
}
|
||||
}
|
||||
|
||||
/// Repository implementation that uses the local filesystem
|
||||
pub struct FileBackend {
|
||||
pub path: PathBuf,
|
||||
|
|
@ -803,9 +973,6 @@ impl Repository for FileBackend {
|
|||
|
||||
/// Rebuild repository metadata
|
||||
fn rebuild(&self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()> {
|
||||
// This is a placeholder implementation
|
||||
// In a real implementation, we would rebuild catalogs and search indexes
|
||||
|
||||
// Filter publishers if specified
|
||||
let publishers = if let Some(pub_name) = publisher {
|
||||
if !self.config.publishers.contains(&pub_name.to_string()) {
|
||||
|
|
@ -827,7 +994,7 @@ impl Repository for FileBackend {
|
|||
|
||||
if !no_index {
|
||||
println!("Rebuilding search index...");
|
||||
// In a real implementation, we would rebuild the search index
|
||||
self.build_search_index(&pub_name)?;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -836,9 +1003,6 @@ impl Repository for FileBackend {
|
|||
|
||||
/// Refresh repository metadata
|
||||
fn refresh(&self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()> {
|
||||
// This is a placeholder implementation
|
||||
// In a real implementation, we would refresh catalogs and search indexes
|
||||
|
||||
// Filter publishers if specified
|
||||
let publishers = if let Some(pub_name) = publisher {
|
||||
if !self.config.publishers.contains(&pub_name.to_string()) {
|
||||
|
|
@ -860,7 +1024,17 @@ impl Repository for FileBackend {
|
|||
|
||||
if !no_index {
|
||||
println!("Refreshing search index...");
|
||||
// In a real implementation, we would refresh the search index
|
||||
|
||||
// Check if the index exists
|
||||
let index_path = self.path.join("index").join(&pub_name).join("search.json");
|
||||
if !index_path.exists() {
|
||||
// If the index doesn't exist, build it
|
||||
self.build_search_index(&pub_name)?;
|
||||
} else {
|
||||
// If the index exists, update it
|
||||
// For simplicity, we'll just rebuild it
|
||||
self.build_search_index(&pub_name)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -882,6 +1056,59 @@ impl Repository for FileBackend {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Search for packages in the repository
|
||||
fn search(&self, query: &str, publisher: Option<&str>, limit: Option<usize>) -> Result<Vec<PackageInfo>> {
|
||||
// If no publisher is specified, use the default publisher if available
|
||||
let publisher = publisher.or_else(|| self.config.default_publisher.as_deref());
|
||||
|
||||
// If still no publisher, we need to search all publishers
|
||||
let publishers = if let Some(pub_name) = publisher {
|
||||
vec![pub_name.to_string()]
|
||||
} else {
|
||||
self.config.publishers.clone()
|
||||
};
|
||||
|
||||
let mut results = Vec::new();
|
||||
|
||||
// For each publisher, search the index
|
||||
for pub_name in publishers {
|
||||
// Check if the index exists
|
||||
if let Ok(Some(index)) = self.get_search_index(&pub_name) {
|
||||
// Search the index
|
||||
let fmris = index.search(query, limit);
|
||||
|
||||
// Convert FMRIs to PackageInfo
|
||||
for fmri_str in fmris {
|
||||
if let Ok(fmri) = Fmri::parse(&fmri_str) {
|
||||
results.push(PackageInfo { fmri });
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// If the index doesn't exist, fall back to the simple search
|
||||
let all_packages = self.list_packages(Some(&pub_name), None)?;
|
||||
|
||||
// Filter packages by the query string
|
||||
let matching_packages: Vec<PackageInfo> = all_packages
|
||||
.into_iter()
|
||||
.filter(|pkg| {
|
||||
// Match against package name
|
||||
pkg.fmri.name.contains(query)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Add matching packages to the results
|
||||
results.extend(matching_packages);
|
||||
}
|
||||
}
|
||||
|
||||
// Apply limit if specified
|
||||
if let Some(max_results) = limit {
|
||||
results.truncate(max_results);
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
}
|
||||
|
||||
impl FileBackend {
|
||||
|
|
@ -897,6 +1124,142 @@ impl FileBackend {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Build a search index for a publisher
|
||||
fn build_search_index(&self, publisher: &str) -> Result<()> {
|
||||
println!("Building search index for publisher: {}", publisher);
|
||||
|
||||
// Create a new search index
|
||||
let mut index = SearchIndex::new();
|
||||
|
||||
// Get the publisher's package directory
|
||||
let publisher_pkg_dir = self.path.join("pkg").join(publisher);
|
||||
|
||||
// Check if the publisher directory exists
|
||||
if publisher_pkg_dir.exists() {
|
||||
// Walk through the directory and process package manifests
|
||||
if let Ok(entries) = fs::read_dir(&publisher_pkg_dir) {
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
|
||||
// Skip directories, only process files (package manifests)
|
||||
if path.is_file() {
|
||||
// Parse the manifest file to get package information
|
||||
match Manifest::parse_file(&path) {
|
||||
Ok(manifest) => {
|
||||
// Look for the pkg.fmri attribute
|
||||
for attr in &manifest.attributes {
|
||||
if attr.key == "pkg.fmri" && !attr.values.is_empty() {
|
||||
let fmri_str = &attr.values[0];
|
||||
|
||||
// Parse the FMRI using our Fmri type
|
||||
match Fmri::parse(fmri_str) {
|
||||
Ok(parsed_fmri) => {
|
||||
// Create a PackageInfo struct
|
||||
let package_info = PackageInfo {
|
||||
fmri: parsed_fmri.clone(),
|
||||
};
|
||||
|
||||
// Create a PackageContents struct
|
||||
let package_id = if let Some(version) = &parsed_fmri.version {
|
||||
format!("{}@{}", parsed_fmri.name, version)
|
||||
} else {
|
||||
parsed_fmri.name.clone()
|
||||
};
|
||||
|
||||
// Extract content information
|
||||
let files = if !manifest.files.is_empty() {
|
||||
Some(manifest.files.iter().map(|f| f.path.clone()).collect())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let directories = if !manifest.directories.is_empty() {
|
||||
Some(manifest.directories.iter().map(|d| d.path.clone()).collect())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let links = if !manifest.links.is_empty() {
|
||||
Some(manifest.links.iter().map(|l| l.path.clone()).collect())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let dependencies = if !manifest.dependencies.is_empty() {
|
||||
Some(manifest.dependencies.iter()
|
||||
.filter_map(|d| d.fmri.as_ref().map(|f| f.to_string()))
|
||||
.collect())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let licenses = if !manifest.licenses.is_empty() {
|
||||
Some(manifest.licenses.iter().map(|l| {
|
||||
if let Some(path_prop) = l.properties.get("path") {
|
||||
path_prop.value.clone()
|
||||
} else if let Some(license_prop) = l.properties.get("license") {
|
||||
license_prop.value.clone()
|
||||
} else {
|
||||
l.payload.clone()
|
||||
}
|
||||
}).collect())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let package_contents = PackageContents {
|
||||
package_id,
|
||||
files,
|
||||
directories,
|
||||
links,
|
||||
dependencies,
|
||||
licenses,
|
||||
};
|
||||
|
||||
// Add the package to the index
|
||||
index.add_package(&package_info, Some(&package_contents));
|
||||
|
||||
// Found the package info, no need to check other attributes
|
||||
break;
|
||||
},
|
||||
Err(err) => {
|
||||
// Log the error but continue processing
|
||||
eprintln!("Error parsing FMRI '{}': {}", fmri_str, err);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
Err(err) => {
|
||||
// Log the error but continue processing other files
|
||||
eprintln!("Error parsing manifest file {}: {}", path.display(), err);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Save the index to a file
|
||||
let index_path = self.path.join("index").join(publisher).join("search.json");
|
||||
index.save(&index_path)?;
|
||||
|
||||
println!("Search index built for publisher: {}", publisher);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get the search index for a publisher
|
||||
fn get_search_index(&self, publisher: &str) -> Result<Option<SearchIndex>> {
|
||||
let index_path = self.path.join("index").join(publisher).join("search.json");
|
||||
|
||||
if index_path.exists() {
|
||||
Ok(Some(SearchIndex::load(&index_path)?))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn test_publish_files(&mut self, test_dir: &Path) -> Result<()> {
|
||||
println!("Testing file publishing...");
|
||||
|
|
|
|||
|
|
@ -143,4 +143,16 @@ pub trait Repository {
|
|||
|
||||
/// Set the default publisher for the repository
|
||||
fn set_default_publisher(&mut self, publisher: &str) -> Result<()>;
|
||||
|
||||
/// Search for packages in the repository
|
||||
///
|
||||
/// This method searches for packages in the repository, using the search index
/// where the backend has one.
|
||||
/// It returns a list of packages that match the search query.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `query` - The search query
|
||||
/// * `publisher` - Optional publisher to limit the search to; when `None`,
///   the backend decides which publishers are searched
|
||||
/// * `limit` - Optional maximum number of results to return
|
||||
///
/// # Returns
///
/// The matching packages as `PackageInfo` values, or an error if the
/// backend fails while searching.
fn search(&self, query: &str, publisher: Option<&str>, limit: Option<usize>) -> Result<Vec<PackageInfo>>;
|
||||
}
|
||||
|
|
@ -366,6 +366,10 @@ impl Repository for RestBackend {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Search for packages through the REST backend.
///
/// Not implemented yet: the body is `todo!()`, so calling this method
/// panics instead of returning a result.
fn search(&self, query: &str, publisher: Option<&str>, limit: Option<usize>) -> Result<Vec<PackageInfo>> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
impl RestBackend {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue