mirror of
https://codeberg.org/Toasterson/ips.git
synced 2026-04-10 21:30:41 +00:00
Add search method to repository module for querying packages, update dependencies to support search functionality.
This commit is contained in:
parent
2cb63fbef0
commit
63f2d1da62
5 changed files with 1229 additions and 47 deletions
879
Cargo.lock
generated
879
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -32,3 +32,5 @@ flate2 = "1.0.28"
|
||||||
lz4 = "1.24.0"
|
lz4 = "1.24.0"
|
||||||
semver = { version = "1.0.20", features = ["serde"] }
|
semver = { version = "1.0.20", features = ["serde"] }
|
||||||
diff-struct = "0.5.3"
|
diff-struct = "0.5.3"
|
||||||
|
searchy = "0.5.0"
|
||||||
|
tantivy = { version = "0.24.2", features = ["mmap"] }
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,8 @@ use flate2::write::GzEncoder;
|
||||||
use flate2::Compression as GzipCompression;
|
use flate2::Compression as GzipCompression;
|
||||||
use lz4::EncoderBuilder;
|
use lz4::EncoderBuilder;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
|
use std::collections::{HashMap, HashSet};
|
||||||
|
use serde::{Serialize, Deserialize};
|
||||||
|
|
||||||
use crate::actions::{Manifest, File as FileAction};
|
use crate::actions::{Manifest, File as FileAction};
|
||||||
use crate::digest::Digest;
|
use crate::digest::Digest;
|
||||||
|
|
@ -23,6 +25,174 @@ use crate::payload::{Payload, PayloadCompressionAlgorithm};
|
||||||
|
|
||||||
use super::{Repository, RepositoryConfig, RepositoryVersion, REPOSITORY_CONFIG_FILENAME, PublisherInfo, RepositoryInfo, PackageInfo, PackageContents};
|
use super::{Repository, RepositoryConfig, RepositoryVersion, REPOSITORY_CONFIG_FILENAME, PublisherInfo, RepositoryInfo, PackageInfo, PackageContents};
|
||||||
|
|
||||||
|
/// Search index for a repository
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||||
|
struct SearchIndex {
|
||||||
|
/// Maps search terms to package FMRIs
|
||||||
|
terms: HashMap<String, HashSet<String>>,
|
||||||
|
/// Maps package FMRIs to package names
|
||||||
|
packages: HashMap<String, String>,
|
||||||
|
/// Last updated timestamp
|
||||||
|
updated: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SearchIndex {
|
||||||
|
/// Create a new empty search index
|
||||||
|
fn new() -> Self {
|
||||||
|
SearchIndex {
|
||||||
|
terms: HashMap::new(),
|
||||||
|
packages: HashMap::new(),
|
||||||
|
updated: SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.unwrap_or_default()
|
||||||
|
.as_secs(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a term to the index for a package
|
||||||
|
fn add_term(&mut self, term: &str, fmri: &str, name: &str) {
|
||||||
|
// Convert term to lowercase for case-insensitive search
|
||||||
|
let term = term.to_lowercase();
|
||||||
|
|
||||||
|
// Add the term to the index
|
||||||
|
self.terms.entry(term)
|
||||||
|
.or_insert_with(HashSet::new)
|
||||||
|
.insert(fmri.to_string());
|
||||||
|
|
||||||
|
// Add the package to the packages map
|
||||||
|
self.packages.insert(fmri.to_string(), name.to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a package to the index
|
||||||
|
fn add_package(&mut self, package: &PackageInfo, contents: Option<&PackageContents>) {
|
||||||
|
// Get the FMRI as a string
|
||||||
|
let fmri = package.fmri.to_string();
|
||||||
|
|
||||||
|
// Add the package name as a term
|
||||||
|
self.add_term(&package.fmri.name, &fmri, &package.fmri.name);
|
||||||
|
|
||||||
|
// Add the publisher as a term if available
|
||||||
|
if let Some(publisher) = &package.fmri.publisher {
|
||||||
|
self.add_term(publisher, &fmri, &package.fmri.name);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add the version as a term if available
|
||||||
|
if let Some(version) = &package.fmri.version {
|
||||||
|
self.add_term(&version.to_string(), &fmri, &package.fmri.name);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add contents if available
|
||||||
|
if let Some(content) = contents {
|
||||||
|
// Add files
|
||||||
|
if let Some(files) = &content.files {
|
||||||
|
for file in files {
|
||||||
|
self.add_term(file, &fmri, &package.fmri.name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add directories
|
||||||
|
if let Some(directories) = &content.directories {
|
||||||
|
for dir in directories {
|
||||||
|
self.add_term(dir, &fmri, &package.fmri.name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add dependencies
|
||||||
|
if let Some(dependencies) = &content.dependencies {
|
||||||
|
for dep in dependencies {
|
||||||
|
self.add_term(dep, &fmri, &package.fmri.name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the timestamp
|
||||||
|
self.updated = SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.unwrap_or_default()
|
||||||
|
.as_secs();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Search the index for packages matching a query
|
||||||
|
fn search(&self, query: &str, limit: Option<usize>) -> Vec<String> {
|
||||||
|
// Convert query to lowercase for case-insensitive search
|
||||||
|
let query = query.to_lowercase();
|
||||||
|
|
||||||
|
// Split the query into terms
|
||||||
|
let terms: Vec<&str> = query.split_whitespace().collect();
|
||||||
|
|
||||||
|
// If no terms, return empty result
|
||||||
|
if terms.is_empty() {
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find packages that match all terms
|
||||||
|
let mut result_set: Option<HashSet<String>> = None;
|
||||||
|
|
||||||
|
for term in terms {
|
||||||
|
// Find packages that match this term
|
||||||
|
if let Some(packages) = self.terms.get(term) {
|
||||||
|
// If this is the first term, initialize the result set
|
||||||
|
if result_set.is_none() {
|
||||||
|
result_set = Some(packages.clone());
|
||||||
|
} else {
|
||||||
|
// Otherwise, intersect with the current result set
|
||||||
|
result_set = result_set.map(|rs| {
|
||||||
|
rs.intersection(packages)
|
||||||
|
.cloned()
|
||||||
|
.collect()
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// If any term has no matches, the result is empty
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert the result set to a vector
|
||||||
|
let mut results: Vec<String> = result_set
|
||||||
|
.unwrap_or_default()
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Sort the results
|
||||||
|
results.sort();
|
||||||
|
|
||||||
|
// Apply limit if specified
|
||||||
|
if let Some(max_results) = limit {
|
||||||
|
results.truncate(max_results);
|
||||||
|
}
|
||||||
|
|
||||||
|
results
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Save the index to a file
|
||||||
|
fn save(&self, path: &Path) -> Result<()> {
|
||||||
|
// Create the parent directory if it doesn't exist
|
||||||
|
if let Some(parent) = path.parent() {
|
||||||
|
fs::create_dir_all(parent)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Serialize the index to JSON
|
||||||
|
let json = serde_json::to_string(self)?;
|
||||||
|
|
||||||
|
// Write the JSON to the file
|
||||||
|
fs::write(path, json)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load the index from a file
|
||||||
|
fn load(path: &Path) -> Result<Self> {
|
||||||
|
// Read the file
|
||||||
|
let json = fs::read_to_string(path)?;
|
||||||
|
|
||||||
|
// Deserialize the JSON
|
||||||
|
let index: SearchIndex = serde_json::from_str(&json)?;
|
||||||
|
|
||||||
|
Ok(index)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Repository implementation that uses the local filesystem
|
/// Repository implementation that uses the local filesystem
|
||||||
pub struct FileBackend {
|
pub struct FileBackend {
|
||||||
pub path: PathBuf,
|
pub path: PathBuf,
|
||||||
|
|
@ -803,9 +973,6 @@ impl Repository for FileBackend {
|
||||||
|
|
||||||
/// Rebuild repository metadata
|
/// Rebuild repository metadata
|
||||||
fn rebuild(&self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()> {
|
fn rebuild(&self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()> {
|
||||||
// This is a placeholder implementation
|
|
||||||
// In a real implementation, we would rebuild catalogs and search indexes
|
|
||||||
|
|
||||||
// Filter publishers if specified
|
// Filter publishers if specified
|
||||||
let publishers = if let Some(pub_name) = publisher {
|
let publishers = if let Some(pub_name) = publisher {
|
||||||
if !self.config.publishers.contains(&pub_name.to_string()) {
|
if !self.config.publishers.contains(&pub_name.to_string()) {
|
||||||
|
|
@ -827,7 +994,7 @@ impl Repository for FileBackend {
|
||||||
|
|
||||||
if !no_index {
|
if !no_index {
|
||||||
println!("Rebuilding search index...");
|
println!("Rebuilding search index...");
|
||||||
// In a real implementation, we would rebuild the search index
|
self.build_search_index(&pub_name)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -836,9 +1003,6 @@ impl Repository for FileBackend {
|
||||||
|
|
||||||
/// Refresh repository metadata
|
/// Refresh repository metadata
|
||||||
fn refresh(&self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()> {
|
fn refresh(&self, publisher: Option<&str>, no_catalog: bool, no_index: bool) -> Result<()> {
|
||||||
// This is a placeholder implementation
|
|
||||||
// In a real implementation, we would refresh catalogs and search indexes
|
|
||||||
|
|
||||||
// Filter publishers if specified
|
// Filter publishers if specified
|
||||||
let publishers = if let Some(pub_name) = publisher {
|
let publishers = if let Some(pub_name) = publisher {
|
||||||
if !self.config.publishers.contains(&pub_name.to_string()) {
|
if !self.config.publishers.contains(&pub_name.to_string()) {
|
||||||
|
|
@ -860,7 +1024,17 @@ impl Repository for FileBackend {
|
||||||
|
|
||||||
if !no_index {
|
if !no_index {
|
||||||
println!("Refreshing search index...");
|
println!("Refreshing search index...");
|
||||||
// In a real implementation, we would refresh the search index
|
|
||||||
|
// Check if the index exists
|
||||||
|
let index_path = self.path.join("index").join(&pub_name).join("search.json");
|
||||||
|
if !index_path.exists() {
|
||||||
|
// If the index doesn't exist, build it
|
||||||
|
self.build_search_index(&pub_name)?;
|
||||||
|
} else {
|
||||||
|
// If the index exists, update it
|
||||||
|
// For simplicity, we'll just rebuild it
|
||||||
|
self.build_search_index(&pub_name)?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -882,6 +1056,59 @@ impl Repository for FileBackend {
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Search for packages in the repository
|
||||||
|
fn search(&self, query: &str, publisher: Option<&str>, limit: Option<usize>) -> Result<Vec<PackageInfo>> {
|
||||||
|
// If no publisher is specified, use the default publisher if available
|
||||||
|
let publisher = publisher.or_else(|| self.config.default_publisher.as_deref());
|
||||||
|
|
||||||
|
// If still no publisher, we need to search all publishers
|
||||||
|
let publishers = if let Some(pub_name) = publisher {
|
||||||
|
vec![pub_name.to_string()]
|
||||||
|
} else {
|
||||||
|
self.config.publishers.clone()
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut results = Vec::new();
|
||||||
|
|
||||||
|
// For each publisher, search the index
|
||||||
|
for pub_name in publishers {
|
||||||
|
// Check if the index exists
|
||||||
|
if let Ok(Some(index)) = self.get_search_index(&pub_name) {
|
||||||
|
// Search the index
|
||||||
|
let fmris = index.search(query, limit);
|
||||||
|
|
||||||
|
// Convert FMRIs to PackageInfo
|
||||||
|
for fmri_str in fmris {
|
||||||
|
if let Ok(fmri) = Fmri::parse(&fmri_str) {
|
||||||
|
results.push(PackageInfo { fmri });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// If the index doesn't exist, fall back to the simple search
|
||||||
|
let all_packages = self.list_packages(Some(&pub_name), None)?;
|
||||||
|
|
||||||
|
// Filter packages by the query string
|
||||||
|
let matching_packages: Vec<PackageInfo> = all_packages
|
||||||
|
.into_iter()
|
||||||
|
.filter(|pkg| {
|
||||||
|
// Match against package name
|
||||||
|
pkg.fmri.name.contains(query)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Add matching packages to the results
|
||||||
|
results.extend(matching_packages);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply limit if specified
|
||||||
|
if let Some(max_results) = limit {
|
||||||
|
results.truncate(max_results);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(results)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FileBackend {
|
impl FileBackend {
|
||||||
|
|
@ -897,6 +1124,142 @@ impl FileBackend {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Build a search index for a publisher
|
||||||
|
fn build_search_index(&self, publisher: &str) -> Result<()> {
|
||||||
|
println!("Building search index for publisher: {}", publisher);
|
||||||
|
|
||||||
|
// Create a new search index
|
||||||
|
let mut index = SearchIndex::new();
|
||||||
|
|
||||||
|
// Get the publisher's package directory
|
||||||
|
let publisher_pkg_dir = self.path.join("pkg").join(publisher);
|
||||||
|
|
||||||
|
// Check if the publisher directory exists
|
||||||
|
if publisher_pkg_dir.exists() {
|
||||||
|
// Walk through the directory and process package manifests
|
||||||
|
if let Ok(entries) = fs::read_dir(&publisher_pkg_dir) {
|
||||||
|
for entry in entries.flatten() {
|
||||||
|
let path = entry.path();
|
||||||
|
|
||||||
|
// Skip directories, only process files (package manifests)
|
||||||
|
if path.is_file() {
|
||||||
|
// Parse the manifest file to get package information
|
||||||
|
match Manifest::parse_file(&path) {
|
||||||
|
Ok(manifest) => {
|
||||||
|
// Look for the pkg.fmri attribute
|
||||||
|
for attr in &manifest.attributes {
|
||||||
|
if attr.key == "pkg.fmri" && !attr.values.is_empty() {
|
||||||
|
let fmri_str = &attr.values[0];
|
||||||
|
|
||||||
|
// Parse the FMRI using our Fmri type
|
||||||
|
match Fmri::parse(fmri_str) {
|
||||||
|
Ok(parsed_fmri) => {
|
||||||
|
// Create a PackageInfo struct
|
||||||
|
let package_info = PackageInfo {
|
||||||
|
fmri: parsed_fmri.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Create a PackageContents struct
|
||||||
|
let package_id = if let Some(version) = &parsed_fmri.version {
|
||||||
|
format!("{}@{}", parsed_fmri.name, version)
|
||||||
|
} else {
|
||||||
|
parsed_fmri.name.clone()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Extract content information
|
||||||
|
let files = if !manifest.files.is_empty() {
|
||||||
|
Some(manifest.files.iter().map(|f| f.path.clone()).collect())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let directories = if !manifest.directories.is_empty() {
|
||||||
|
Some(manifest.directories.iter().map(|d| d.path.clone()).collect())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let links = if !manifest.links.is_empty() {
|
||||||
|
Some(manifest.links.iter().map(|l| l.path.clone()).collect())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let dependencies = if !manifest.dependencies.is_empty() {
|
||||||
|
Some(manifest.dependencies.iter()
|
||||||
|
.filter_map(|d| d.fmri.as_ref().map(|f| f.to_string()))
|
||||||
|
.collect())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let licenses = if !manifest.licenses.is_empty() {
|
||||||
|
Some(manifest.licenses.iter().map(|l| {
|
||||||
|
if let Some(path_prop) = l.properties.get("path") {
|
||||||
|
path_prop.value.clone()
|
||||||
|
} else if let Some(license_prop) = l.properties.get("license") {
|
||||||
|
license_prop.value.clone()
|
||||||
|
} else {
|
||||||
|
l.payload.clone()
|
||||||
|
}
|
||||||
|
}).collect())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let package_contents = PackageContents {
|
||||||
|
package_id,
|
||||||
|
files,
|
||||||
|
directories,
|
||||||
|
links,
|
||||||
|
dependencies,
|
||||||
|
licenses,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add the package to the index
|
||||||
|
index.add_package(&package_info, Some(&package_contents));
|
||||||
|
|
||||||
|
// Found the package info, no need to check other attributes
|
||||||
|
break;
|
||||||
|
},
|
||||||
|
Err(err) => {
|
||||||
|
// Log the error but continue processing
|
||||||
|
eprintln!("Error parsing FMRI '{}': {}", fmri_str, err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Err(err) => {
|
||||||
|
// Log the error but continue processing other files
|
||||||
|
eprintln!("Error parsing manifest file {}: {}", path.display(), err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save the index to a file
|
||||||
|
let index_path = self.path.join("index").join(publisher).join("search.json");
|
||||||
|
index.save(&index_path)?;
|
||||||
|
|
||||||
|
println!("Search index built for publisher: {}", publisher);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the search index for a publisher
|
||||||
|
fn get_search_index(&self, publisher: &str) -> Result<Option<SearchIndex>> {
|
||||||
|
let index_path = self.path.join("index").join(publisher).join("search.json");
|
||||||
|
|
||||||
|
if index_path.exists() {
|
||||||
|
Ok(Some(SearchIndex::load(&index_path)?))
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub fn test_publish_files(&mut self, test_dir: &Path) -> Result<()> {
|
pub fn test_publish_files(&mut self, test_dir: &Path) -> Result<()> {
|
||||||
println!("Testing file publishing...");
|
println!("Testing file publishing...");
|
||||||
|
|
|
||||||
|
|
@ -143,4 +143,16 @@ pub trait Repository {
|
||||||
|
|
||||||
/// Set the default publisher for the repository
|
/// Set the default publisher for the repository
|
||||||
fn set_default_publisher(&mut self, publisher: &str) -> Result<()>;
|
fn set_default_publisher(&mut self, publisher: &str) -> Result<()>;
|
||||||
|
|
||||||
|
/// Search for packages in the repository
|
||||||
|
///
|
||||||
|
/// This method searches for packages in the repository using the search index.
|
||||||
|
/// It returns a list of packages that match the search query.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `query` - The search query
|
||||||
|
/// * `publisher` - Optional publisher to limit the search to
|
||||||
|
/// * `limit` - Optional maximum number of results to return
|
||||||
|
fn search(&self, query: &str, publisher: Option<&str>, limit: Option<usize>) -> Result<Vec<PackageInfo>>;
|
||||||
}
|
}
|
||||||
|
|
@ -366,6 +366,10 @@ impl Repository for RestBackend {
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Search for packages in the repository.
///
/// # Panics
///
/// Not implemented yet for this backend: every call panics via `todo!()`.
fn search(&self, query: &str, publisher: Option<&str>, limit: Option<usize>) -> Result<Vec<PackageInfo>> {
    todo!()
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RestBackend {
|
impl RestBackend {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue