diff --git a/libips/src/repository/file_backend.rs b/libips/src/repository/file_backend.rs index e16a721..16eb24e 100644 --- a/libips/src/repository/file_backend.rs +++ b/libips/src/repository/file_backend.rs @@ -19,12 +19,10 @@ use std::str::FromStr; use std::sync::Mutex; use std::time::{SystemTime, UNIX_EPOCH}; use tracing::{debug, error, info}; -use walkdir::WalkDir; - use crate::actions::{File as FileAction, Manifest}; use crate::digest::Digest; use crate::fmri::Fmri; -use crate::payload::{Payload, PayloadArchitecture, PayloadBits, PayloadCompressionAlgorithm}; +use crate::payload::{Payload, PayloadCompressionAlgorithm}; use super::catalog_writer; use super::{ @@ -66,60 +64,6 @@ pub struct IndexEntry { pub attributes: BTreeMap, } -struct SearchQuery { - pkg: Option, - action: Option, - index: Option, - token: String, -} - -fn parse_query(query: &str) -> SearchQuery { - if !query.contains(':') { - return SearchQuery { - pkg: None, - action: None, - index: None, - token: query.to_string(), - }; - } - - let parts: Vec<&str> = query.split(':').collect(); - let get_opt = |s: &str| { - if s.is_empty() { - None - } else { - Some(s.to_string()) - } - }; - - match parts.len() { - 2 => SearchQuery { - pkg: None, - action: None, - index: get_opt(parts[0]), - token: parts[1].to_string(), - }, - 3 => SearchQuery { - pkg: None, - action: get_opt(parts[0]), - index: get_opt(parts[1]), - token: parts[2].to_string(), - }, - 4 => SearchQuery { - pkg: get_opt(parts[0]), - action: get_opt(parts[1]), - index: get_opt(parts[2]), - token: parts[3].to_string(), - }, - _ => SearchQuery { - pkg: None, - action: None, - index: None, - token: query.to_string(), - }, - } -} - pub fn glob_to_regex(pattern: &str) -> String { let mut regex = String::from("^"); for c in pattern.chars() { @@ -137,223 +81,6 @@ pub fn glob_to_regex(pattern: &str) -> String { regex } -/// Search index for a repository -#[derive(Serialize, Deserialize, Debug, Clone)] -struct SearchIndex { - /// Maps search terms to 
list of index entries - terms: HashMap>, - /// Maps package FMRIs to package names - packages: HashMap, - /// Last updated timestamp - updated: u64, -} - -impl SearchIndex { - /// Create a new empty search index - #[allow(dead_code)] - fn new() -> Self { - SearchIndex { - terms: HashMap::new(), - packages: HashMap::new(), - updated: SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_secs(), - } - } - - /// Add a term to the index for a package - #[allow(dead_code)] - fn add_term( - &mut self, - term: &str, - fmri: &str, - action_type: &str, - index_type: &str, - value: &str, - attributes: Option>, - ) { - let token = term.to_string(); - // Convert term to lowercase for case-insensitive search - let term_lower = term.to_lowercase(); - - let entry = IndexEntry { - fmri: fmri.to_string(), - action_type: action_type.to_string(), - index_type: index_type.to_string(), - value: value.to_string(), - token, - attributes: attributes.unwrap_or_default(), - }; - - // Add the term to the index - self.terms - .entry(term_lower) - .or_insert_with(Vec::new) - .push(entry); - } - - /// Search the index for packages matching a query - fn search(&self, query: &str, case_sensitive: bool, limit: Option) -> Vec { - // Split the query into terms (whitespace) - let terms: Vec<&str> = query.split_whitespace().collect(); - - // If no terms, return an empty result - if terms.is_empty() { - return Vec::new(); - } - - // Find packages that match all terms - let mut fmri_sets: Vec> = Vec::new(); - let mut all_entries: Vec = Vec::new(); - - for term_str in terms { - let parsed = parse_query(term_str); - let token_has_wildcard = parsed.token.contains('*') || parsed.token.contains('?'); - let token_lower = parsed.token.to_lowercase(); - - let mut term_entries: Vec<&IndexEntry> = Vec::new(); - - if token_has_wildcard { - let regex_str = glob_to_regex(&token_lower); - if let Ok(re) = Regex::new(®ex_str) { - for (key, entries) in &self.terms { - if re.is_match(key) { - 
term_entries.extend(entries); - } - } - } - } else { - if let Some(entries) = self.terms.get(&token_lower) { - term_entries.extend(entries); - } - } - - // Filter entries based on structured query and case sensitivity - let filtered: Vec<&IndexEntry> = term_entries - .into_iter() - .filter(|e| { - // Check Index Type - if let Some(idx) = &parsed.index { - if &e.index_type != idx { - return false; - } - } - // Check Action Type - if let Some(act) = &parsed.action { - if &e.action_type != act { - return false; - } - } - // Check Package Name (FMRI) - if let Some(pkg) = &parsed.pkg { - let pkg_has_wildcard = pkg.contains('*') || pkg.contains('?'); - if pkg_has_wildcard { - let re_str = glob_to_regex(&pkg.to_lowercase()); - if let Ok(re) = Regex::new(&re_str) { - // FMRIs are usually lowercase, but let's compare lowercase to be safe/consistent - if !re.is_match(&e.fmri.to_lowercase()) { - return false; - } - } - } else { - if !e.fmri.contains(pkg) { - return false; - } - } - } - - // Check Case Sensitivity on VALUE - if case_sensitive { - if token_has_wildcard { - let re_str = glob_to_regex(&parsed.token); // Original token - if let Ok(re) = Regex::new(&re_str) { - if !re.is_match(&e.token) { - return false; - } - } - } else { - if e.token != parsed.token { - return false; - } - } - } - - true - }) - .collect(); - - if filtered.is_empty() { - return Vec::new(); // Term found no matches - } - - let fmris: HashSet = filtered.iter().map(|e| e.fmri.clone()).collect(); - fmri_sets.push(fmris); - all_entries.extend(filtered.into_iter().cloned()); - } - - // Intersect FMRIs - let mut common_fmris = fmri_sets[0].clone(); - for set in &fmri_sets[1..] 
{ - common_fmris.retain(|fmri| set.contains(fmri)); - if common_fmris.is_empty() { - return Vec::new(); - } - } - - // Filter entries - let mut results: Vec = Vec::new(); - for entry in all_entries { - if common_fmris.contains(&entry.fmri) { - results.push(entry); - } - } - - results.sort_by(|a, b| { - a.fmri - .cmp(&b.fmri) - .then(a.action_type.cmp(&b.action_type)) - .then(a.index_type.cmp(&b.index_type)) - .then(a.value.cmp(&b.value)) - }); - results.dedup(); - - if let Some(max_results) = limit { - results.truncate(max_results); - } - - results - } - - /// Save the index to a file - #[allow(dead_code)] - fn save(&self, path: &Path) -> Result<()> { - // Create the parent directory if it doesn't exist - if let Some(parent) = path.parent() { - fs::create_dir_all(parent)?; - } - - // Serialize the index to JSON - let json = serde_json::to_string(self)?; - - // Write the JSON to the file - fs::write(path, json)?; - - Ok(()) - } - - /// Load the index from a file - fn load(path: &Path) -> Result { - // Read the file - let json = fs::read_to_string(path)?; - - // Deserialize the JSON - let index: SearchIndex = serde_json::from_str(&json)?; - - Ok(index) - } -} - /// Repository implementation that uses the local filesystem pub struct FileBackend { pub path: PathBuf, @@ -1714,87 +1441,22 @@ impl ReadableRepository for FileBackend { limit: Option, ) -> Result> { debug!("Searching for packages with query: {}", query); - debug!("Publisher: {:?}", publisher); - debug!("Limit: {:?}", limit); - // If no publisher is specified, use the default publisher if available - let publisher = publisher.or_else(|| self.config.default_publisher.as_deref()); - debug!("Effective publisher: {:?}", publisher); - - // If still no publisher, we need to search all publishers - let publishers = if let Some(pub_name) = publisher { - vec![pub_name.to_string()] - } else { - self.config.publishers.clone() - }; - debug!("Publishers to search: {:?}", publishers); + let entries = 
self.search_detailed(query, publisher, limit, false)?; + // Deduplicate by FMRI and convert to PackageInfo + let mut added_fmris = HashSet::new(); let mut results = Vec::new(); - - // For each publisher, search the index - for pub_name in publishers { - debug!("Searching publisher: {}", pub_name); - - // Check if the index exists - let index_path = self.path.join("index").join(&pub_name).join("search.json"); - debug!("Index path: {}", index_path.display()); - debug!("Index exists: {}", index_path.exists()); - - if let Ok(Some(index)) = self.get_search_index(&pub_name) { - debug!("Got search index for publisher: {}", pub_name); - debug!("Index terms: {:?}", index.terms.keys().collect::>()); - - // Search the index - let entries = index.search(query, false, limit); - debug!("Search results (entries): {:?}", entries); - - // Convert entries to PackageInfo - // Use a HashSet to track added FMRIs to avoid duplicates - let mut added_fmris = HashSet::new(); - for entry in entries { - if added_fmris.contains(&entry.fmri) { - continue; - } - - if let Ok(fmri) = Fmri::parse(&entry.fmri) { - debug!("Adding package to results: {}", fmri); - results.push(PackageInfo { fmri }); - added_fmris.insert(entry.fmri); - } else { - debug!("Failed to parse FMRI: {}", entry.fmri); - } - } - } else { - debug!("No search index found for publisher: {}", pub_name); - debug!("Falling back to simple search"); - - // If the index doesn't exist, fall back to the simple search - let all_packages = self.list_packages(Some(&pub_name), None)?; - debug!("All packages: {:?}", all_packages); - - // Filter packages by the query string - let matching_packages: Vec = all_packages - .into_iter() - .filter(|pkg| { - // Match against package name - let matches = pkg.fmri.stem().contains(query); - debug!("Package: {}, Matches: {}", pkg.fmri.stem(), matches); - matches - }) - .collect(); - debug!("Matching packages: {:?}", matching_packages); - - // Add matching packages to the results - 
results.extend(matching_packages); + for entry in entries { + if added_fmris.contains(&entry.fmri) { + continue; + } + if let Ok(fmri) = Fmri::parse(&entry.fmri) { + results.push(PackageInfo { fmri }); + added_fmris.insert(entry.fmri); } } - // Apply limit if specified - if let Some(max_results) = limit { - results.truncate(max_results); - } - - debug!("Final search results: {:?}", results); Ok(results) } } @@ -3007,287 +2669,13 @@ impl FileBackend { result } - /// Build a search index for a publisher - #[allow(dead_code)] - fn build_search_index(&self, publisher: &str) -> Result<()> { - info!("Building search index for publisher: {}", publisher); - - // Create a new search index - let mut index = SearchIndex::new(); - - // Get the publisher's package directory - let publisher_pkg_dir = Self::construct_package_dir(&self.path, publisher, ""); - - // Check if the publisher directory exists - if publisher_pkg_dir.exists() { - // Use walkdir to recursively walk through the directory and process package manifests - for entry in WalkDir::new(&publisher_pkg_dir) - .follow_links(true) - .into_iter() - .filter_map(|e| e.ok()) - { - let path = entry.path(); - - if path.is_file() { - // Try to read the first few bytes of the file to check if it's a manifest file - let mut file = match fs::File::open(&path) { - Ok(file) => file, - Err(err) => { - error!( - "FileBackend::build_search_index: Error opening file {}: {}", - path.display(), - err - ); - continue; - } - }; - - let mut buffer = [0; 1024]; - let bytes_read = match file.read(&mut buffer) { - Ok(bytes) => bytes, - Err(err) => { - error!( - "FileBackend::build_search_index: Error reading file {}: {}", - path.display(), - err - ); - continue; - } - }; - - // Check if the file starts with a valid manifest marker - if bytes_read == 0 - || (buffer[0] != b'{' && buffer[0] != b'<' && buffer[0] != b's') - { - continue; - } - - // Parse the manifest file to get package information - match Manifest::parse_file(&path) { - 
Ok(manifest) => { - // Look for the pkg.fmri attribute - let fmri_opt = manifest - .attributes - .iter() - .find(|a| a.key == "pkg.fmri") - .and_then(|a| a.values.first()); - - if let Some(fmri_str) = fmri_opt { - // Parse the FMRI using our Fmri type - match Fmri::parse(fmri_str) { - Ok(parsed_fmri) => { - let fmri = parsed_fmri.to_string(); - let stem = parsed_fmri.stem().to_string(); - - // Add package mapping - index.packages.insert(fmri.clone(), stem.clone()); - - // 1. Index package stem - index.add_term(&stem, &fmri, "pkg", "name", &stem, None); - for part in stem.split('/') { - if part != stem { - index.add_term( - part, &fmri, "pkg", "name", &stem, None, - ); - } - } - - // 2. Index Publisher - if let Some(publ) = &parsed_fmri.publisher { - index.add_term( - publ, - &fmri, - "pkg", - "publisher", - publ, - None, - ); - } - - // 3. Index Version - let version = parsed_fmri.version(); - if !version.is_empty() { - index.add_term( - &version, &fmri, "pkg", "version", &version, None, - ); - } - - // 4. 
Index Files with attributes - for file in manifest.files { - let mut attrs = BTreeMap::new(); - attrs.insert("path".to_string(), file.path.clone()); - attrs.insert("owner".to_string(), file.owner.clone()); - attrs.insert("group".to_string(), file.group.clone()); - attrs.insert("mode".to_string(), file.mode.clone()); - - if let Some(payload) = &file.payload { - let arch_str = match payload.architecture { - PayloadArchitecture::I386 => Some("i386"), - PayloadArchitecture::SPARC => Some("sparc"), - _ => None, - }; - if let Some(a) = arch_str { - attrs.insert( - "elfarch".to_string(), - a.to_string(), - ); - } - - let bits_str = match payload.bitness { - PayloadBits::Bits64 => Some("64"), - PayloadBits::Bits32 => Some("32"), - _ => None, - }; - if let Some(b) = bits_str { - attrs.insert( - "elfbits".to_string(), - b.to_string(), - ); - } - - attrs.insert( - "pkg.content-hash".to_string(), - payload.primary_identifier.to_string(), - ); - } - - for prop in file.properties { - attrs.insert(prop.key, prop.value); - } - - // index=path - index.add_term( - &file.path, - &fmri, - "file", - "path", - &file.path, - Some(attrs.clone()), - ); - - // index=basename - if let Some(basename) = Path::new(&file.path) - .file_name() - .and_then(|s| s.to_str()) - { - index.add_term( - basename, - &fmri, - "file", - "basename", - &file.path, - Some(attrs), - ); - } - } - - // 5. 
Index Directories - for dir in manifest.directories { - let mut attrs = BTreeMap::new(); - attrs.insert("path".to_string(), dir.path.clone()); - attrs.insert("owner".to_string(), dir.owner.clone()); - attrs.insert("group".to_string(), dir.group.clone()); - attrs.insert("mode".to_string(), dir.mode.clone()); - - // index=path - index.add_term( - &dir.path, - &fmri, - "dir", - "path", - &dir.path, - Some(attrs.clone()), - ); - - // index=basename - if let Some(basename) = Path::new(&dir.path) - .file_name() - .and_then(|s| s.to_str()) - { - index.add_term( - basename, - &fmri, - "dir", - "basename", - &dir.path, - Some(attrs), - ); - } - } - - // 6. Index Dependencies - for dep in manifest.dependencies { - if let Some(dep_fmri) = &dep.fmri { - let dep_fmri_str = dep_fmri.to_string(); - let mut attrs = BTreeMap::new(); - - if !dep.dependency_type.is_empty() { - attrs.insert( - "type".to_string(), - dep.dependency_type.clone(), - ); - } - - for prop in dep.optional { - attrs.insert(prop.key, prop.value); - } - - index.add_term( - &dep_fmri_str, - &fmri, - "depend", - "fmri", - &dep_fmri_str, - Some(attrs.clone()), - ); - index.add_term( - dep_fmri.stem(), - &fmri, - "depend", - "fmri", - &dep_fmri_str, - Some(attrs), - ); - } - } - } - Err(err) => { - error!( - "FileBackend::build_search_index: Error parsing FMRI '{}': {}", - fmri_str, err - ); - } - } - } - } - Err(err) => { - error!( - "FileBackend::build_search_index: Error parsing manifest file {}: {}", - path.display(), - err - ); - } - } - } - } - } - - // Save the index to a file - let index_path = self.path.join("index").join(publisher).join("search.json"); - index.save(&index_path)?; - - info!("Search index built for publisher: {}", publisher); - - Ok(()) - } - - /// Search for packages with detailed results + /// Search for packages with detailed results using SQLite FTS5. 
pub fn search_detailed( &self, query: &str, publisher: Option<&str>, limit: Option, - case_sensitive: bool, + _case_sensitive: bool, ) -> Result> { debug!("Searching (detailed) for packages with query: {}", query); @@ -3303,28 +2691,74 @@ impl FileBackend { let mut results = Vec::new(); - // For each publisher, search the index - for pub_name in publishers { - if let Ok(Some(index)) = self.get_search_index(&pub_name) { - // Search the index - let entries = index.search(query, case_sensitive, limit); - results.extend(entries); + for pub_name in &publishers { + let fts_path = self.shard_dir(pub_name).join("fts.db"); + let active_path = self.shard_dir(pub_name).join("active.db"); + + if fts_path.exists() { + // Use FTS5 search + let fts_results = crate::repository::sqlite_catalog::search_fts( + &fts_path, + query, + Some(pub_name), + limit, + ) + .map_err(|e| { + RepositoryError::Other(format!("FTS search error: {}", e.message)) + })?; + + if fts_results.is_empty() { + continue; + } + + // Resolve full FMRIs from active.db + let stems: Vec<(String, String)> = fts_results + .iter() + .map(|r| (r.stem.clone(), r.publisher.clone())) + .collect(); + + let fmri_map = if active_path.exists() { + crate::repository::sqlite_catalog::resolve_latest_fmris(&active_path, &stems) + .unwrap_or_default() + } else { + std::collections::HashMap::new() + }; + + for fts_result in fts_results { + let key = (fts_result.stem.clone(), fts_result.publisher.clone()); + let fmri = fmri_map + .get(&key) + .cloned() + .unwrap_or_else(|| { + format!("pkg://{}/{}", fts_result.publisher, fts_result.stem) + }); + + results.push(IndexEntry { + fmri, + action_type: "set".to_string(), + index_type: "name".to_string(), + value: fts_result.stem.clone(), + token: fts_result.stem, + attributes: BTreeMap::new(), + }); + } } else { debug!( - "No search index found for publisher: {}, falling back to simple listing", + "No fts.db found for publisher: {}, falling back to simple listing", pub_name ); - // 
Fallback: list packages and convert to basic IndexEntries - let all_packages = self.list_packages(Some(&pub_name), None)?; + // Fallback: list packages and filter by stem + let all_packages = self.list_packages(Some(pub_name), None)?; + let query_lower = query.to_lowercase(); let matching_packages: Vec = all_packages .into_iter() - .filter(|pkg| pkg.fmri.stem().contains(query)) + .filter(|pkg| pkg.fmri.stem().to_lowercase().contains(&query_lower)) .map(|pkg| { let fmri = pkg.fmri.to_string(); let stem = pkg.fmri.stem().to_string(); IndexEntry { fmri, - action_type: "pkg".to_string(), + action_type: "set".to_string(), index_type: "name".to_string(), value: stem.clone(), token: stem, @@ -3344,17 +2778,6 @@ impl FileBackend { Ok(results) } - /// Get the search index for a publisher - fn get_search_index(&self, publisher: &str) -> Result> { - let index_path = self.path.join("index").join(publisher).join("search.json"); - - if index_path.exists() { - Ok(Some(SearchIndex::load(&index_path)?)) - } else { - Ok(None) - } - } - #[cfg(test)] pub fn test_publish_files(&mut self, test_dir: &Path) -> Result<()> { debug!("Testing file publishing..."); diff --git a/libips/src/repository/sqlite_catalog.rs b/libips/src/repository/sqlite_catalog.rs index 3480de3..321242c 100644 --- a/libips/src/repository/sqlite_catalog.rs +++ b/libips/src/repository/sqlite_catalog.rs @@ -13,10 +13,10 @@ use crate::actions::Manifest; use crate::fmri::Fmri; use crate::repository::catalog::CatalogManager; use miette::Diagnostic; -use rusqlite::Connection; +use rusqlite::{Connection, OpenFlags}; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use std::fs; use std::path::Path; use thiserror::Error; @@ -71,7 +71,7 @@ CREATE INDEX IF NOT EXISTS idx_obsolete_fmri ON obsolete_packages(fmri); pub const FTS_SCHEMA: &str = r#" CREATE VIRTUAL TABLE IF NOT EXISTS package_search USING fts5(stem, publisher, summary, 
description,
-    content='', tokenize='unicode61');
+    tokenize='unicode61');
 "#;
 
 /// Schema for installed.db - tracks installed packages with manifest blobs.
@@ -494,3 +494,257 @@ pub fn populate_obsolete_db(db_path: &Path, fmri: &Fmri) -> Result<(), ShardBuildError> {
 // Note: compress_json_lz4, decode_manifest_bytes, and is_package_obsolete
 // are available as pub(crate) in crate::image::catalog and can be used
 // within libips but not re-exported.
+
+/// Result from an FTS5 search query.
+#[derive(Debug, Clone)]
+pub struct FtsSearchResult {
+    pub stem: String,
+    pub publisher: String,
+    pub summary: String,
+    pub description: String,
+    pub rank: f64,
+}
+
+/// Sanitize a user query string for safe use in an FTS5 MATCH expression.
+///
+/// Wraps each whitespace-delimited token in double quotes to treat it as a
+/// literal phrase, preventing FTS5 syntax injection. Glob-style `*` at the end
+/// of a token is converted to FTS5 prefix syntax (e.g. `web*` → `"web"*`).
+pub fn sanitize_fts_query(raw: &str) -> String {
+    let tokens: Vec<&str> = raw.split_whitespace().collect();
+    if tokens.is_empty() {
+        return String::new();
+    }
+
+    let mut parts = Vec::new();
+    for token in tokens {
+        // Strip surrounding quotes if present
+        let token = token.trim_matches('"');
+        if token.is_empty() {
+            continue;
+        }
+
+        if token.ends_with('*') {
+            let prefix = &token[..token.len() - 1];
+            if !prefix.is_empty() {
+                // FTS5 prefix syntax is `term*`: quote and escape the term
+                // part to neutralize any operators inside it, then append the
+                // unquoted trailing `*` so it still acts as a prefix query.
+                let escaped = prefix.replace('"', "\"\"");
+                parts.push(format!("\"{}\"*", escaped));
+            }
+        } else {
+            let escaped = token.replace('"', "\"\"");
+            parts.push(format!("\"{}\"", escaped));
+        }
+    }
+    parts.join(" ")
+}
+
+/// Search the FTS5 index in fts.db for packages matching a query.
+pub fn search_fts(
+    fts_db_path: &Path,
+    query: &str,
+    publisher_filter: Option<&str>,
+    limit: Option<usize>,
+) -> Result<Vec<FtsSearchResult>, ShardBuildError> {
+    let conn = Connection::open_with_flags(fts_db_path, OpenFlags::SQLITE_OPEN_READ_ONLY)?;
+
+    let sanitized = sanitize_fts_query(query);
+    if sanitized.is_empty() {
+        return Ok(Vec::new());
+    }
+
+    let effective_limit = limit.unwrap_or(100) as i64;
+
+    // Publisher filtering happens in Rust after the query. NOTE(review): the SQL
+    // LIMIT is applied first, so fewer than `limit` rows may survive the filter.
+    let mut stmt = conn.prepare(
+        "SELECT stem, publisher, summary, description, rank \
+         FROM package_search \
+         WHERE package_search MATCH ?1 \
+         ORDER BY rank \
+         LIMIT ?2",
+    )?;
+    let rows = stmt.query_map(rusqlite::params![sanitized, effective_limit], |row| {
+        Ok(FtsSearchResult {
+            stem: row.get(0)?,
+            publisher: row.get(1)?,
+            summary: row.get(2)?,
+            description: row.get(3)?,
+            rank: row.get(4)?,
+        })
+    })?;
+    let mut results: Vec<FtsSearchResult> = rows.collect::<Result<Vec<_>, _>>()?;
+
+    if let Some(publisher) = publisher_filter {
+        results.retain(|r| r.publisher == publisher);
+    }
+
+    Ok(results)
+}
+
+/// Look up the latest FMRI for each (stem, publisher) pair from active.db.
+pub fn resolve_latest_fmris( + active_db_path: &Path, + stems: &[(String, String)], +) -> Result, ShardBuildError> { + let conn = Connection::open_with_flags(active_db_path, OpenFlags::SQLITE_OPEN_READ_ONLY)?; + let mut stmt = conn.prepare( + "SELECT fmri FROM packages WHERE stem = ?1 AND publisher = ?2 ORDER BY rowid DESC LIMIT 1", + )?; + + let mut result = HashMap::new(); + for (stem, publisher) in stems { + if let Ok(fmri) = stmt.query_row(rusqlite::params![stem, publisher], |row| { + row.get::<_, String>(0) + }) { + result.insert((stem.clone(), publisher.clone()), fmri); + } + } + + Ok(result) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn create_test_fts_db(path: &Path) { + let conn = Connection::open(path).unwrap(); + conn.execute_batch(FTS_SCHEMA).unwrap(); + conn.execute( + "INSERT INTO package_search (stem, publisher, summary, description) VALUES (?1, ?2, ?3, ?4)", + rusqlite::params!["web/server/nginx", "openindiana.org", "NGINX web server", "High performance HTTP server and reverse proxy"], + ).unwrap(); + conn.execute( + "INSERT INTO package_search (stem, publisher, summary, description) VALUES (?1, ?2, ?3, ?4)", + rusqlite::params!["web/server/apache", "openindiana.org", "Apache HTTP Server", "The Apache HTTP Server Project"], + ).unwrap(); + conn.execute( + "INSERT INTO package_search (stem, publisher, summary, description) VALUES (?1, ?2, ?3, ?4)", + rusqlite::params!["database/postgresql", "openindiana.org", "PostgreSQL database", "PostgreSQL object-relational database management"], + ).unwrap(); + conn.execute( + "INSERT INTO package_search (stem, publisher, summary, description) VALUES (?1, ?2, ?3, ?4)", + rusqlite::params!["runtime/coreutils", "otherpub", "Core utilities", "Essential command line utilities"], + ).unwrap(); + } + + fn create_test_active_db(path: &Path) { + let conn = Connection::open(path).unwrap(); + conn.execute_batch(ACTIVE_SCHEMA).unwrap(); + conn.execute( + "INSERT INTO packages (stem, version, publisher) VALUES 
(?1, ?2, ?3)", + rusqlite::params!["web/server/nginx", "1.24.0,5.11-2024.0.1.0", "openindiana.org"], + ).unwrap(); + conn.execute( + "INSERT INTO packages (stem, version, publisher) VALUES (?1, ?2, ?3)", + rusqlite::params!["web/server/apache", "2.4.58,5.11-2024.0.1.0", "openindiana.org"], + ).unwrap(); + conn.execute( + "INSERT INTO packages (stem, version, publisher) VALUES (?1, ?2, ?3)", + rusqlite::params!["database/postgresql", "16.1,5.11-2024.0.1.0", "openindiana.org"], + ).unwrap(); + } + + #[test] + fn test_search_fts_basic() { + let dir = tempfile::tempdir().unwrap(); + let fts_path = dir.path().join("fts.db"); + create_test_fts_db(&fts_path); + + let results = search_fts(&fts_path, "nginx", None, None).unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].stem, "web/server/nginx"); + } + + #[test] + fn test_search_fts_multiple_results() { + let dir = tempfile::tempdir().unwrap(); + let fts_path = dir.path().join("fts.db"); + create_test_fts_db(&fts_path); + + // "server" appears in both nginx and apache summaries + let results = search_fts(&fts_path, "server", None, None).unwrap(); + assert!(results.len() >= 2); + } + + #[test] + fn test_search_fts_prefix() { + let dir = tempfile::tempdir().unwrap(); + let fts_path = dir.path().join("fts.db"); + create_test_fts_db(&fts_path); + + let results = search_fts(&fts_path, "post*", None, None).unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].stem, "database/postgresql"); + } + + #[test] + fn test_search_fts_no_results() { + let dir = tempfile::tempdir().unwrap(); + let fts_path = dir.path().join("fts.db"); + create_test_fts_db(&fts_path); + + let results = search_fts(&fts_path, "nonexistent", None, None).unwrap(); + assert!(results.is_empty()); + } + + #[test] + fn test_search_fts_publisher_filter() { + let dir = tempfile::tempdir().unwrap(); + let fts_path = dir.path().join("fts.db"); + create_test_fts_db(&fts_path); + + // "coreutils" matches the otherpub entry only + let results = 
+ search_fts(&fts_path, "coreutils", Some("otherpub"), None).unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].publisher, "otherpub"); + + // Same query but filtered to openindiana.org should return nothing + let results = + search_fts(&fts_path, "coreutils", Some("openindiana.org"), None).unwrap(); + assert!(results.is_empty()); + } + + #[test] + fn test_search_fts_limit() { + let dir = tempfile::tempdir().unwrap(); + let fts_path = dir.path().join("fts.db"); + create_test_fts_db(&fts_path); + + let results = search_fts(&fts_path, "server", None, Some(1)).unwrap(); + assert_eq!(results.len(), 1); + } + + #[test] + fn test_resolve_latest_fmris() { + let dir = tempfile::tempdir().unwrap(); + let active_path = dir.path().join("active.db"); + create_test_active_db(&active_path); + + let stems = vec![ + ("web/server/nginx".to_string(), "openindiana.org".to_string()), + ("database/postgresql".to_string(), "openindiana.org".to_string()), + ("nonexistent/pkg".to_string(), "openindiana.org".to_string()), + ]; + + let fmris = resolve_latest_fmris(&active_path, &stems).unwrap(); + assert_eq!(fmris.len(), 2); // nonexistent should not be in results + assert!(fmris[&("web/server/nginx".to_string(), "openindiana.org".to_string())] + .contains("nginx")); + assert!(fmris[&("database/postgresql".to_string(), "openindiana.org".to_string())] + .contains("postgresql")); + } + + #[test] + fn test_sanitize_fts_query() { + assert_eq!(sanitize_fts_query("nginx"), "\"nginx\""); + assert_eq!(sanitize_fts_query("web server"), "\"web\" \"server\""); + assert_eq!(sanitize_fts_query("post*"), "\"post\"*"); + assert_eq!(sanitize_fts_query(""), ""); + assert_eq!(sanitize_fts_query(" "), ""); + } +} diff --git a/pkg6depotd/src/http/handlers/versions.rs b/pkg6depotd/src/http/handlers/versions.rs index c097442..ec1e356 100644 --- a/pkg6depotd/src/http/handlers/versions.rs +++ b/pkg6depotd/src/http/handlers/versions.rs @@ -82,10 +82,10 @@ pub async fn get_versions() -> impl 
IntoResponse { op: Operation::Publisher, versions: vec![0, 1], }, - //SupportedOperation { - // op: Operation::Search, - // versions: vec![0, 1], - //}, + SupportedOperation { + op: Operation::Search, + versions: vec![0, 1], + }, ], }; diff --git a/pkg6depotd/tests/integration_tests.rs b/pkg6depotd/tests/integration_tests.rs index 6550c94..cb82565 100644 --- a/pkg6depotd/tests/integration_tests.rs +++ b/pkg6depotd/tests/integration_tests.rs @@ -483,3 +483,118 @@ async fn test_multiple_publishers_default_route() { assert!(names.contains(&"pub1".to_string())); assert!(names.contains(&"pub2".to_string())); } + +#[tokio::test] +async fn test_search_endpoint() { + // Setup repo with a published package (rebuild builds fts.db) + let temp_dir = TempDir::new().unwrap(); + let repo_path = setup_repo(&temp_dir); + + let config = Config { + server: ServerConfig { + bind: vec!["127.0.0.1:0".to_string()], + workers: None, + max_connections: None, + reuseport: None, + cache_max_age: Some(3600), + tls_cert: None, + tls_key: None, + }, + repository: RepositoryConfig { + root: repo_path.clone(), + mode: Some("readonly".to_string()), + }, + telemetry: None, + publishers: None, + admin: None, + oauth2: None, + }; + + let repo = DepotRepo::new(&config).unwrap(); + let state = Arc::new(repo); + let router = http::routes::app_router(state); + + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + + tokio::spawn(async move { + http::server::run(router, listener).await.unwrap(); + }); + + let client = reqwest::Client::new(); + let base_url = format!("http://{}", addr); + + // 1. Test versions endpoint now advertises search + let resp = client + .get(format!("{}/versions/0/", base_url)) + .send() + .await + .unwrap(); + let text = resp.text().await.unwrap(); + assert!(text.contains("search 0 1"), "versions should advertise search"); + + // 2. 
Test search v0 - search for "example" (the package stem) + let resp = client + .get(format!("{}/test/search/0/example", base_url)) + .send() + .await + .unwrap(); + assert!(resp.status().is_success(), "search v0 should succeed"); + let body = resp.text().await.unwrap(); + assert!(body.contains("example"), "search v0 should find 'example' package"); + + // 3. Test search v1 - search for "example" + let resp = client + .get(format!( + "{}/test/search/1/False_2_None_None_example", + base_url + )) + .send() + .await + .unwrap(); + assert!(resp.status().is_success(), "search v1 should succeed"); + let body = resp.text().await.unwrap(); + assert!( + body.contains("Return from search v1"), + "search v1 should have header" + ); + assert!(body.contains("example"), "search v1 should find 'example' package"); + + // 4. Test search v0 - no results + let resp = client + .get(format!("{}/test/search/0/nonexistentpackage", base_url)) + .send() + .await + .unwrap(); + assert!(resp.status().is_success()); + let body = resp.text().await.unwrap(); + assert!(body.is_empty(), "search for nonexistent should return empty body"); + + // 5. Test search v1 - no results returns 204 + let resp = client + .get(format!( + "{}/test/search/1/False_2_None_None_nonexistentpackage", + base_url + )) + .send() + .await + .unwrap(); + assert_eq!( + resp.status().as_u16(), + 204, + "search v1 with no results should return 204" + ); + + // 6. Test search by summary keyword "Test Package" + let resp = client + .get(format!("{}/test/search/0/Test", base_url)) + .send() + .await + .unwrap(); + assert!(resp.status().is_success()); + let body = resp.text().await.unwrap(); + assert!( + body.contains("example"), + "search by summary keyword should find the package" + ); +}