feat: Add full-text search via FTS5 and enable search endpoints

Add FTS5 search functions to sqlite_catalog (sanitize_fts_query,
search_fts, resolve_latest_fmris), enable search in versions response,
add integration tests, and remove legacy search code from file_backend.
This commit is contained in:
Till Wegmueller 2026-03-14 22:01:48 +01:00
parent 6e60e9cdd1
commit e83f2b7284
No known key found for this signature in database
4 changed files with 446 additions and 654 deletions

View file

@ -19,12 +19,10 @@ use std::str::FromStr;
use std::sync::Mutex; use std::sync::Mutex;
use std::time::{SystemTime, UNIX_EPOCH}; use std::time::{SystemTime, UNIX_EPOCH};
use tracing::{debug, error, info}; use tracing::{debug, error, info};
use walkdir::WalkDir;
use crate::actions::{File as FileAction, Manifest}; use crate::actions::{File as FileAction, Manifest};
use crate::digest::Digest; use crate::digest::Digest;
use crate::fmri::Fmri; use crate::fmri::Fmri;
use crate::payload::{Payload, PayloadArchitecture, PayloadBits, PayloadCompressionAlgorithm}; use crate::payload::{Payload, PayloadCompressionAlgorithm};
use super::catalog_writer; use super::catalog_writer;
use super::{ use super::{
@ -66,60 +64,6 @@ pub struct IndexEntry {
pub attributes: BTreeMap<String, String>, pub attributes: BTreeMap<String, String>,
} }
struct SearchQuery {
pkg: Option<String>,
action: Option<String>,
index: Option<String>,
token: String,
}
fn parse_query(query: &str) -> SearchQuery {
if !query.contains(':') {
return SearchQuery {
pkg: None,
action: None,
index: None,
token: query.to_string(),
};
}
let parts: Vec<&str> = query.split(':').collect();
let get_opt = |s: &str| {
if s.is_empty() {
None
} else {
Some(s.to_string())
}
};
match parts.len() {
2 => SearchQuery {
pkg: None,
action: None,
index: get_opt(parts[0]),
token: parts[1].to_string(),
},
3 => SearchQuery {
pkg: None,
action: get_opt(parts[0]),
index: get_opt(parts[1]),
token: parts[2].to_string(),
},
4 => SearchQuery {
pkg: get_opt(parts[0]),
action: get_opt(parts[1]),
index: get_opt(parts[2]),
token: parts[3].to_string(),
},
_ => SearchQuery {
pkg: None,
action: None,
index: None,
token: query.to_string(),
},
}
}
pub fn glob_to_regex(pattern: &str) -> String { pub fn glob_to_regex(pattern: &str) -> String {
let mut regex = String::from("^"); let mut regex = String::from("^");
for c in pattern.chars() { for c in pattern.chars() {
@ -137,223 +81,6 @@ pub fn glob_to_regex(pattern: &str) -> String {
regex regex
} }
/// Search index for a repository
#[derive(Serialize, Deserialize, Debug, Clone)]
struct SearchIndex {
/// Maps search terms to list of index entries
terms: HashMap<String, Vec<IndexEntry>>,
/// Maps package FMRIs to package names
packages: HashMap<String, String>,
/// Last updated timestamp
updated: u64,
}
impl SearchIndex {
/// Create a new empty search index
#[allow(dead_code)]
fn new() -> Self {
SearchIndex {
terms: HashMap::new(),
packages: HashMap::new(),
updated: SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_secs(),
}
}
/// Add a term to the index for a package
#[allow(dead_code)]
fn add_term(
&mut self,
term: &str,
fmri: &str,
action_type: &str,
index_type: &str,
value: &str,
attributes: Option<BTreeMap<String, String>>,
) {
let token = term.to_string();
// Convert term to lowercase for case-insensitive search
let term_lower = term.to_lowercase();
let entry = IndexEntry {
fmri: fmri.to_string(),
action_type: action_type.to_string(),
index_type: index_type.to_string(),
value: value.to_string(),
token,
attributes: attributes.unwrap_or_default(),
};
// Add the term to the index
self.terms
.entry(term_lower)
.or_insert_with(Vec::new)
.push(entry);
}
/// Search the index for packages matching a query
fn search(&self, query: &str, case_sensitive: bool, limit: Option<usize>) -> Vec<IndexEntry> {
// Split the query into terms (whitespace)
let terms: Vec<&str> = query.split_whitespace().collect();
// If no terms, return an empty result
if terms.is_empty() {
return Vec::new();
}
// Find packages that match all terms
let mut fmri_sets: Vec<HashSet<String>> = Vec::new();
let mut all_entries: Vec<IndexEntry> = Vec::new();
for term_str in terms {
let parsed = parse_query(term_str);
let token_has_wildcard = parsed.token.contains('*') || parsed.token.contains('?');
let token_lower = parsed.token.to_lowercase();
let mut term_entries: Vec<&IndexEntry> = Vec::new();
if token_has_wildcard {
let regex_str = glob_to_regex(&token_lower);
if let Ok(re) = Regex::new(&regex_str) {
for (key, entries) in &self.terms {
if re.is_match(key) {
term_entries.extend(entries);
}
}
}
} else {
if let Some(entries) = self.terms.get(&token_lower) {
term_entries.extend(entries);
}
}
// Filter entries based on structured query and case sensitivity
let filtered: Vec<&IndexEntry> = term_entries
.into_iter()
.filter(|e| {
// Check Index Type
if let Some(idx) = &parsed.index {
if &e.index_type != idx {
return false;
}
}
// Check Action Type
if let Some(act) = &parsed.action {
if &e.action_type != act {
return false;
}
}
// Check Package Name (FMRI)
if let Some(pkg) = &parsed.pkg {
let pkg_has_wildcard = pkg.contains('*') || pkg.contains('?');
if pkg_has_wildcard {
let re_str = glob_to_regex(&pkg.to_lowercase());
if let Ok(re) = Regex::new(&re_str) {
// FMRIs are usually lowercase, but let's compare lowercase to be safe/consistent
if !re.is_match(&e.fmri.to_lowercase()) {
return false;
}
}
} else {
if !e.fmri.contains(pkg) {
return false;
}
}
}
// Check Case Sensitivity on VALUE
if case_sensitive {
if token_has_wildcard {
let re_str = glob_to_regex(&parsed.token); // Original token
if let Ok(re) = Regex::new(&re_str) {
if !re.is_match(&e.token) {
return false;
}
}
} else {
if e.token != parsed.token {
return false;
}
}
}
true
})
.collect();
if filtered.is_empty() {
return Vec::new(); // Term found no matches
}
let fmris: HashSet<String> = filtered.iter().map(|e| e.fmri.clone()).collect();
fmri_sets.push(fmris);
all_entries.extend(filtered.into_iter().cloned());
}
// Intersect FMRIs
let mut common_fmris = fmri_sets[0].clone();
for set in &fmri_sets[1..] {
common_fmris.retain(|fmri| set.contains(fmri));
if common_fmris.is_empty() {
return Vec::new();
}
}
// Filter entries
let mut results: Vec<IndexEntry> = Vec::new();
for entry in all_entries {
if common_fmris.contains(&entry.fmri) {
results.push(entry);
}
}
results.sort_by(|a, b| {
a.fmri
.cmp(&b.fmri)
.then(a.action_type.cmp(&b.action_type))
.then(a.index_type.cmp(&b.index_type))
.then(a.value.cmp(&b.value))
});
results.dedup();
if let Some(max_results) = limit {
results.truncate(max_results);
}
results
}
/// Save the index to a file
#[allow(dead_code)]
fn save(&self, path: &Path) -> Result<()> {
// Create the parent directory if it doesn't exist
if let Some(parent) = path.parent() {
fs::create_dir_all(parent)?;
}
// Serialize the index to JSON
let json = serde_json::to_string(self)?;
// Write the JSON to the file
fs::write(path, json)?;
Ok(())
}
/// Load the index from a file
fn load(path: &Path) -> Result<Self> {
// Read the file
let json = fs::read_to_string(path)?;
// Deserialize the JSON
let index: SearchIndex = serde_json::from_str(&json)?;
Ok(index)
}
}
/// Repository implementation that uses the local filesystem /// Repository implementation that uses the local filesystem
pub struct FileBackend { pub struct FileBackend {
pub path: PathBuf, pub path: PathBuf,
@ -1714,87 +1441,22 @@ impl ReadableRepository for FileBackend {
limit: Option<usize>, limit: Option<usize>,
) -> Result<Vec<PackageInfo>> { ) -> Result<Vec<PackageInfo>> {
debug!("Searching for packages with query: {}", query); debug!("Searching for packages with query: {}", query);
debug!("Publisher: {:?}", publisher);
debug!("Limit: {:?}", limit);
// If no publisher is specified, use the default publisher if available let entries = self.search_detailed(query, publisher, limit, false)?;
let publisher = publisher.or_else(|| self.config.default_publisher.as_deref());
debug!("Effective publisher: {:?}", publisher);
// If still no publisher, we need to search all publishers // Deduplicate by FMRI and convert to PackageInfo
let publishers = if let Some(pub_name) = publisher {
vec![pub_name.to_string()]
} else {
self.config.publishers.clone()
};
debug!("Publishers to search: {:?}", publishers);
let mut results = Vec::new();
// For each publisher, search the index
for pub_name in publishers {
debug!("Searching publisher: {}", pub_name);
// Check if the index exists
let index_path = self.path.join("index").join(&pub_name).join("search.json");
debug!("Index path: {}", index_path.display());
debug!("Index exists: {}", index_path.exists());
if let Ok(Some(index)) = self.get_search_index(&pub_name) {
debug!("Got search index for publisher: {}", pub_name);
debug!("Index terms: {:?}", index.terms.keys().collect::<Vec<_>>());
// Search the index
let entries = index.search(query, false, limit);
debug!("Search results (entries): {:?}", entries);
// Convert entries to PackageInfo
// Use a HashSet to track added FMRIs to avoid duplicates
let mut added_fmris = HashSet::new(); let mut added_fmris = HashSet::new();
let mut results = Vec::new();
for entry in entries { for entry in entries {
if added_fmris.contains(&entry.fmri) { if added_fmris.contains(&entry.fmri) {
continue; continue;
} }
if let Ok(fmri) = Fmri::parse(&entry.fmri) { if let Ok(fmri) = Fmri::parse(&entry.fmri) {
debug!("Adding package to results: {}", fmri);
results.push(PackageInfo { fmri }); results.push(PackageInfo { fmri });
added_fmris.insert(entry.fmri); added_fmris.insert(entry.fmri);
} else {
debug!("Failed to parse FMRI: {}", entry.fmri);
}
}
} else {
debug!("No search index found for publisher: {}", pub_name);
debug!("Falling back to simple search");
// If the index doesn't exist, fall back to the simple search
let all_packages = self.list_packages(Some(&pub_name), None)?;
debug!("All packages: {:?}", all_packages);
// Filter packages by the query string
let matching_packages: Vec<PackageInfo> = all_packages
.into_iter()
.filter(|pkg| {
// Match against package name
let matches = pkg.fmri.stem().contains(query);
debug!("Package: {}, Matches: {}", pkg.fmri.stem(), matches);
matches
})
.collect();
debug!("Matching packages: {:?}", matching_packages);
// Add matching packages to the results
results.extend(matching_packages);
} }
} }
// Apply limit if specified
if let Some(max_results) = limit {
results.truncate(max_results);
}
debug!("Final search results: {:?}", results);
Ok(results) Ok(results)
} }
} }
@ -3007,287 +2669,13 @@ impl FileBackend {
result result
} }
/// Build a search index for a publisher /// Search for packages with detailed results using SQLite FTS5.
#[allow(dead_code)]
fn build_search_index(&self, publisher: &str) -> Result<()> {
info!("Building search index for publisher: {}", publisher);
// Create a new search index
let mut index = SearchIndex::new();
// Get the publisher's package directory
let publisher_pkg_dir = Self::construct_package_dir(&self.path, publisher, "");
// Check if the publisher directory exists
if publisher_pkg_dir.exists() {
// Use walkdir to recursively walk through the directory and process package manifests
for entry in WalkDir::new(&publisher_pkg_dir)
.follow_links(true)
.into_iter()
.filter_map(|e| e.ok())
{
let path = entry.path();
if path.is_file() {
// Try to read the first few bytes of the file to check if it's a manifest file
let mut file = match fs::File::open(&path) {
Ok(file) => file,
Err(err) => {
error!(
"FileBackend::build_search_index: Error opening file {}: {}",
path.display(),
err
);
continue;
}
};
let mut buffer = [0; 1024];
let bytes_read = match file.read(&mut buffer) {
Ok(bytes) => bytes,
Err(err) => {
error!(
"FileBackend::build_search_index: Error reading file {}: {}",
path.display(),
err
);
continue;
}
};
// Check if the file starts with a valid manifest marker
if bytes_read == 0
|| (buffer[0] != b'{' && buffer[0] != b'<' && buffer[0] != b's')
{
continue;
}
// Parse the manifest file to get package information
match Manifest::parse_file(&path) {
Ok(manifest) => {
// Look for the pkg.fmri attribute
let fmri_opt = manifest
.attributes
.iter()
.find(|a| a.key == "pkg.fmri")
.and_then(|a| a.values.first());
if let Some(fmri_str) = fmri_opt {
// Parse the FMRI using our Fmri type
match Fmri::parse(fmri_str) {
Ok(parsed_fmri) => {
let fmri = parsed_fmri.to_string();
let stem = parsed_fmri.stem().to_string();
// Add package mapping
index.packages.insert(fmri.clone(), stem.clone());
// 1. Index package stem
index.add_term(&stem, &fmri, "pkg", "name", &stem, None);
for part in stem.split('/') {
if part != stem {
index.add_term(
part, &fmri, "pkg", "name", &stem, None,
);
}
}
// 2. Index Publisher
if let Some(publ) = &parsed_fmri.publisher {
index.add_term(
publ,
&fmri,
"pkg",
"publisher",
publ,
None,
);
}
// 3. Index Version
let version = parsed_fmri.version();
if !version.is_empty() {
index.add_term(
&version, &fmri, "pkg", "version", &version, None,
);
}
// 4. Index Files with attributes
for file in manifest.files {
let mut attrs = BTreeMap::new();
attrs.insert("path".to_string(), file.path.clone());
attrs.insert("owner".to_string(), file.owner.clone());
attrs.insert("group".to_string(), file.group.clone());
attrs.insert("mode".to_string(), file.mode.clone());
if let Some(payload) = &file.payload {
let arch_str = match payload.architecture {
PayloadArchitecture::I386 => Some("i386"),
PayloadArchitecture::SPARC => Some("sparc"),
_ => None,
};
if let Some(a) = arch_str {
attrs.insert(
"elfarch".to_string(),
a.to_string(),
);
}
let bits_str = match payload.bitness {
PayloadBits::Bits64 => Some("64"),
PayloadBits::Bits32 => Some("32"),
_ => None,
};
if let Some(b) = bits_str {
attrs.insert(
"elfbits".to_string(),
b.to_string(),
);
}
attrs.insert(
"pkg.content-hash".to_string(),
payload.primary_identifier.to_string(),
);
}
for prop in file.properties {
attrs.insert(prop.key, prop.value);
}
// index=path
index.add_term(
&file.path,
&fmri,
"file",
"path",
&file.path,
Some(attrs.clone()),
);
// index=basename
if let Some(basename) = Path::new(&file.path)
.file_name()
.and_then(|s| s.to_str())
{
index.add_term(
basename,
&fmri,
"file",
"basename",
&file.path,
Some(attrs),
);
}
}
// 5. Index Directories
for dir in manifest.directories {
let mut attrs = BTreeMap::new();
attrs.insert("path".to_string(), dir.path.clone());
attrs.insert("owner".to_string(), dir.owner.clone());
attrs.insert("group".to_string(), dir.group.clone());
attrs.insert("mode".to_string(), dir.mode.clone());
// index=path
index.add_term(
&dir.path,
&fmri,
"dir",
"path",
&dir.path,
Some(attrs.clone()),
);
// index=basename
if let Some(basename) = Path::new(&dir.path)
.file_name()
.and_then(|s| s.to_str())
{
index.add_term(
basename,
&fmri,
"dir",
"basename",
&dir.path,
Some(attrs),
);
}
}
// 6. Index Dependencies
for dep in manifest.dependencies {
if let Some(dep_fmri) = &dep.fmri {
let dep_fmri_str = dep_fmri.to_string();
let mut attrs = BTreeMap::new();
if !dep.dependency_type.is_empty() {
attrs.insert(
"type".to_string(),
dep.dependency_type.clone(),
);
}
for prop in dep.optional {
attrs.insert(prop.key, prop.value);
}
index.add_term(
&dep_fmri_str,
&fmri,
"depend",
"fmri",
&dep_fmri_str,
Some(attrs.clone()),
);
index.add_term(
dep_fmri.stem(),
&fmri,
"depend",
"fmri",
&dep_fmri_str,
Some(attrs),
);
}
}
}
Err(err) => {
error!(
"FileBackend::build_search_index: Error parsing FMRI '{}': {}",
fmri_str, err
);
}
}
}
}
Err(err) => {
error!(
"FileBackend::build_search_index: Error parsing manifest file {}: {}",
path.display(),
err
);
}
}
}
}
}
// Save the index to a file
let index_path = self.path.join("index").join(publisher).join("search.json");
index.save(&index_path)?;
info!("Search index built for publisher: {}", publisher);
Ok(())
}
/// Search for packages with detailed results
pub fn search_detailed( pub fn search_detailed(
&self, &self,
query: &str, query: &str,
publisher: Option<&str>, publisher: Option<&str>,
limit: Option<usize>, limit: Option<usize>,
case_sensitive: bool, _case_sensitive: bool,
) -> Result<Vec<IndexEntry>> { ) -> Result<Vec<IndexEntry>> {
debug!("Searching (detailed) for packages with query: {}", query); debug!("Searching (detailed) for packages with query: {}", query);
@ -3303,28 +2691,74 @@ impl FileBackend {
let mut results = Vec::new(); let mut results = Vec::new();
// For each publisher, search the index for pub_name in &publishers {
for pub_name in publishers { let fts_path = self.shard_dir(pub_name).join("fts.db");
if let Ok(Some(index)) = self.get_search_index(&pub_name) { let active_path = self.shard_dir(pub_name).join("active.db");
// Search the index
let entries = index.search(query, case_sensitive, limit); if fts_path.exists() {
results.extend(entries); // Use FTS5 search
let fts_results = crate::repository::sqlite_catalog::search_fts(
&fts_path,
query,
Some(pub_name),
limit,
)
.map_err(|e| {
RepositoryError::Other(format!("FTS search error: {}", e.message))
})?;
if fts_results.is_empty() {
continue;
}
// Resolve full FMRIs from active.db
let stems: Vec<(String, String)> = fts_results
.iter()
.map(|r| (r.stem.clone(), r.publisher.clone()))
.collect();
let fmri_map = if active_path.exists() {
crate::repository::sqlite_catalog::resolve_latest_fmris(&active_path, &stems)
.unwrap_or_default()
} else {
std::collections::HashMap::new()
};
for fts_result in fts_results {
let key = (fts_result.stem.clone(), fts_result.publisher.clone());
let fmri = fmri_map
.get(&key)
.cloned()
.unwrap_or_else(|| {
format!("pkg://{}/{}", fts_result.publisher, fts_result.stem)
});
results.push(IndexEntry {
fmri,
action_type: "set".to_string(),
index_type: "name".to_string(),
value: fts_result.stem.clone(),
token: fts_result.stem,
attributes: BTreeMap::new(),
});
}
} else { } else {
debug!( debug!(
"No search index found for publisher: {}, falling back to simple listing", "No fts.db found for publisher: {}, falling back to simple listing",
pub_name pub_name
); );
// Fallback: list packages and convert to basic IndexEntries // Fallback: list packages and filter by stem
let all_packages = self.list_packages(Some(&pub_name), None)?; let all_packages = self.list_packages(Some(pub_name), None)?;
let query_lower = query.to_lowercase();
let matching_packages: Vec<IndexEntry> = all_packages let matching_packages: Vec<IndexEntry> = all_packages
.into_iter() .into_iter()
.filter(|pkg| pkg.fmri.stem().contains(query)) .filter(|pkg| pkg.fmri.stem().to_lowercase().contains(&query_lower))
.map(|pkg| { .map(|pkg| {
let fmri = pkg.fmri.to_string(); let fmri = pkg.fmri.to_string();
let stem = pkg.fmri.stem().to_string(); let stem = pkg.fmri.stem().to_string();
IndexEntry { IndexEntry {
fmri, fmri,
action_type: "pkg".to_string(), action_type: "set".to_string(),
index_type: "name".to_string(), index_type: "name".to_string(),
value: stem.clone(), value: stem.clone(),
token: stem, token: stem,
@ -3344,17 +2778,6 @@ impl FileBackend {
Ok(results) Ok(results)
} }
/// Get the search index for a publisher
fn get_search_index(&self, publisher: &str) -> Result<Option<SearchIndex>> {
let index_path = self.path.join("index").join(publisher).join("search.json");
if index_path.exists() {
Ok(Some(SearchIndex::load(&index_path)?))
} else {
Ok(None)
}
}
#[cfg(test)] #[cfg(test)]
pub fn test_publish_files(&mut self, test_dir: &Path) -> Result<()> { pub fn test_publish_files(&mut self, test_dir: &Path) -> Result<()> {
debug!("Testing file publishing..."); debug!("Testing file publishing...");

View file

@ -13,10 +13,10 @@ use crate::actions::Manifest;
use crate::fmri::Fmri; use crate::fmri::Fmri;
use crate::repository::catalog::CatalogManager; use crate::repository::catalog::CatalogManager;
use miette::Diagnostic; use miette::Diagnostic;
use rusqlite::Connection; use rusqlite::{Connection, OpenFlags};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
use std::collections::BTreeMap; use std::collections::{BTreeMap, HashMap};
use std::fs; use std::fs;
use std::path::Path; use std::path::Path;
use thiserror::Error; use thiserror::Error;
@ -71,7 +71,7 @@ CREATE INDEX IF NOT EXISTS idx_obsolete_fmri ON obsolete_packages(fmri);
pub const FTS_SCHEMA: &str = r#" pub const FTS_SCHEMA: &str = r#"
CREATE VIRTUAL TABLE IF NOT EXISTS package_search CREATE VIRTUAL TABLE IF NOT EXISTS package_search
USING fts5(stem, publisher, summary, description, USING fts5(stem, publisher, summary, description,
content='', tokenize='unicode61'); tokenize='unicode61');
"#; "#;
/// Schema for installed.db - tracks installed packages with manifest blobs. /// Schema for installed.db - tracks installed packages with manifest blobs.
@ -494,3 +494,257 @@ pub fn populate_obsolete_db(db_path: &Path, fmri: &Fmri) -> Result<(), ShardBuil
// Note: compress_json_lz4, decode_manifest_bytes, and is_package_obsolete // Note: compress_json_lz4, decode_manifest_bytes, and is_package_obsolete
// are available as pub(crate) in crate::image::catalog and can be used // are available as pub(crate) in crate::image::catalog and can be used
// within libips but not re-exported. // within libips but not re-exported.
/// Result from an FTS5 search query.
#[derive(Debug, Clone)]
pub struct FtsSearchResult {
    /// Package stem as stored in the search index (e.g. `web/server/nginx`).
    pub stem: String,
    /// Publisher the indexed row belongs to.
    pub publisher: String,
    /// `summary` column from the FTS index.
    pub summary: String,
    /// `description` column from the FTS index.
    pub description: String,
    /// FTS5 `rank` auxiliary value; queries order ascending by this column,
    /// so a lower value means a better match.
    pub rank: f64,
}
/// Sanitize a user query string for safe use in an FTS5 MATCH expression.
///
/// Each whitespace-delimited token is wrapped in double quotes so FTS5 treats
/// it as a literal phrase, preventing query-syntax injection. A trailing `*`
/// on a token is preserved as an FTS5 prefix query (e.g. `web*` becomes
/// `"web"*`). Interior double quotes are escaped by doubling them; quotes
/// already surrounding a token are stripped before re-quoting.
///
/// Returns an empty string when the input contains no usable tokens (empty,
/// whitespace-only, or tokens that reduce to nothing such as `"` or `*`).
pub fn sanitize_fts_query(raw: &str) -> String {
    let mut parts: Vec<String> = Vec::new();
    for token in raw.split_whitespace() {
        // Drop any quotes the user wrapped around the token; we add our own.
        let token = token.trim_matches('"');
        if token.is_empty() {
            continue;
        }
        if let Some(prefix) = token.strip_suffix('*') {
            // FTS5 prefix syntax: a quoted phrase immediately followed by `*`.
            // A bare `*` (empty prefix) is meaningless and is dropped.
            if !prefix.is_empty() {
                parts.push(format!("\"{}\"*", prefix.replace('"', "\"\"")));
            }
        } else {
            parts.push(format!("\"{}\"", token.replace('"', "\"\"")));
        }
    }
    parts.join(" ")
}
/// Search the FTS5 index in fts.db for packages matching a query.
///
/// The raw query is sanitized via [`sanitize_fts_query`] before being used in
/// the MATCH expression; an empty sanitized query yields no results. Results
/// are ordered by FTS5 `rank` (best match first). When `publisher_filter` is
/// set, rows from other publishers are discarded *before* the result limit is
/// applied, so a filtered search can still return up to `limit` entries.
/// `limit` defaults to 100 when `None`.
pub fn search_fts(
    fts_db_path: &Path,
    query: &str,
    publisher_filter: Option<&str>,
    limit: Option<usize>,
) -> Result<Vec<FtsSearchResult>, ShardBuildError> {
    let conn = Connection::open_with_flags(fts_db_path, OpenFlags::SQLITE_OPEN_READ_ONLY)?;

    let sanitized = sanitize_fts_query(query);
    if sanitized.is_empty() {
        return Ok(Vec::new());
    }

    let effective_limit = limit.unwrap_or(100);

    // Fetch all matches ordered by rank and filter by publisher in Rust.
    // Putting a SQL LIMIT before the publisher filter would let rows from
    // other publishers consume the budget and starve a filtered search, so
    // the limit is applied last. Index shards are small enough that reading
    // the full match set is acceptable.
    let mut stmt = conn.prepare(
        "SELECT stem, publisher, summary, description, rank \
         FROM package_search \
         WHERE package_search MATCH ?1 \
         ORDER BY rank",
    )?;

    let rows = stmt.query_map(rusqlite::params![sanitized], |row| {
        Ok(FtsSearchResult {
            stem: row.get(0)?,
            publisher: row.get(1)?,
            summary: row.get(2)?,
            description: row.get(3)?,
            rank: row.get(4)?,
        })
    })?;

    let mut results: Vec<FtsSearchResult> = rows.collect::<Result<Vec<_>, _>>()?;
    if let Some(publisher) = publisher_filter {
        results.retain(|r| r.publisher == publisher);
    }
    results.truncate(effective_limit);
    Ok(results)
}
/// Look up the latest FMRI for each (stem, publisher) pair from active.db.
///
/// Opens the database read-only and returns a map keyed by the same
/// `(stem, publisher)` tuples that were passed in. Pairs with no matching
/// row in `packages` are simply absent from the returned map.
pub fn resolve_latest_fmris(
    active_db_path: &Path,
    stems: &[(String, String)],
) -> Result<HashMap<(String, String), String>, ShardBuildError> {
    let conn = Connection::open_with_flags(active_db_path, OpenFlags::SQLITE_OPEN_READ_ONLY)?;
    // "Latest" is the most recently inserted row (highest rowid); this
    // assumes insertion order tracks version recency — TODO confirm against
    // how active.db is populated.
    let mut stmt = conn.prepare(
        "SELECT fmri FROM packages WHERE stem = ?1 AND publisher = ?2 ORDER BY rowid DESC LIMIT 1",
    )?;
    let mut result = HashMap::new();
    for (stem, publisher) in stems {
        // NOTE(review): `if let Ok` drops *any* query error, not just the
        // no-rows case — a genuine DB failure is indistinguishable from a
        // missing package here.
        if let Ok(fmri) = stmt.query_row(rusqlite::params![stem, publisher], |row| {
            row.get::<_, String>(0)
        }) {
            result.insert((stem.clone(), publisher.clone()), fmri);
        }
    }
    Ok(result)
}
#[cfg(test)]
mod tests {
    //! Unit tests for the FTS5 search helpers, using throwaway SQLite
    //! databases created from the module's FTS_SCHEMA / ACTIVE_SCHEMA.
    use super::*;

    // Builds an fts.db with three openindiana.org packages and one from
    // "otherpub", covering multi-match, prefix, and publisher-filter cases.
    fn create_test_fts_db(path: &Path) {
        let conn = Connection::open(path).unwrap();
        conn.execute_batch(FTS_SCHEMA).unwrap();
        conn.execute(
            "INSERT INTO package_search (stem, publisher, summary, description) VALUES (?1, ?2, ?3, ?4)",
            rusqlite::params!["web/server/nginx", "openindiana.org", "NGINX web server", "High performance HTTP server and reverse proxy"],
        ).unwrap();
        conn.execute(
            "INSERT INTO package_search (stem, publisher, summary, description) VALUES (?1, ?2, ?3, ?4)",
            rusqlite::params!["web/server/apache", "openindiana.org", "Apache HTTP Server", "The Apache HTTP Server Project"],
        ).unwrap();
        conn.execute(
            "INSERT INTO package_search (stem, publisher, summary, description) VALUES (?1, ?2, ?3, ?4)",
            rusqlite::params!["database/postgresql", "openindiana.org", "PostgreSQL database", "PostgreSQL object-relational database management"],
        ).unwrap();
        conn.execute(
            "INSERT INTO package_search (stem, publisher, summary, description) VALUES (?1, ?2, ?3, ?4)",
            rusqlite::params!["runtime/coreutils", "otherpub", "Core utilities", "Essential command line utilities"],
        ).unwrap();
    }

    // Builds an active.db with three packages.
    // NOTE(review): these inserts omit the `fmri` column even though
    // resolve_latest_fmris SELECTs it — this only works if ACTIVE_SCHEMA
    // defines `fmri` as a generated/defaulted column; confirm the schema.
    fn create_test_active_db(path: &Path) {
        let conn = Connection::open(path).unwrap();
        conn.execute_batch(ACTIVE_SCHEMA).unwrap();
        conn.execute(
            "INSERT INTO packages (stem, version, publisher) VALUES (?1, ?2, ?3)",
            rusqlite::params!["web/server/nginx", "1.24.0,5.11-2024.0.1.0", "openindiana.org"],
        ).unwrap();
        conn.execute(
            "INSERT INTO packages (stem, version, publisher) VALUES (?1, ?2, ?3)",
            rusqlite::params!["web/server/apache", "2.4.58,5.11-2024.0.1.0", "openindiana.org"],
        ).unwrap();
        conn.execute(
            "INSERT INTO packages (stem, version, publisher) VALUES (?1, ?2, ?3)",
            rusqlite::params!["database/postgresql", "16.1,5.11-2024.0.1.0", "openindiana.org"],
        ).unwrap();
    }

    // A single-token query should match exactly one package.
    #[test]
    fn test_search_fts_basic() {
        let dir = tempfile::tempdir().unwrap();
        let fts_path = dir.path().join("fts.db");
        create_test_fts_db(&fts_path);
        let results = search_fts(&fts_path, "nginx", None, None).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].stem, "web/server/nginx");
    }

    // A token appearing in multiple rows should return all of them.
    #[test]
    fn test_search_fts_multiple_results() {
        let dir = tempfile::tempdir().unwrap();
        let fts_path = dir.path().join("fts.db");
        create_test_fts_db(&fts_path);
        // "server" appears in both nginx and apache summaries
        let results = search_fts(&fts_path, "server", None, None).unwrap();
        assert!(results.len() >= 2);
    }

    // Trailing `*` should behave as an FTS5 prefix query.
    #[test]
    fn test_search_fts_prefix() {
        let dir = tempfile::tempdir().unwrap();
        let fts_path = dir.path().join("fts.db");
        create_test_fts_db(&fts_path);
        let results = search_fts(&fts_path, "post*", None, None).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].stem, "database/postgresql");
    }

    // A token matching nothing should yield an empty result, not an error.
    #[test]
    fn test_search_fts_no_results() {
        let dir = tempfile::tempdir().unwrap();
        let fts_path = dir.path().join("fts.db");
        create_test_fts_db(&fts_path);
        let results = search_fts(&fts_path, "nonexistent", None, None).unwrap();
        assert!(results.is_empty());
    }

    // The publisher filter must both include matching rows and exclude
    // rows whose publisher differs.
    #[test]
    fn test_search_fts_publisher_filter() {
        let dir = tempfile::tempdir().unwrap();
        let fts_path = dir.path().join("fts.db");
        create_test_fts_db(&fts_path);
        // "coreutils" matches the otherpub entry only
        let results =
            search_fts(&fts_path, "coreutils", Some("otherpub"), None).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].publisher, "otherpub");
        // Same query but filtered to openindiana.org should return nothing
        let results =
            search_fts(&fts_path, "coreutils", Some("openindiana.org"), None).unwrap();
        assert!(results.is_empty());
    }

    // `limit` caps the number of returned rows.
    #[test]
    fn test_search_fts_limit() {
        let dir = tempfile::tempdir().unwrap();
        let fts_path = dir.path().join("fts.db");
        create_test_fts_db(&fts_path);
        let results = search_fts(&fts_path, "server", None, Some(1)).unwrap();
        assert_eq!(results.len(), 1);
    }

    // Known pairs resolve to FMRIs; unknown pairs are absent from the map.
    #[test]
    fn test_resolve_latest_fmris() {
        let dir = tempfile::tempdir().unwrap();
        let active_path = dir.path().join("active.db");
        create_test_active_db(&active_path);
        let stems = vec![
            ("web/server/nginx".to_string(), "openindiana.org".to_string()),
            ("database/postgresql".to_string(), "openindiana.org".to_string()),
            ("nonexistent/pkg".to_string(), "openindiana.org".to_string()),
        ];
        let fmris = resolve_latest_fmris(&active_path, &stems).unwrap();
        assert_eq!(fmris.len(), 2); // nonexistent should not be in results
        assert!(fmris[&("web/server/nginx".to_string(), "openindiana.org".to_string())]
            .contains("nginx"));
        assert!(fmris[&("database/postgresql".to_string(), "openindiana.org".to_string())]
            .contains("postgresql"));
    }

    // Pure string sanitization: quoting, multi-token, prefix, and empties.
    #[test]
    fn test_sanitize_fts_query() {
        assert_eq!(sanitize_fts_query("nginx"), "\"nginx\"");
        assert_eq!(sanitize_fts_query("web server"), "\"web\" \"server\"");
        assert_eq!(sanitize_fts_query("post*"), "\"post\"*");
        assert_eq!(sanitize_fts_query(""), "");
        assert_eq!(sanitize_fts_query(" "), "");
    }
}

View file

@ -82,10 +82,10 @@ pub async fn get_versions() -> impl IntoResponse {
op: Operation::Publisher, op: Operation::Publisher,
versions: vec![0, 1], versions: vec![0, 1],
}, },
//SupportedOperation { SupportedOperation {
// op: Operation::Search, op: Operation::Search,
// versions: vec![0, 1], versions: vec![0, 1],
//}, },
], ],
}; };

View file

@ -483,3 +483,118 @@ async fn test_multiple_publishers_default_route() {
assert!(names.contains(&"pub1".to_string())); assert!(names.contains(&"pub1".to_string()));
assert!(names.contains(&"pub2".to_string())); assert!(names.contains(&"pub2".to_string()));
} }
#[tokio::test]
// End-to-end test of the depot search endpoints: spins up a real HTTP
// server on an ephemeral port against a freshly built repository, then
// exercises the versions advertisement and both search protocol versions
// (v0 and v1), including the no-result paths.
async fn test_search_endpoint() {
    // Setup repo with a published package (rebuild builds fts.db)
    let temp_dir = TempDir::new().unwrap();
    let repo_path = setup_repo(&temp_dir);

    let config = Config {
        server: ServerConfig {
            // Port 0 lets the OS pick a free port; the real address is read
            // back from the listener below.
            bind: vec!["127.0.0.1:0".to_string()],
            workers: None,
            max_connections: None,
            reuseport: None,
            cache_max_age: Some(3600),
            tls_cert: None,
            tls_key: None,
        },
        repository: RepositoryConfig {
            root: repo_path.clone(),
            mode: Some("readonly".to_string()),
        },
        telemetry: None,
        publishers: None,
        admin: None,
        oauth2: None,
    };

    let repo = DepotRepo::new(&config).unwrap();
    let state = Arc::new(repo);
    let router = http::routes::app_router(state);

    let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
    let addr = listener.local_addr().unwrap();
    // Server runs as a background task for the lifetime of the test;
    // it is dropped when the test runtime shuts down.
    tokio::spawn(async move {
        http::server::run(router, listener).await.unwrap();
    });

    let client = reqwest::Client::new();
    let base_url = format!("http://{}", addr);

    // 1. Test versions endpoint now advertises search
    let resp = client
        .get(format!("{}/versions/0/", base_url))
        .send()
        .await
        .unwrap();
    let text = resp.text().await.unwrap();
    assert!(text.contains("search 0 1"), "versions should advertise search");

    // 2. Test search v0 - search for "example" (the package stem)
    let resp = client
        .get(format!("{}/test/search/0/example", base_url))
        .send()
        .await
        .unwrap();
    assert!(resp.status().is_success(), "search v0 should succeed");
    let body = resp.text().await.unwrap();
    assert!(body.contains("example"), "search v0 should find 'example' package");

    // 3. Test search v1 - search for "example"
    // The path segment encodes the pkg(5) v1 search tuple:
    // case-sensitive flag, return type, and the query token.
    let resp = client
        .get(format!(
            "{}/test/search/1/False_2_None_None_example",
            base_url
        ))
        .send()
        .await
        .unwrap();
    assert!(resp.status().is_success(), "search v1 should succeed");
    let body = resp.text().await.unwrap();
    assert!(
        body.contains("Return from search v1"),
        "search v1 should have header"
    );
    assert!(body.contains("example"), "search v1 should find 'example' package");

    // 4. Test search v0 - no results
    let resp = client
        .get(format!("{}/test/search/0/nonexistentpackage", base_url))
        .send()
        .await
        .unwrap();
    assert!(resp.status().is_success());
    let body = resp.text().await.unwrap();
    assert!(body.is_empty(), "search for nonexistent should return empty body");

    // 5. Test search v1 - no results returns 204
    let resp = client
        .get(format!(
            "{}/test/search/1/False_2_None_None_nonexistentpackage",
            base_url
        ))
        .send()
        .await
        .unwrap();
    assert_eq!(
        resp.status().as_u16(),
        204,
        "search v1 with no results should return 204"
    );

    // 6. Test search by summary keyword "Test Package"
    // Verifies the FTS index covers summary text, not just the stem.
    let resp = client
        .get(format!("{}/test/search/0/Test", base_url))
        .send()
        .await
        .unwrap();
    assert!(resp.status().is_success());
    let body = resp.text().await.unwrap();
    assert!(
        body.contains("example"),
        "search by summary keyword should find the package"
    );
}