fix: Deduplicate FTS entries across catalog parts in build_shards

build_shards() iterates over all three catalog parts (base, dependency,
summary), and each part lists the same packages, so every stem was
inserted into the FTS index three times. Track seen stems in a HashSet
and skip duplicates. Fixes #22.
This commit is contained in:
Till Wegmueller 2026-04-09 22:18:15 +02:00
parent f8ab1f94c1
commit 96b7207194

View file

@ -193,6 +193,10 @@ pub fn build_shards(
.map_err(|e| ShardBuildError::new(format!("Failed to create catalog manager: {}", e)))?; .map_err(|e| ShardBuildError::new(format!("Failed to create catalog manager: {}", e)))?;
let mut package_count = 0usize; let mut package_count = 0usize;
let mut package_version_count = 0usize; let mut package_version_count = 0usize;
// Track which packages have been inserted into FTS to avoid duplicates.
// The catalog has multiple parts (base, dependency, summary) that each
// list the same packages — we only want one FTS entry per stem.
let mut fts_seen: std::collections::HashSet<String> = std::collections::HashSet::new();
// Begin transactions for batch inserts // Begin transactions for batch inserts
let active_tx = active_conn.transaction()?; let active_tx = active_conn.transaction()?;
@ -332,28 +336,32 @@ pub fn build_shards(
} }
} }
// Extract summary and description for FTS // Extract summary and description for FTS (deduplicate
let summary = manifest // across catalog parts — base, dependency, and summary
.attributes // parts all list the same packages)
.iter() if fts_seen.insert(pkg_name.clone()) {
.find(|a| a.key == "pkg.summary") let summary = manifest
.and_then(|a| a.values.first()) .attributes
.map(|s| s.as_str()) .iter()
.unwrap_or(""); .find(|a| a.key == "pkg.summary")
let description = manifest .and_then(|a| a.values.first())
.attributes .map(|s| s.as_str())
.iter() .unwrap_or("");
.find(|a| a.key == "pkg.description") let description = manifest
.and_then(|a| a.values.first()) .attributes
.map(|s| s.as_str()) .iter()
.unwrap_or(""); .find(|a| a.key == "pkg.description")
.and_then(|a| a.values.first())
.map(|s| s.as_str())
.unwrap_or("");
insert_fts.execute(rusqlite::params![ insert_fts.execute(rusqlite::params![
pkg_name, pkg_name,
publisher, publisher,
summary, summary,
description description
])?; ])?;
}
} }
} }
} }