fix: Deduplicate FTS entries across catalog parts in build_shards

build_shards() iterates over all three catalog parts (base, dependency,
summary), and each part lists the same packages, so every stem was
inserted into the FTS index three times, once per part. Track the stems
already seen in a HashSet and skip duplicates. Fixes #22.
This commit is contained in:
Till Wegmueller 2026-04-09 22:18:15 +02:00
parent f8ab1f94c1
commit 96b7207194

View file

@ -193,6 +193,10 @@ pub fn build_shards(
.map_err(|e| ShardBuildError::new(format!("Failed to create catalog manager: {}", e)))?; .map_err(|e| ShardBuildError::new(format!("Failed to create catalog manager: {}", e)))?;
let mut package_count = 0usize; let mut package_count = 0usize;
let mut package_version_count = 0usize; let mut package_version_count = 0usize;
// Track which packages have been inserted into FTS to avoid duplicates.
// The catalog has multiple parts (base, dependency, summary) that each
// list the same packages — we only want one FTS entry per stem.
let mut fts_seen: std::collections::HashSet<String> = std::collections::HashSet::new();
// Begin transactions for batch inserts // Begin transactions for batch inserts
let active_tx = active_conn.transaction()?; let active_tx = active_conn.transaction()?;
@ -332,7 +336,10 @@ pub fn build_shards(
} }
} }
// Extract summary and description for FTS // Extract summary and description for FTS (deduplicate
// across catalog parts — base, dependency, and summary
// parts all list the same packages)
if fts_seen.insert(pkg_name.clone()) {
let summary = manifest let summary = manifest
.attributes .attributes
.iter() .iter()
@ -359,6 +366,7 @@ pub fn build_shards(
} }
} }
} }
}
// Commit transactions // Commit transactions
active_tx.commit()?; active_tx.commit()?;