From bcea7958482bda1f56087ada0155a95b62567950 Mon Sep 17 00:00:00 2001 From: Till Wegmueller Date: Sun, 15 Mar 2026 20:58:44 +0100 Subject: [PATCH] fix: Store payloads under primary hash and fix digest detection - Store compressed payloads under the primary (uncompressed) hash as the main key, matching IPS protocol where clients request files by the manifest's primary hash via /file/0/ - Create hardlink under compressed hash for internal lookups - Detect SHA256 (64 chars) and SHA512 (128 chars) vs SHA1 (40 chars) from bare hex hash length in Digest::from_str instead of always defaulting to SHA1 - Remove digest verification for local file copies in FileBackend (files are compressed, hash is of uncompressed content) - Simplify recv.rs payload fetch to use primary hash directly instead of trying multiple digest formats against the source --- libips/src/digest/mod.rs | 8 +++- libips/src/recv.rs | 40 ++---------------- libips/src/repository/file_backend.rs | 59 ++++++++++----------------- 3 files changed, 33 insertions(+), 74 deletions(-) diff --git a/libips/src/digest/mod.rs b/libips/src/digest/mod.rs index 64786e7..196d7b3 100644 --- a/libips/src/digest/mod.rs +++ b/libips/src/digest/mod.rs @@ -81,9 +81,15 @@ impl FromStr for Digest { fn from_str(s: &str) -> StdResult { let str = String::from(s); if !s.contains(':') { + // Detect algorithm from hash length for bare hex strings + let algorithm = match s.len() { + 64 => DigestAlgorithm::SHA256, + 128 => DigestAlgorithm::SHA512, + _ => DigestAlgorithm::SHA1, // 40 chars or other lengths default to SHA1 + }; return Ok(Digest { hash: String::from(s), - algorithm: DigestAlgorithm::SHA1, + algorithm, source: DigestSource::PrimaryPayloadHash, }); } diff --git a/libips/src/recv.rs b/libips/src/recv.rs index c6a9995..f71fb51 100644 --- a/libips/src/recv.rs +++ b/libips/src/recv.rs @@ -228,25 +228,6 @@ impl<'a, S: ReadableRepository + Sync> PackageReceiver<'a, S> { .map(|file| { let payload = file.payload.as_ref().unwrap(); - // Collect candidate 
digests to try: raw primary hash first (works with - // REST servers that use SHA1), then formatted digests with algorithm info - // (works with FileBackend that may store under compressed hash) - let mut digests = vec![payload.primary_identifier.hash.clone()]; - // Add formatted primary with algorithm info as second attempt - let formatted_primary = payload.primary_identifier.to_string(); - if formatted_primary != payload.primary_identifier.hash { - digests.push(formatted_primary); - } - for additional in &payload.additional_identifiers { - if !additional.hash.is_empty() { - digests.push(additional.hash.clone()); - let formatted = additional.to_string(); - if formatted != additional.hash { - digests.push(formatted); - } - } - } - let digest = &payload.primary_identifier.hash; let temp_file_path = temp_dir_path.join(digest); @@ -256,23 +237,10 @@ impl<'a, S: ReadableRepository + Sync> PackageReceiver<'a, S> { temp_file_path.display() ); - // Try each digest until one succeeds (primary hash may differ from storage hash) - let mut last_err = None; - for d in &digests { - match self.source.fetch_payload(&publisher_str, d, &temp_file_path) { - Ok(()) => { - last_err = None; - break; - } - Err(e) => { - debug!("Failed to fetch payload with digest {}: {}", d, e); - last_err = Some(e); - } - } - } - if let Some(e) = last_err { - return Err(e); - } + // Fetch payload using the primary hash from the manifest. + // This is the hash the source repository knows the file by. 
+ self.source + .fetch_payload(&publisher_str, digest, &temp_file_path)?; // Update progress atomically let current_count = { diff --git a/libips/src/repository/file_backend.rs b/libips/src/repository/file_backend.rs index 86b84cf..1149bcf 100644 --- a/libips/src/repository/file_backend.rs +++ b/libips/src/repository/file_backend.rs @@ -392,46 +392,45 @@ impl Transaction { } } - // Move files to their final location (atomic rename, same filesystem) + // Move files to their final location (atomic rename, same filesystem). + // Store under the primary (uncompressed) hash — this is the hash that clients + // use to request files via /file/0/. Also store under compressed hash + // for internal lookups. for (source_path, compressed_hash, primary_hash) in self.files { - let dest_path = - FileBackend::construct_file_path_with_publisher(&self.repo, &publisher, &compressed_hash); + // Primary storage path: use the primary (uncompressed) hash as the key, + // matching IPS protocol where clients look up files by manifest hash + let primary_path = + FileBackend::construct_file_path_with_publisher(&self.repo, &publisher, &primary_hash); - if let Some(parent) = dest_path.parent() { + if let Some(parent) = primary_path.parent() { fs::create_dir_all(parent).map_err(|e| RepositoryError::DirectoryCreateError { path: parent.to_path_buf(), source: e, })?; } - if !dest_path.exists() { - fs::rename(&source_path, &dest_path).map_err(|e| RepositoryError::FileRenameError { + if !primary_path.exists() { + fs::rename(&source_path, &primary_path).map_err(|e| RepositoryError::FileRenameError { from: source_path.clone(), - to: dest_path.clone(), + to: primary_path.clone(), source: e, })?; } - // Create a hardlink from the primary (uncompressed) hash so clients - // that look up files by the manifest's primary hash can find them + // Also create a hardlink under the compressed hash for internal lookups if primary_hash != compressed_hash { - let primary_path = - 
FileBackend::construct_file_path_with_publisher(&self.repo, &publisher, &primary_hash); - if !primary_path.exists() { - if let Some(parent) = primary_path.parent() { + let compressed_path = + FileBackend::construct_file_path_with_publisher(&self.repo, &publisher, &compressed_hash); + if !compressed_path.exists() { + if let Some(parent) = compressed_path.parent() { fs::create_dir_all(parent).map_err(|e| RepositoryError::DirectoryCreateError { path: parent.to_path_buf(), source: e, })?; } - if let Err(e) = fs::hard_link(&dest_path, &primary_path) { - debug!("Failed to create hardlink from {} to {}: {}, falling back to copy", - dest_path.display(), primary_path.display(), e); - fs::copy(&dest_path, &primary_path).map_err(|e| RepositoryError::FileCopyError { - from: dest_path.clone(), - to: primary_path, - source: e, - })?; + if let Err(e) = fs::hard_link(&primary_path, &compressed_path) { + debug!("Failed to create hardlink for compressed hash: {}", e); + // Not fatal — compressed hash lookup is optional } } } @@ -1294,26 +1293,12 @@ impl ReadableRepository for FileBackend { } } - // Read source content and verify digest + // Read source content (stored compressed — digest of compressed bytes may not + // match the primary/uncompressed hash, so we skip verification for local files) let bytes = fs::read(&source_path).map_err(|e| RepositoryError::FileReadError { path: source_path.clone(), source: e, })?; - match crate::digest::Digest::from_bytes( - &bytes, - algo, - crate::digest::DigestSource::PrimaryPayloadHash, - ) { - Ok(comp) => { - if comp.hash != hash { - return Err(RepositoryError::DigestError(format!( - "Digest mismatch: expected {}, got {}", - hash, comp.hash - ))); - } - } - Err(e) => return Err(RepositoryError::DigestError(e.to_string())), - } // Write atomically let tmp = dest.with_extension("tmp");