fix: Store payloads under primary hash and fix digest detection

- Store compressed payloads under the primary (uncompressed) hash as
  the main key, matching IPS protocol where clients request files by
  the manifest's primary hash via /file/0/<hash>
- Create hardlink under compressed hash for internal lookups
- Detect SHA256 (64 chars) or SHA512 (128 chars) vs SHA1 (40 chars)
  from bare hex hash length in Digest::from_str instead of always
  defaulting to SHA1
- Remove digest verification for local file copies in FileBackend
  (files are compressed, hash is of uncompressed content)
- Simplify recv.rs payload fetch to use primary hash directly
  instead of trying multiple digest formats against the source
This commit is contained in:
Till Wegmueller 2026-03-15 20:58:44 +01:00
parent a0fe229ba4
commit bcea795848
3 changed files with 33 additions and 74 deletions

View file

@ -81,9 +81,15 @@ impl FromStr for Digest {
fn from_str(s: &str) -> StdResult<Self, Self::Err> { fn from_str(s: &str) -> StdResult<Self, Self::Err> {
let str = String::from(s); let str = String::from(s);
if !s.contains(':') { if !s.contains(':') {
// Detect algorithm from hash length for bare hex strings
let algorithm = match s.len() {
64 => DigestAlgorithm::SHA256,
128 => DigestAlgorithm::SHA512,
_ => DigestAlgorithm::SHA1, // 40 chars or other lengths default to SHA1
};
return Ok(Digest { return Ok(Digest {
hash: String::from(s), hash: String::from(s),
algorithm: DigestAlgorithm::SHA1, algorithm,
source: DigestSource::PrimaryPayloadHash, source: DigestSource::PrimaryPayloadHash,
}); });
} }

View file

@ -228,25 +228,6 @@ impl<'a, S: ReadableRepository + Sync> PackageReceiver<'a, S> {
.map(|file| { .map(|file| {
let payload = file.payload.as_ref().unwrap(); let payload = file.payload.as_ref().unwrap();
// Collect candidate digests to try: raw primary hash first (works with
// REST servers that use SHA1), then formatted digests with algorithm info
// (works with FileBackend that may store under compressed hash)
let mut digests = vec![payload.primary_identifier.hash.clone()];
// Add formatted primary with algorithm info as second attempt
let formatted_primary = payload.primary_identifier.to_string();
if formatted_primary != payload.primary_identifier.hash {
digests.push(formatted_primary);
}
for additional in &payload.additional_identifiers {
if !additional.hash.is_empty() {
digests.push(additional.hash.clone());
let formatted = additional.to_string();
if formatted != additional.hash {
digests.push(formatted);
}
}
}
let digest = &payload.primary_identifier.hash; let digest = &payload.primary_identifier.hash;
let temp_file_path = temp_dir_path.join(digest); let temp_file_path = temp_dir_path.join(digest);
@ -256,23 +237,10 @@ impl<'a, S: ReadableRepository + Sync> PackageReceiver<'a, S> {
temp_file_path.display() temp_file_path.display()
); );
// Try each digest until one succeeds (primary hash may differ from storage hash) // Fetch payload using the primary hash from the manifest.
let mut last_err = None; // This is the hash the source repository knows the file by.
for d in &digests { self.source
match self.source.fetch_payload(&publisher_str, d, &temp_file_path) { .fetch_payload(&publisher_str, digest, &temp_file_path)?;
Ok(()) => {
last_err = None;
break;
}
Err(e) => {
debug!("Failed to fetch payload with digest {}: {}", d, e);
last_err = Some(e);
}
}
}
if let Some(e) = last_err {
return Err(e);
}
// Update progress atomically // Update progress atomically
let current_count = { let current_count = {

View file

@ -392,46 +392,45 @@ impl Transaction {
} }
} }
// Move files to their final location (atomic rename, same filesystem) // Move files to their final location (atomic rename, same filesystem).
// Store under the primary (uncompressed) hash — this is the hash that clients
// use to request files via /file/0/<hash>. Also store under compressed hash
// for internal lookups.
for (source_path, compressed_hash, primary_hash) in self.files { for (source_path, compressed_hash, primary_hash) in self.files {
let dest_path = // Primary storage path: use the primary (uncompressed) hash as the key,
FileBackend::construct_file_path_with_publisher(&self.repo, &publisher, &compressed_hash); // matching IPS protocol where clients look up files by manifest hash
let primary_path =
FileBackend::construct_file_path_with_publisher(&self.repo, &publisher, &primary_hash);
if let Some(parent) = dest_path.parent() { if let Some(parent) = primary_path.parent() {
fs::create_dir_all(parent).map_err(|e| RepositoryError::DirectoryCreateError { fs::create_dir_all(parent).map_err(|e| RepositoryError::DirectoryCreateError {
path: parent.to_path_buf(), path: parent.to_path_buf(),
source: e, source: e,
})?; })?;
} }
if !dest_path.exists() { if !primary_path.exists() {
fs::rename(&source_path, &dest_path).map_err(|e| RepositoryError::FileRenameError { fs::rename(&source_path, &primary_path).map_err(|e| RepositoryError::FileRenameError {
from: source_path.clone(), from: source_path.clone(),
to: dest_path.clone(), to: primary_path.clone(),
source: e, source: e,
})?; })?;
} }
// Create a hardlink from the primary (uncompressed) hash so clients // Also create a hardlink under the compressed hash for internal lookups
// that look up files by the manifest's primary hash can find them
if primary_hash != compressed_hash { if primary_hash != compressed_hash {
let primary_path = let compressed_path =
FileBackend::construct_file_path_with_publisher(&self.repo, &publisher, &primary_hash); FileBackend::construct_file_path_with_publisher(&self.repo, &publisher, &compressed_hash);
if !primary_path.exists() { if !compressed_path.exists() {
if let Some(parent) = primary_path.parent() { if let Some(parent) = compressed_path.parent() {
fs::create_dir_all(parent).map_err(|e| RepositoryError::DirectoryCreateError { fs::create_dir_all(parent).map_err(|e| RepositoryError::DirectoryCreateError {
path: parent.to_path_buf(), path: parent.to_path_buf(),
source: e, source: e,
})?; })?;
} }
if let Err(e) = fs::hard_link(&dest_path, &primary_path) { if let Err(e) = fs::hard_link(&primary_path, &compressed_path) {
debug!("Failed to create hardlink from {} to {}: {}, falling back to copy", debug!("Failed to create hardlink for compressed hash: {}", e);
dest_path.display(), primary_path.display(), e); // Not fatal — compressed hash lookup is optional
fs::copy(&dest_path, &primary_path).map_err(|e| RepositoryError::FileCopyError {
from: dest_path.clone(),
to: primary_path,
source: e,
})?;
} }
} }
} }
@ -1294,26 +1293,12 @@ impl ReadableRepository for FileBackend {
} }
} }
// Read source content and verify digest // Read source content (stored compressed — digest of compressed bytes may not
// match the primary/uncompressed hash, so we skip verification for local files)
let bytes = fs::read(&source_path).map_err(|e| RepositoryError::FileReadError { let bytes = fs::read(&source_path).map_err(|e| RepositoryError::FileReadError {
path: source_path.clone(), path: source_path.clone(),
source: e, source: e,
})?; })?;
match crate::digest::Digest::from_bytes(
&bytes,
algo,
crate::digest::DigestSource::PrimaryPayloadHash,
) {
Ok(comp) => {
if comp.hash != hash {
return Err(RepositoryError::DigestError(format!(
"Digest mismatch: expected {}, got {}",
hash, comp.hash
)));
}
}
Err(e) => return Err(RepositoryError::DigestError(e.to_string())),
}
// Write atomically // Write atomically
let tmp = dest.with_extension("tmp"); let tmp = dest.with_extension("tmp");