From 8a32bf31761601b276d84f4b2cd573b0d9141b2d Mon Sep 17 00:00:00 2001 From: Till Wegmueller Date: Mon, 21 Jul 2025 22:37:08 +0200 Subject: [PATCH] Add compression support (Gzip/LZ4) and update dependencies Signed-off-by: Till Wegmueller --- Cargo.lock | 50 ++++++++++++---- libips/Cargo.toml | 2 + libips/src/repository/file_backend.rs | 84 ++++++++++++++++++++++----- 3 files changed, 110 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 05cd45e..bbe6ca2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,10 +3,10 @@ version = 4 [[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "aho-corasick" @@ -147,9 +147,12 @@ checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" [[package]] name = "cc" -version = "1.0.79" +version = "1.2.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7" +dependencies = [ + "shlex", +] [[package]] name = "cfg-if" @@ -380,9 +383,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "flate2" -version = "1.0.25" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841" +checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" dependencies = [ "crc32fast", "miniz_oxide", @@ -712,6 +715,8 @@ version = "0.1.2" dependencies = [ "anyhow", "diff-struct", + "flate2", + "lz4", "maplit 0.1.6", "object", "pest", @@ -740,6 +745,25 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "lz4" +version = "1.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a20b523e860d03443e98350ceaac5e71c6ba89aea7d960769ec3ce37f4de5af4" +dependencies = [ + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.11.1+lz4-1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "maplit" version = "0.1.6" @@ -766,11 +790,11 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" -version = "0.6.2" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ - "adler", + "adler2", ] [[package]] @@ -1417,6 +1441,12 @@ dependencies = [ "dirs", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "slab" version = "0.4.8" diff --git a/libips/Cargo.toml b/libips/Cargo.toml index 4e997be..3e5cd79 100644 --- a/libips/Cargo.toml +++ b/libips/Cargo.toml @@ -29,3 +29,5 @@ strum = { version = "0.24.1", features = ["derive"] } serde = { version = "1.0.207", features = ["derive"] } serde_json = "1.0.124" diff-struct = "0.5.3" +flate2 = "1.0.28" +lz4 = "1.24.0" diff --git a/libips/src/repository/file_backend.rs b/libips/src/repository/file_backend.rs index bfc2752..a57d256 100644 --- a/libips/src/repository/file_backend.rs +++ b/libips/src/repository/file_backend.rs @@ -5,12 +5,15 @@ use anyhow::{Result, anyhow}; use std::fs; -use std::io::Read; +use std::io::{Read, Write}; use std::path::{Path, PathBuf}; use std::time::{SystemTime, UNIX_EPOCH}; use std::str::FromStr; use sha2::{Sha256, Digest as Sha2Digest}; use std::fs::File; +use flate2::write::GzEncoder; +use flate2::Compression as GzipCompression; +use lz4::EncoderBuilder; use crate::actions::{Manifest, File as FileAction}; use crate::digest::Digest; @@ -64,8 +67,9 @@ impl Transaction { /// Process a file for the transaction /// /// Takes a FileAction and a path to a file in a prototype directory. - /// Calculates the file's checksum, copies and "compresses" the content into a temp file - /// in the transactions directory, and updates the FileAction with the hash information. + /// Calculates the file's checksum, compresses the content using the specified algorithm (Gzip or LZ4), + /// stores the compressed content in a temp file in the transactions directory, + /// and updates the FileAction with the hash information for both uncompressed and compressed versions. pub fn add_file(&mut self, file_action: FileAction, file_path: &Path) -> Result<()> { // Calculate SHA256 hash of the file (uncompressed) let hash = Self::calculate_file_hash(file_path)?; @@ -74,28 +78,76 @@ impl Transaction { let temp_file_name = format!("temp_{}", hash); let temp_file_path = self.path.join(temp_file_name); - // Copy the file to the temp location (this is a placeholder for compression) - // In a real implementation, we would compress the file here - fs::copy(file_path, &temp_file_path)?; + // Check if the temp file already exists + if temp_file_path.exists() { + // If it exists, remove it to avoid any issues with existing content + fs::remove_file(&temp_file_path).map_err(|e| anyhow!("Failed to remove existing temp file: {}", e))?; + } - // For now, we're using the same hash for both uncompressed and "compressed" versions - // In a real implementation with compression, we would calculate the hash of the compressed file - let compressed_hash = hash.clone(); - - // Add file to the list for later processing during commit - self.files.push((file_path.to_path_buf(), hash.clone())); - - // Create a new FileAction with the updated information if one wasn't provided - let mut updated_file_action = file_action; + // Read the file content + let file_content = fs::read(file_path).map_err(|e| anyhow!("Failed to read file {}: {}", file_path.display(), e))?; // Create a payload with the hash information if it doesn't exist + let mut updated_file_action = file_action; let mut payload = updated_file_action.payload.unwrap_or_else(Payload::default); + // Set the compression algorithm (use the one from payload or default to Gzip) + let compression_algorithm = payload.compression_algorithm; + + // Compress the file based on the selected algorithm + let compressed_hash = match compression_algorithm { + PayloadCompressionAlgorithm::Gzip => { + // Create a Gzip encoder with default compression level + let mut encoder = GzEncoder::new(Vec::new(), GzipCompression::default()); + + // Write the file content to the encoder + encoder.write_all(&file_content) + .map_err(|e| anyhow!("Failed to write data to Gzip encoder: {}", e))?; + + // Finish the compression and get the compressed data + let compressed_data = encoder.finish() + .map_err(|e| anyhow!("Failed to finish Gzip compression: {}", e))?; + + // Write the compressed data to the temp file + fs::write(&temp_file_path, &compressed_data) + .map_err(|e| anyhow!("Failed to write compressed data to temp file: {}", e))?; + + // Calculate hash of the compressed data + let mut hasher = Sha256::new(); + hasher.update(&compressed_data); + format!("{:x}", hasher.finalize()) + }, + PayloadCompressionAlgorithm::LZ4 => { + // Create an LZ4 encoder with default compression level + let mut encoder = EncoderBuilder::new().build(Vec::new()) + .map_err(|e| anyhow!("Failed to create LZ4 encoder: {}", e))?; + + // Write the file content to the encoder + encoder.write_all(&file_content) + .map_err(|e| anyhow!("Failed to write data to LZ4 encoder: {}", e))?; + + // Finish the compression and get the compressed data + let (compressed_data, _) = encoder.finish(); + + // Write the compressed data to the temp file + fs::write(&temp_file_path, &compressed_data) + .map_err(|e| anyhow!("Failed to write LZ4 compressed data to temp file: {}", e))?; + + // Calculate hash of the compressed data + let mut hasher = Sha256::new(); + hasher.update(&compressed_data); + format!("{:x}", hasher.finalize()) + } + }; + + // Add file to the list for later processing during commit + self.files.push((temp_file_path.clone(), compressed_hash.clone())); + // Set the primary identifier (uncompressed hash) payload.primary_identifier = Digest::from_str(&hash)?; // Set the compression algorithm - payload.compression_algorithm = PayloadCompressionAlgorithm::Gzip; + payload.compression_algorithm = compression_algorithm; // Add the compressed hash as an additional identifier let compressed_digest = Digest::from_str(&compressed_hash)?;