Add compression support (Gzip/LZ4) and update dependencies

Signed-off-by: Till Wegmueller <toasterson@gmail.com>
This commit is contained in:
Till Wegmueller 2025-07-21 22:37:08 +02:00
parent 1a5e0e053d
commit 8a32bf3176
No known key found for this signature in database
3 changed files with 110 additions and 26 deletions

50
Cargo.lock generated
View file

@ -3,10 +3,10 @@
version = 4
[[package]]
name = "adler"
version = "1.0.2"
name = "adler2"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "aho-corasick"
@ -147,9 +147,12 @@ checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be"
[[package]]
name = "cc"
version = "1.0.79"
version = "1.2.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7"
dependencies = [
"shlex",
]
[[package]]
name = "cfg-if"
@ -380,9 +383,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "flate2"
version = "1.0.25"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841"
checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d"
dependencies = [
"crc32fast",
"miniz_oxide",
@ -712,6 +715,8 @@ version = "0.1.2"
dependencies = [
"anyhow",
"diff-struct",
"flate2",
"lz4",
"maplit 0.1.6",
"object",
"pest",
@ -740,6 +745,25 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "lz4"
version = "1.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a20b523e860d03443e98350ceaac5e71c6ba89aea7d960769ec3ce37f4de5af4"
dependencies = [
"lz4-sys",
]
[[package]]
name = "lz4-sys"
version = "1.11.1+lz4-1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "maplit"
version = "0.1.6"
@ -766,11 +790,11 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "miniz_oxide"
version = "0.6.2"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
"adler",
"adler2",
]
[[package]]
@ -1417,6 +1441,12 @@ dependencies = [
"dirs",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "slab"
version = "0.4.8"

View file

@ -29,3 +29,5 @@ strum = { version = "0.24.1", features = ["derive"] }
serde = { version = "1.0.207", features = ["derive"] }
serde_json = "1.0.124"
diff-struct = "0.5.3"
flate2 = "1.0.28"
lz4 = "1.24.0"

View file

@ -5,12 +5,15 @@
use anyhow::{Result, anyhow};
use std::fs;
use std::io::Read;
use std::io::{Read, Write};
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
use std::str::FromStr;
use sha2::{Sha256, Digest as Sha2Digest};
use std::fs::File;
use flate2::write::GzEncoder;
use flate2::Compression as GzipCompression;
use lz4::EncoderBuilder;
use crate::actions::{Manifest, File as FileAction};
use crate::digest::Digest;
@ -64,8 +67,9 @@ impl Transaction {
/// Process a file for the transaction
///
/// Takes a FileAction and a path to a file in a prototype directory.
/// Calculates the file's checksum, copies and "compresses" the content into a temp file
/// in the transactions directory, and updates the FileAction with the hash information.
/// Calculates the file's checksum, compresses the content using the specified algorithm (Gzip or LZ4),
/// stores the compressed content in a temp file in the transactions directory,
/// and updates the FileAction with the hash information for both uncompressed and compressed versions.
pub fn add_file(&mut self, file_action: FileAction, file_path: &Path) -> Result<()> {
// Calculate SHA256 hash of the file (uncompressed)
let hash = Self::calculate_file_hash(file_path)?;
@ -74,28 +78,76 @@ impl Transaction {
let temp_file_name = format!("temp_{}", hash);
let temp_file_path = self.path.join(temp_file_name);
// Copy the file to the temp location (this is a placeholder for compression)
// In a real implementation, we would compress the file here
fs::copy(file_path, &temp_file_path)?;
// Check if the temp file already exists
if temp_file_path.exists() {
// If it exists, remove it to avoid any issues with existing content
fs::remove_file(&temp_file_path).map_err(|e| anyhow!("Failed to remove existing temp file: {}", e))?;
}
// For now, we're using the same hash for both uncompressed and "compressed" versions
// In a real implementation with compression, we would calculate the hash of the compressed file
let compressed_hash = hash.clone();
// Add file to the list for later processing during commit
self.files.push((file_path.to_path_buf(), hash.clone()));
// Create a new FileAction with the updated information if one wasn't provided
let mut updated_file_action = file_action;
// Read the file content
let file_content = fs::read(file_path).map_err(|e| anyhow!("Failed to read file {}: {}", file_path.display(), e))?;
// Create a payload with the hash information if it doesn't exist
let mut updated_file_action = file_action;
let mut payload = updated_file_action.payload.unwrap_or_else(Payload::default);
// Pick the compression algorithm carried by the payload (a payload created via Payload::default supplies its default algorithm)
let compression_algorithm = payload.compression_algorithm;
// Compress the file based on the selected algorithm
let compressed_hash = match compression_algorithm {
PayloadCompressionAlgorithm::Gzip => {
// Create a Gzip encoder with default compression level
let mut encoder = GzEncoder::new(Vec::new(), GzipCompression::default());
// Write the file content to the encoder
encoder.write_all(&file_content)
.map_err(|e| anyhow!("Failed to write data to Gzip encoder: {}", e))?;
// Finish the compression and get the compressed data
let compressed_data = encoder.finish()
.map_err(|e| anyhow!("Failed to finish Gzip compression: {}", e))?;
// Write the compressed data to the temp file
fs::write(&temp_file_path, &compressed_data)
.map_err(|e| anyhow!("Failed to write compressed data to temp file: {}", e))?;
// Calculate hash of the compressed data
let mut hasher = Sha256::new();
hasher.update(&compressed_data);
format!("{:x}", hasher.finalize())
},
PayloadCompressionAlgorithm::LZ4 => {
// Create an LZ4 encoder with default compression level
let mut encoder = EncoderBuilder::new().build(Vec::new())
.map_err(|e| anyhow!("Failed to create LZ4 encoder: {}", e))?;
// Write the file content to the encoder
encoder.write_all(&file_content)
.map_err(|e| anyhow!("Failed to write data to LZ4 encoder: {}", e))?;
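// Finish the compression and get the compressed data.
// NOTE(review): the second tuple element returned by finish() is discarded here; if it carries the encoder's final status, a failed finish would go unnoticed — confirm and propagate it.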
let (compressed_data, _) = encoder.finish();
// Write the compressed data to the temp file
fs::write(&temp_file_path, &compressed_data)
.map_err(|e| anyhow!("Failed to write LZ4 compressed data to temp file: {}", e))?;
// Calculate hash of the compressed data
let mut hasher = Sha256::new();
hasher.update(&compressed_data);
format!("{:x}", hasher.finalize())
}
};
// Add file to the list for later processing during commit
self.files.push((temp_file_path.clone(), compressed_hash.clone()));
// Set the primary identifier (uncompressed hash)
payload.primary_identifier = Digest::from_str(&hash)?;
// Set the compression algorithm
payload.compression_algorithm = PayloadCompressionAlgorithm::Gzip;
payload.compression_algorithm = compression_algorithm;
// Add the compressed hash as an additional identifier
let compressed_digest = Digest::from_str(&compressed_hash)?;