Add support for file URL without algorithm and refactor JSON serialization for Python-style compatibility

- Added `/file/1/{digest}` route to support file retrieval without specifying the algorithm.
- Implemented a Python-compatible JSON serializer to ensure consistent formatting for catalog artifacts.
- Replaced `HashMap` with `BTreeMap` for deterministic ordering in catalog serialization and updates.
- Updated integration tests to validate the new route functionality and ensure response correctness.
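- Made `format_iso8601_basic` public and switched the file backend to it, replacing the approximate `format_iso8601_timestamp` helper so that timestamps are formatted consistently.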
This commit is contained in:
Till Wegmueller 2025-12-22 22:42:56 +01:00
parent a921c99eb6
commit ff0b9f4319
No known key found for this signature in database
6 changed files with 157 additions and 79 deletions

View file

@ -5,7 +5,7 @@
use miette::Diagnostic;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::collections::BTreeMap;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
@ -64,7 +64,7 @@ pub enum CatalogError {
pub type Result<T> = std::result::Result<T, CatalogError>;
/// Format a SystemTime as an ISO-8601 'basic format' date in UTC
fn format_iso8601_basic(time: &SystemTime) -> String {
pub fn format_iso8601_basic(time: &SystemTime) -> String {
    // Output shape: YYYYMMDDTHHMMSS.ffffffZ (UTC), e.g. 20090508T161025.686485Z.
    //
    // NOTE(review): assumes `convert_system_time_to_datetime` returns a chrono
    // date-time. With chrono, a bare `%f` prints the fraction as nine-digit
    // nanoseconds, whereas the catalog timestamps use six-digit microseconds.
    // `%.6f` emits the leading dot followed by exactly six fractional digits.
    let datetime = convert_system_time_to_datetime(time);
    format!("{}Z", datetime.format("%Y%m%dT%H%M%S%.6f"))
}
@ -141,18 +141,18 @@ pub struct CatalogAttrs {
pub package_version_count: usize,
/// Available catalog parts
pub parts: HashMap<String, CatalogPartInfo>,
pub parts: BTreeMap<String, CatalogPartInfo>,
/// Available update logs
#[serde(skip_serializing_if = "HashMap::is_empty")]
pub updates: HashMap<String, UpdateLogInfo>,
#[serde(skip_serializing_if = "BTreeMap::is_empty")]
pub updates: BTreeMap<String, UpdateLogInfo>,
/// Catalog version
pub version: u32,
/// Optional signature information
#[serde(rename = "_SIGNATURE", skip_serializing_if = "Option::is_none")]
pub signature: Option<HashMap<String, String>>,
pub signature: Option<BTreeMap<String, String>>,
}
impl CatalogAttrs {
@ -167,8 +167,8 @@ impl CatalogAttrs {
last_modified: timestamp,
package_count: 0,
package_version_count: 0,
parts: HashMap::new(),
updates: HashMap::new(),
parts: BTreeMap::new(),
updates: BTreeMap::new(),
version: CatalogVersion::V1 as u32,
}
}
@ -208,11 +208,11 @@ pub struct PackageVersionEntry {
pub struct CatalogPart {
/// Packages by publisher and stem
#[serde(flatten)]
pub packages: HashMap<String, HashMap<String, Vec<PackageVersionEntry>>>,
pub packages: BTreeMap<String, BTreeMap<String, Vec<PackageVersionEntry>>>,
/// Optional signature information
#[serde(rename = "_SIGNATURE", skip_serializing_if = "Option::is_none")]
pub signature: Option<HashMap<String, String>>,
pub signature: Option<BTreeMap<String, String>>,
}
impl CatalogPart {
@ -220,7 +220,7 @@ impl CatalogPart {
pub fn new() -> Self {
CatalogPart {
signature: None,
packages: HashMap::new(),
packages: BTreeMap::new(),
}
}
@ -235,7 +235,7 @@ impl CatalogPart {
let publisher_packages = self
.packages
.entry(publisher.to_string())
.or_insert_with(HashMap::new);
.or_insert_with(BTreeMap::new);
let stem_versions = publisher_packages
.entry(fmri.stem().to_string())
.or_insert_with(Vec::new);
@ -310,7 +310,7 @@ pub struct PackageUpdateEntry {
/// Catalog part entries
#[serde(flatten)]
pub catalog_parts: HashMap<String, HashMap<String, Vec<String>>>,
pub catalog_parts: BTreeMap<String, BTreeMap<String, Vec<String>>>,
/// Optional SHA-1 signature of the package manifest
#[serde(rename = "signature-sha-1", skip_serializing_if = "Option::is_none")]
@ -321,11 +321,11 @@ pub struct PackageUpdateEntry {
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct UpdateLog {
/// Updates by publisher and stem
pub updates: HashMap<String, HashMap<String, Vec<PackageUpdateEntry>>>,
pub updates: BTreeMap<String, BTreeMap<String, Vec<PackageUpdateEntry>>>,
/// Optional signature information
#[serde(rename = "_SIGNATURE", skip_serializing_if = "Option::is_none")]
pub signature: Option<HashMap<String, String>>,
pub signature: Option<BTreeMap<String, String>>,
}
impl UpdateLog {
@ -333,7 +333,7 @@ impl UpdateLog {
pub fn new() -> Self {
UpdateLog {
signature: None,
updates: HashMap::new(),
updates: BTreeMap::new(),
}
}
@ -343,13 +343,13 @@ impl UpdateLog {
publisher: &str,
fmri: &Fmri,
op_type: CatalogOperationType,
catalog_parts: HashMap<String, HashMap<String, Vec<String>>>,
catalog_parts: BTreeMap<String, BTreeMap<String, Vec<String>>>,
signature: Option<String>,
) {
let publisher_updates = self
.updates
.entry(publisher.to_string())
.or_insert_with(HashMap::new);
.or_insert_with(BTreeMap::new);
let stem_updates = publisher_updates
.entry(fmri.stem().to_string())
.or_insert_with(Vec::new);
@ -393,10 +393,10 @@ pub struct CatalogManager {
attrs: CatalogAttrs,
/// Catalog parts
parts: HashMap<String, CatalogPart>,
parts: BTreeMap<String, CatalogPart>,
/// Update logs
update_logs: HashMap<String, UpdateLog>,
update_logs: BTreeMap<String, UpdateLog>,
}
impl CatalogManager {
@ -421,8 +421,8 @@ impl CatalogManager {
catalog_dir: publisher_catalog_dir,
publisher: publisher.to_string(),
attrs,
parts: HashMap::new(),
update_logs: HashMap::new(),
parts: BTreeMap::new(),
update_logs: BTreeMap::new(),
})
}
@ -572,7 +572,7 @@ impl CatalogManager {
log_name: &str,
fmri: &Fmri,
op_type: CatalogOperationType,
catalog_parts: HashMap<String, HashMap<String, Vec<String>>>,
catalog_parts: BTreeMap<String, BTreeMap<String, Vec<String>>>,
signature: Option<String>,
) -> Result<()> {
if let Some(log) = self.update_logs.get_mut(log_name) {

View file

@ -7,11 +7,47 @@ use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
use serde::Serialize;
use serde_json::ser::{Formatter, Serializer};
use tracing::{debug, instrument};
use super::catalog::{CatalogAttrs, CatalogPart, UpdateLog};
use super::{RepositoryError, Result};
// JSON formatter that emits Python-style separators: ", " between entries
// and ": " between an object key and its value.
struct PythonFormatter;

// Writes the ", " entry separator in front of every entry except the first.
fn write_python_entry_separator<W: ?Sized + Write>(
    writer: &mut W,
    first: bool,
) -> std::io::Result<()> {
    if first {
        return Ok(());
    }
    writer.write_all(b", ")
}

impl Formatter for PythonFormatter {
    // Called before each object key; separates it from the previous entry.
    fn begin_object_key<W: ?Sized + Write>(
        &mut self,
        writer: &mut W,
        first: bool,
    ) -> std::io::Result<()> {
        write_python_entry_separator(writer, first)
    }

    // Called between an object key and its value.
    fn begin_object_value<W: ?Sized + Write>(&mut self, writer: &mut W) -> std::io::Result<()> {
        writer.write_all(b": ")
    }

    // Called before each array element; separates it from the previous one.
    fn begin_array_value<W: ?Sized + Write>(
        &mut self,
        writer: &mut W,
        first: bool,
    ) -> std::io::Result<()> {
        write_python_entry_separator(writer, first)
    }
}
fn serialize_python_style<T: Serialize>(value: &T) -> Result<Vec<u8>> {
let mut bytes = Vec::new();
let formatter = PythonFormatter;
let mut ser = Serializer::with_formatter(&mut bytes, formatter);
value.serialize(&mut ser).map_err(|e| {
RepositoryError::JsonSerializeError(format!("Python-style serialize error: {}", e))
})?;
bytes.push(b'\n');
Ok(bytes)
}
fn sha1_hex(bytes: &[u8]) -> String {
use sha1::Digest as _;
let mut hasher = sha1::Sha1::new();
@ -53,17 +89,13 @@ fn atomic_write_bytes(path: &Path, bytes: &[u8]) -> Result<()> {
pub(crate) fn write_catalog_attrs(path: &Path, attrs: &mut CatalogAttrs) -> Result<String> {
// Compute signature over content without _SIGNATURE
attrs.signature = None;
let bytes_without_sig = serde_json::to_vec(&attrs).map_err(|e| {
RepositoryError::JsonSerializeError(format!("Catalog attrs serialize error: {}", e))
})?;
let bytes_without_sig = serialize_python_style(&attrs)?;
let sig = sha1_hex(&bytes_without_sig);
let mut sig_map = std::collections::HashMap::new();
let mut sig_map = std::collections::BTreeMap::new();
sig_map.insert("sha-1".to_string(), sig);
attrs.signature = Some(sig_map);
let final_bytes = serde_json::to_vec(&attrs).map_err(|e| {
RepositoryError::JsonSerializeError(format!("Catalog attrs serialize error: {}", e))
})?;
let final_bytes = serialize_python_style(&attrs)?;
debug!(path = %path.display(), bytes = final_bytes.len(), "writing catalog.attrs");
atomic_write_bytes(path, &final_bytes)?;
// safe to unwrap as signature was just inserted
@ -78,17 +110,13 @@ pub(crate) fn write_catalog_attrs(path: &Path, attrs: &mut CatalogAttrs) -> Resu
pub(crate) fn write_catalog_part(path: &Path, part: &mut CatalogPart) -> Result<String> {
// Compute signature over content without _SIGNATURE
part.signature = None;
let bytes_without_sig = serde_json::to_vec(&part).map_err(|e| {
RepositoryError::JsonSerializeError(format!("Catalog part serialize error: {}", e))
})?;
let bytes_without_sig = serialize_python_style(&part)?;
let sig = sha1_hex(&bytes_without_sig);
let mut sig_map = std::collections::HashMap::new();
let mut sig_map = std::collections::BTreeMap::new();
sig_map.insert("sha-1".to_string(), sig);
part.signature = Some(sig_map);
let final_bytes = serde_json::to_vec(&part).map_err(|e| {
RepositoryError::JsonSerializeError(format!("Catalog part serialize error: {}", e))
})?;
let final_bytes = serialize_python_style(&part)?;
debug!(path = %path.display(), bytes = final_bytes.len(), "writing catalog part");
atomic_write_bytes(path, &final_bytes)?;
Ok(part
@ -102,17 +130,13 @@ pub(crate) fn write_catalog_part(path: &Path, part: &mut CatalogPart) -> Result<
pub(crate) fn write_update_log(path: &Path, log: &mut UpdateLog) -> Result<String> {
// Compute signature over content without _SIGNATURE
log.signature = None;
let bytes_without_sig = serde_json::to_vec(&log).map_err(|e| {
RepositoryError::JsonSerializeError(format!("Update log serialize error: {}", e))
})?;
let bytes_without_sig = serialize_python_style(&log)?;
let sig = sha1_hex(&bytes_without_sig);
let mut sig_map = std::collections::HashMap::new();
let mut sig_map = std::collections::BTreeMap::new();
sig_map.insert("sha-1".to_string(), sig);
log.signature = Some(sig_map);
let final_bytes = serde_json::to_vec(&log).map_err(|e| {
RepositoryError::JsonSerializeError(format!("Update log serialize error: {}", e))
})?;
let final_bytes = serialize_python_style(&log)?;
debug!(path = %path.display(), bytes = final_bytes.len(), "writing update log");
atomic_write_bytes(path, &final_bytes)?;
Ok(log

View file

@ -10,7 +10,7 @@ use lz4::EncoderBuilder;
use regex::Regex;
use serde::{Deserialize, Serialize};
use sha2::{Digest as Sha2Digest, Sha256};
use std::collections::{HashMap, HashSet};
use std::collections::{HashMap, HashSet, BTreeMap};
use std::fs;
use std::fs::File;
use std::io::{Read, Write};
@ -228,28 +228,6 @@ pub struct FileBackend {
Option<std::cell::RefCell<crate::repository::obsoleted::ObsoletedPackageManager>>,
}
/// Format a SystemTime as an ISO 8601 timestamp string
///
/// The result has the shape `YYYY-MM-DDTHH:MM:SS.ffffffZ` (UTC, microsecond
/// precision). Times before the Unix epoch are clamped to the epoch.
fn format_iso8601_timestamp(time: &SystemTime) -> String {
    let since_epoch = time
        .duration_since(SystemTime::UNIX_EPOCH)
        .unwrap_or(std::time::Duration::ZERO);

    let secs = since_epoch.as_secs();
    let micros = since_epoch.subsec_micros();

    let days = secs / 86_400;
    let secs_of_day = secs % 86_400;

    // Exact days -> (year, month, day) conversion for the proleptic Gregorian
    // calendar. The calendar is re-based on 0000-03-01 so that the leap day is
    // the last day of the shifted year, which keeps the arithmetic branch-free.
    // Dividing by fixed 365-day years and 30-day months (as this function used
    // to) drifts by days and can even yield a month of 13.
    let shifted = days + 719_468; // days between 0000-03-01 and 1970-01-01
    let era = shifted / 146_097; // 400-year cycles
    let day_of_era = shifted % 146_097;
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    let shifted_month = (5 * day_of_year + 2) / 153; // 0 = March ... 11 = February
    let day = day_of_year - (153 * shifted_month + 2) / 5 + 1;
    let month = if shifted_month < 10 {
        shifted_month + 3
    } else {
        shifted_month - 9
    };
    // January and February belong to the following civil year.
    let year = year_of_era + era * 400 + u64::from(month <= 2);

    // Format as ISO 8601 with microsecond precision
    format!(
        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:06}Z",
        year,
        month,
        day,
        secs_of_day / 3_600,
        (secs_of_day % 3_600) / 60,
        secs_of_day % 60,
        micros
    )
}
/// Transaction for publishing packages
pub struct Transaction {
@ -759,9 +737,9 @@ impl ReadableRepository for FileBackend {
let updated = if latest_timestamp == SystemTime::UNIX_EPOCH {
// If no files were found, use the current time
let now = SystemTime::now();
format_iso8601_timestamp(&now)
crate::repository::catalog::format_iso8601_basic(&now)
} else {
format_iso8601_timestamp(&latest_timestamp)
crate::repository::catalog::format_iso8601_basic(&latest_timestamp)
};
// Create a PublisherInfo struct and add it to the list
@ -1801,9 +1779,9 @@ impl FileBackend {
locale: &str,
fmri: &crate::fmri::Fmri,
op_type: crate::repository::catalog::CatalogOperationType,
catalog_parts: std::collections::HashMap<
catalog_parts: std::collections::BTreeMap<
String,
std::collections::HashMap<String, Vec<String>>,
std::collections::BTreeMap<String, Vec<String>>,
>,
signature_sha1: Option<String>,
) -> Result<()> {
@ -1830,7 +1808,7 @@ impl FileBackend {
Some(p) => p,
None => {
let now = std::time::SystemTime::now();
let ts = format_iso8601_timestamp(&now); // e.g., 20090508T161025.686485Z
let ts = crate::repository::catalog::format_iso8601_basic(&now); // e.g., 20090508T161025.686485Z
let stem = ts.split('.').next().unwrap_or(&ts); // take up to seconds
catalog_dir.join(format!("update.{}.{}", stem, locale))
}
@ -1863,7 +1841,7 @@ impl FileBackend {
Some(s) => s,
None => {
let now = std::time::SystemTime::now();
let ts = format_iso8601_timestamp(&now);
let ts = crate::repository::catalog::format_iso8601_basic(&now);
ts.split('.').next().unwrap_or(&ts).to_string()
}
};
@ -2386,18 +2364,18 @@ impl FileBackend {
// Prepare update entry if needed
if create_update_log {
let mut catalog_parts = HashMap::new();
let mut catalog_parts = BTreeMap::new();
// Add dependency actions to update entry
if !dependency_actions.is_empty() {
let mut actions = HashMap::new();
let mut actions = BTreeMap::new();
actions.insert("actions".to_string(), dependency_actions);
catalog_parts.insert("catalog.dependency.C".to_string(), actions);
}
// Add summary actions to update entry
if !summary_actions.is_empty() {
let mut actions = HashMap::new();
let mut actions = BTreeMap::new();
actions.insert("actions".to_string(), summary_actions);
catalog_parts.insert("catalog.summary.C".to_string(), actions);
}
@ -2427,7 +2405,7 @@ impl FileBackend {
// Create a catalog.attrs file
let now = SystemTime::now();
let timestamp = format_iso8601_timestamp(&now);
let timestamp = crate::repository::catalog::format_iso8601_basic(&now);
// Get the CatalogAttrs struct definition to see what fields it has
let mut attrs = crate::repository::catalog::CatalogAttrs {
@ -2435,10 +2413,10 @@ impl FileBackend {
last_modified: timestamp.clone(),
package_count,
package_version_count,
parts: HashMap::new(),
parts: BTreeMap::new(),
version: 1, // CatalogVersion::V1 is 1
signature: None,
updates: HashMap::new(),
updates: BTreeMap::new(),
};
// Add part information

View file

@ -16,6 +16,23 @@ pub async fn get_file(
State(repo): State<Arc<DepotRepo>>,
Path((publisher, _algo, digest)): Path<(String, String, String)>,
req: Request,
) -> Result<Response, DepotError> {
get_file_impl(repo, publisher, digest, req).await
}
/// Handler for file requests whose path carries only a publisher and a
/// digest (no algorithm segment); delegates to `get_file_impl`.
pub async fn get_file_no_algo(
    State(repo): State<Arc<DepotRepo>>,
    Path(segments): Path<(String, String)>,
    req: Request,
) -> Result<Response, DepotError> {
    // Path segments arrive in route order: publisher first, then digest.
    let (publisher, digest) = segments;
    get_file_impl(repo, publisher, digest, req).await
}
async fn get_file_impl(
repo: Arc<DepotRepo>,
publisher: String,
digest: String,
req: Request,
) -> Result<Response, DepotError> {
let path = repo.get_file_path(&publisher, &digest).ok_or_else(|| {
DepotError::Repo(libips::repository::RepositoryError::NotFound(

View file

@ -31,6 +31,10 @@ pub fn app_router(state: Arc<DepotRepo>) -> Router {
"/{publisher}/file/1/{algo}/{digest}",
get(file::get_file).head(file::get_file),
)
.route(
"/{publisher}/file/1/{digest}",
get(file::get_file_no_algo).head(file::get_file_no_algo),
)
.route("/{publisher}/info/0/{fmri}", get(info::get_info))
.route("/{publisher}/publisher/0", get(publisher::get_publisher_v0))
.route("/{publisher}/publisher/1", get(publisher::get_publisher_v1))

View file

@ -326,3 +326,58 @@ async fn test_ini_only_repo_serving_catalog() {
}
}
}
#[tokio::test]
async fn test_file_url_without_algo() {
    // Build a throwaway repository on disk for the depot to serve.
    let temp_dir = TempDir::new().unwrap();
    let repo_path = setup_repo(&temp_dir);

    let server = ServerConfig {
        bind: vec!["127.0.0.1:0".to_string()],
        workers: None,
        max_connections: None,
        reuseport: None,
        cache_max_age: Some(3600),
        tls_cert: None,
        tls_key: None,
    };
    let repository = RepositoryConfig {
        root: repo_path.clone(),
        mode: Some("readonly".to_string()),
    };
    let config = Config {
        server,
        repository,
        telemetry: None,
        publishers: None,
        admin: None,
        oauth2: None,
    };

    let router = http::routes::app_router(Arc::new(DepotRepo::new(&config).unwrap()));

    // Bind to an OS-assigned port and remember where the server listens.
    let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
    let addr = listener.local_addr().unwrap();

    // Run the server in the background for the duration of the test.
    tokio::spawn(async move {
        http::server::run(router, listener).await.unwrap();
    });

    // Digest expected to exist in the test repository
    // (NOTE(review): likely the SHA-256 of the compressed content; confirm against setup_repo).
    let hash = "40dafd2319edb9b7c930958f7b8d2d59198f88c906d50811b21436008ef0746f";

    // URL form under test: /{publisher}/file/1/{hash}, i.e. no algorithm segment.
    let url = format!("http://{}/test/file/1/{}", addr, hash);
    println!("Requesting: {}", url);

    let http_client = reqwest::Client::new();
    let resp = http_client.get(&url).send().await.unwrap();
    assert_eq!(resp.status(), 200, "Should handle file URL without algorithm");

    // Read the body to completion; its contents are not inspected.
    let _content = resp.text().await.unwrap();
}