From 9b2f74c5c1fba756c3ef524bad7f6974c5cd08a1 Mon Sep 17 00:00:00 2001 From: Till Wegmueller Date: Tue, 29 Jul 2025 16:16:12 +0200 Subject: [PATCH] Introduce obsoleted package management system in IPS - Add `obsoleted.rs` module to handle storing, metadata management, and operations for obsoleted packages. - Implement commands for marking, listing, searching, restoring, exporting, and importing obsoleted packages (`pkg6repo`). - Enhance `RepositoryError` with `From` implementations for various error types to manage database and serialization-related errors. - Introduce reusable data structures for obsoleted package metadata and export representation. - Update `Cargo.toml` and `Cargo.lock` to include new dependencies (`redb`, `bincode`, etc.). - Document obsoleted package workflow and integration details in `doc/obsoleted_packages.md` for contributors. - Refactor repository internals to integrate obsoleted package support without disrupting existing workflow. - Add robust error handling, logging, and pagination for enhanced usability and scalability. 
--- Cargo.lock | 20 + doc/Obsoletion Index.txt | 54 + doc/obsoleted_packages.md | 432 +++++ libips/Cargo.toml | 6 + libips/src/repository/file_backend.rs | 44 +- libips/src/repository/mod.rs | 45 + libips/src/repository/obsoleted.rs | 2399 +++++++++++++++++++++++++ pkg6repo/src/e2e_tests.rs | 146 ++ pkg6repo/src/error.rs | 8 + pkg6repo/src/main.rs | 666 +++++++ pkg6repo/src/pkg5_import.rs | 101 +- 11 files changed, 3903 insertions(+), 18 deletions(-) create mode 100644 doc/Obsoletion Index.txt create mode 100644 doc/obsoleted_packages.md create mode 100644 libips/src/repository/obsoleted.rs diff --git a/Cargo.lock b/Cargo.lock index a57d3f7..13ad048 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -144,6 +144,15 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -954,6 +963,7 @@ checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" name = "libips" version = "0.1.2" dependencies = [ + "bincode", "chrono", "diff-struct", "flate2", @@ -963,6 +973,7 @@ dependencies = [ "object 0.23.0", "pest", "pest_derive", + "redb", "regex", "semver", "serde", @@ -1504,6 +1515,15 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "redb" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd7f82ecd6ba647a39dd1a7172b8a1cd9453c0adee6da20cb553d83a9a460fa5" +dependencies = [ + "libc", +] + [[package]] name = "redox_users" version = "0.4.6" diff --git a/doc/Obsoletion Index.txt b/doc/Obsoletion Index.txt new 
file mode 100644 index 0000000..d9dd501 --- /dev/null +++ b/doc/Obsoletion Index.txt @@ -0,0 +1,54 @@ +Efficient Filesystem Organization for Obsolete Package Metadata in Rust1. Executive SummaryThe challenge of managing a large, evolving dataset of package obsolescence information, represented in JSON files and indexed by Fully Qualified Module/Resource Identifiers (FMRIs), demands a highly efficient and robust storage strategy. Key requirements include minimizing write operations, optimizing storage space, enabling rapid client access, maintaining data integrity, and leveraging the capabilities of the Rust programming language. This is particularly critical for maintaining the health and operational continuity of any software supply chain.A hybrid storage architecture is proposed to address these requirements. This architecture combines a Content-Addressable Storage (CAS) layer for deduplicated JSON blobs with a high-performance, embedded, ACID-compliant key-value store, specifically redb, for indexing FMRIs to their corresponding content hashes. Merkle trees will provide an additional layer for data integrity verification and comprehensive dataset versioning. Atomic write operations and efficient binary serialization techniques will be employed to minimize disk I/O and optimize storage.This solution offers several significant benefits: it achieves substantial storage efficiency through inherent deduplication of identical content, ensures robust data integrity and crash-safety, supports concurrent client access for diverse query patterns, and establishes a foundational framework for tracking the complete lifecycle of software packages.2. Introduction: The Challenge of Obsolescence Data ManagementDefining Package Obsolescence in a Software Ecosystem ContextPackage obsolescence represents a critical concern in modern software ecosystems. 
It occurs when a component or technology is no longer manufactured, available, or supported by its original supplier.1 This phenomenon can arise from various factors, including rapid technological advancements, shifts in market demand, changes in government regulations, or the natural end-of-production for older components.1 Beyond individual parts, the broader concept of digital obsolescence encompasses the risk of data loss due to the inability to access digital assets, often caused by the continuous replacement of hardware and software with increasingly incompatible formats.3 This can be a deliberate strategy, such as "postponement obsolescence" (intentionally upgrading only parts of a system) or "systemic obsolescence" (designing new versions to be incompatible with old), or it can be purely "technical obsolescence" driven by the adoption of newer, more accessible technologies.3The implications of unmanaged obsolescence are severe for any organization relying on software. It can lead to costly downtime if critical components are unavailable, force premature and expensive system replacements, and introduce significant security vulnerabilities due to unpatched or unsupported software.1 Proactive obsolescence management is therefore not merely a technical detail but a vital strategic imperative for maintaining operational continuity and mitigating substantial business risks.The Specific ProblemThe core problem at hand involves the efficient storage and retrieval of a large, dynamic dataset of JSON files. These files are designed to mark software packages as obsolete, uniquely identified by an FMRI (Fully Qualified Module/Resource Identifier), and must include detailed information about their replacements. 
The term "large amount" suggests a dataset potentially spanning millions to billions of individual records, necessitating a solution that is highly scalable in terms of both storage capacity and access performance.GoalsThe design of this system is guided by several explicit and implicit objectives:Minimize Write Operations: Reducing the frequency and volume of disk I/O is crucial. This extends the lifespan of storage media, particularly Solid-State Drives (SSDs), and enhances overall system performance by decreasing latency associated with disk writes.Optimize Storage Space: Efficient storage of JSON data is paramount, especially given the potential for content duplication across records. The system must avoid redundant storage of identical data.Enable Efficient Client Access: The solution must provide fast lookups and retrieval capabilities to support diverse client queries, ensuring responsiveness for users or other system components accessing obsolescence information.Maintain Data Integrity: Ensuring the consistency, accuracy, and recoverability of the stored data is fundamental. The system must be resilient to failures and capable of restoring to a consistent state.Rust Implementation: The application is to be written in Rust, leveraging its strengths in performance, memory safety, and robust concurrency primitives to build a reliable and high-performance system.3. Understanding Package Obsolescence and Replacement MetadataCurrent Paradigms in Package Management for Obsolescence and ReplacementsModern package management systems employ various strategies to handle software obsolescence and define replacement relationships. 
These strategies offer valuable insights into the necessary features for a dedicated obsolescence management system.Digital preservation, in a broader sense, combats obsolescence through practices such as bitstream copying (data backup), refreshing (moving data between similar formats), migration (converting data to newer formats), encapsulation (bundling data with metadata and environment specifications), and emulation (simulating obsolete systems).3 The adoption of open-source software is also recognized as a strategic defense against digital obsolescence due to its adaptability and source code availability.3Specific package managers illustrate these concepts:Debian (APT): Debian's Advanced Package Tool (APT) manages software packages, which are essentially archives containing executables, libraries, configuration files, and metadata like version and dependencies.5 When a package is removed, the dpkg --purge or apt-get remove --purge commands ensure a complete deletion, including associated configuration files.7 Debian also utilizes "meta-packages" (e.g., gnome), which are largely empty packages that define dependencies on a coherent set of other packages. 
As the meta-package evolves, APT automatically handles the addition or removal of its component packages on the user's system.8 For Rust applications, cargo-deb can generate Debian packages by reading metadata from Cargo.toml.5 Debian repositories themselves are structured hierarchically by distribution, component, and architecture, using signed Release files for trust and .changes files (which include checksums) to process updates.9RPM: The RedHat Package Manager (RPM) is a sophisticated archiving format that packs files and directories along with metadata such as version numbers and dependencies.11 RPM packages can declare that they "obsolete" or "replace" other packages using specific metadata tags.13 The rpm -F or --freshen command facilitates upgrades by removing older versions of a package after a newer one is installed.14 RPM relies on a system-wide database to track installed software, though users can create private databases for more flexible management.11npm: The Node Package Manager (npm) provides the npm deprecate command, which allows developers to mark specific package versions as outdated or unmaintained.15 This action does not remove the package from the npm registry but flags it, issuing a warning to users who attempt to install it.15 Deprecation often serves as a signal for potential security vulnerabilities or compatibility issues.15 Research indicates that a substantial portion of widely used npm packages are deprecated, frequently due to unmaintained repositories rather than explicit npm deprecate flags, highlighting a visibility challenge for developers.16 The npm caching mechanism stores downloaded .tgz files and various index files locally to speed up installations. 
It validates cached packages by generating and comparing SHA-512 checksums against original hashes.17Cargo (Rust): In the Rust ecosystem, Cargo.toml serves as the manifest file for a package, containing essential metadata.18 The package.metadata table within Cargo.toml is specifically ignored by Cargo, allowing external tools to store project-specific configuration there.18crates.io is the default public registry for Rust packages. Its index is maintained as a Git repository, containing a searchable list of available crates.19 Each package in this index has a file where each line is a JSON object describing a published version, including its name, version, dependencies, checksum, and a yanked field to indicate if the version has been deprecated.19 This yanked field is the only metadata that can be modified after publication, providing a mechanism for marking obsolescence. The index files are organized into a tiered directory structure based on the initial characters of the package name to manage a large number of entries efficiently.19Proposed Data Model for Obsolete Packages and their ReplacementsThe core of the proposed system will utilize JSON documents to represent the obsolescence status of packages. The FMRI (Fully Qualified Module/Resource Identifier) naturally serves as the unique primary key for these records, ensuring precise identification and lookup.A critical step for ensuring consistency and preventing data duplication in a content-addressable system is the standardization of FMRI canonicalization. Just as URLs require canonicalization before hashing to ensure that logically identical URLs (e.g., http://example.com/ and http://example.com) produce the same hash 23, FMRIs must undergo a similar process. This involves a well-defined, strict set of rules to normalize the FMRI string (e.g., consistent casing, removal of redundant delimiters, resolution of path components). 
Implementing this as a dedicated utility function in the Rust application is paramount to ensure that logically equivalent FMRIs always map to the same content hash, thus enabling accurate deduplication and reliable lookups. The proposed JSON structure for an obsolescence record is as follows:JSON{ + "fmri": "pkg:/compiler/gcc@11.3.0,5.11-0.175.3.4.0.5.0:20220915T094500Z", + "status": "obsolete", + "obsolescence_date": "2024-07-20T10:00:00Z", + "deprecation_message": "This version of GCC is deprecated due to critical security vulnerabilities. Please upgrade.", + "replaces": [], + "obsoleted_by": [ + "pkg:/developer/toolchain@2.0.0" + ], + "metadata_version": 1, + "content_hash": "sha256-abc123def456..." +} +Proposed Data Model for Obsolete Package RecordsField NameTypeDescriptionExamplefmriStringThe unique Fully Qualified Module/Resource Identifier for the package.pkg:/runtime/java/jre-8statusStringCurrent obsolescence status (e.g., "obsolete", "deprecated", "active")."obsolete"obsolescence_dateISO 8601 TimestampThe date and time when the package was officially marked obsolete."2024-07-20T10:00:00Z"deprecation_messageString (Optional)A human-readable explanation for the obsolescence."End-of-Life due to security vulnerabilities."replacesArray of FMRIs (Optional)A list of FMRIs that this obsolete package is intended to replace.["pkg:/runtime/java/jre-11", "pkg:/runtime/java/jre-17"]obsoleted_byArray of FMRIs (Optional)A list of FMRIs that supersede or make this package obsolete.["pkg:/runtime/java/jre-11"]metadata_versionIntegerAn internal version number for the JSON schema, enabling schema evolution.1content_hashStringCryptographic hash (e.g., SHA-256) of the entire JSON document, used for content-addressable storage and integrity verification."sha256-d4c5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5"The replaces and obsoleted_by fields are crucial as they directly capture the replacement relationships specified in the user's requirements. 
The metadata_version field ensures that the system can gracefully handle future changes to the JSON schema without invalidating historical data. The content_hash links the logical record to its physical storage in the Content-Addressable Storage layer.Considerations for Transitive Obsolescence and Dependency ChainsPackage managers routinely manage both direct and transitive dependencies. Direct dependencies are explicitly declared by a project, while transitive dependencies are those required by the direct dependencies themselves.24 These "hidden" dependencies can significantly increase software size and complexity, leading to version conflicts, compatibility issues, and the silent propagation of security vulnerabilities.24 For instance, a vulnerable transitive dependency can impact multiple projects, making risk mitigation complex and time-consuming.26The replaces and obsoleted_by fields in the proposed data model inherently form a directed graph of obsolescence relationships. When a package becomes obsolete, its obsolescence status can cascade through the dependency chain, leading to what can be termed "transitive obsolescence." While package managers like RPM use Obsoletes tags 13 and npm uses deprecate flags 15 to manage direct obsolescence, efficiently determining the full transitive impact is a complex problem.25 Functional package managers, for example, aim to precisely describe the entire dependency graph, including bootstrap binaries, to manage such complexities.27A comprehensive system for managing obsolescence would require a dedicated layer or data structure to efficiently query and manage these transitive relationships. 
Techniques from graph theory could be applied; for example, a disjoint-set data structure (also known as Union-Find) could efficiently determine if two packages belong to the same "obsolescence set" or track the "active" replacement for an obsolete package across a chain of dependencies.28 Alternatively, the Chain of Responsibility design pattern could model how obsolescence "flows" through a dependency chain, with each package acting as a handler that decides if it is affected or if it passes the "obsolete" status to its dependents.29 The current system provides the raw data necessary to build this graph, but the "intelligence" for analyzing and managing transitive obsolescence would reside in an additional, specialized component built on top of this core data store.4. Core Storage Principles for Efficient Filesystem OrganizationContent-Addressable Storage (CAS) for Deduplication and IntegrityContent-Addressable Storage (CAS) is a fundamental principle for efficiently organizing and ensuring the integrity of fixed content. In a CAS system, information is retrieved based on its content, rather than its name or physical location.30 This is achieved by passing the file's content through a cryptographic hash function to generate a unique "content address" or key.30 The filesystem's directory then stores these content addresses along with pointers to the physical storage location of the content.30For the management of JSON obsolescence files, CAS offers significant advantages:Deduplication: A key benefit is the automatic deduplication of data. If an attempt is made to store an identical JSON file (e.g., multiple packages having the exact same replacement information), the hash function will produce the same content address. 
The system recognizes that the content already exists and avoids storing a duplicate, thereby ensuring that files are unique within the system and optimizing storage space.30 This directly addresses the requirement for efficiently organizing filesystem space.Integrity: Because the content address is derived directly from the file's content, any alteration to the JSON document, even a single character change, will result in a different hash.30 This provides a strong assurance that the stored file remains unchanged and untampered, which is crucial for trusted metadata in a package management system.Location Independence: Retrieval in a CAS system is based on the content hash, decoupling the logical access from the physical storage location. This allows files to be moved between storage devices or even different media without breaking logical links, as only the internal mapping from the key to the physical location needs updating.30Suitability for Fixed Content: CAS is particularly well-suited for fixed content, such as the JSON records representing package obsolescence. Once a record is created for a specific FMRI and version, its content is considered immutable. Any "update" implies creating a new record with a new content hash, rather than modifying the existing one in place.30Hashing Strategies for FMRIs and JSON Content:SHA-256 is a widely accepted and robust cryptographic hash function, generating a 256-bit (32-byte) hash. It offers strong collision resistance and is suitable for generating the content_hash for the JSON documents.23When selecting a hashing algorithm for a CAS system, a balance must be struck between cryptographic security (collision resistance) and computational performance. 
While SHA-256 provides a high level of security, other algorithms like Blake3 offer significant performance advantages due to their design for parallel computation.33 For internal system integrity checks where adversarial inputs are not a primary concern, a faster, non-cryptographic hash like XXH3 (used internally by redb for checksums 37) could be considered. However, for the content_hash that guarantees the integrity of the JSON blobs against accidental or malicious alteration, a cryptographic hash like SHA-256 is generally preferred for its strong security properties. If performance becomes a bottleneck for hashing extremely large numbers of JSON documents, Blake3 should be thoroughly benchmarked as an alternative. The chosen hashing algorithm must be consistently applied across the entire system to ensure reliable content addressing.Hashing Algorithm Suitability for CASAlgorithmTypeCollision ResistancePerformanceParallelizabilityTypical Use CaseSHA-256CryptographicHighModerateLimitedGeneral-purpose integrity, digital signatures, blockchainBlake3CryptographicHighHighExcellentFast hashing of large data, concurrent systemsXXH3Non-CryptographicModerateVery HighGoodInternal checksums, non-security-critical data integrityA CAS-like directory structure typically organizes files based on their content hash. 
This involves creating nested subfolders derived from prefixes of the hash (e.g., root/ab/cd/ef/hash_remainder.json).31 This design is optimized for handling a very large number of files by distributing them across many directories, preventing any single directory from becoming excessively large and improving lookup performance by reducing the number of entries per directory.Minimizing Write Operations and Ensuring AtomicityMinimizing write operations is crucial for extending storage lifespan and improving performance, especially on SSDs, which have finite write endurance.38 Ensuring atomicity prevents data corruption in the event of system failures.Copy-on-Write (COW): This is a resource-management technique where data is shared by multiple consumers until one attempts to modify it, at which point a private copy is created.40 COW is fundamental to modern filesystems like ZFS and Btrfs, and databases such as Microsoft SQL Server.40 In the context of obsolescence data, when a package's status changes (e.g., a new replacement is identified), a new JSON record is logically created with a new content hash. The old record, with its original content hash, remains in the CAS, effectively leveraging COW principles at the logical level. 
This approach naturally supports efficient snapshots and versioning without requiring full data duplication for each change.41 On supported filesystems, the reflink Rust crate can utilize OS-level block cloning capabilities for extremely efficient file copies, where only metadata is updated until actual data modification occurs.42Atomic File Writes: To prevent data corruption from crashes or interruptions, it is essential that file writes are atomic, meaning either all changes within an operation occur or none of them do.43 Database systems like SQLite achieve this through mechanisms like rollback journals and atomic file renames or deletions.43 For individual JSON files within the CAS, the atomic-write-file Rust crate provides this critical functionality.44 It works by writing new content to a temporary file in the same filesystem. Upon successful completion and flushing of the new content, the temporary file is atomically renamed to the target path. This ensures that the original file (if any) is preserved in case of an interruption before the commit, preventing the file from being left in a broken or inconsistent state.44Append-Only Storage Patterns: An append-only storage pattern involves adding new data exclusively to the end of a log or file.48 This pattern is highly beneficial for write performance as it minimizes random writes and seeks, which are typically slower operations. For obsolescence data, new records are appended, and existing ones are logically marked as superseded (e.g., by updating an index to point to the new record). Physical deletion of old data can be managed through lazy deletion 53 or Time-to-Live (TTL) policies.51 These policies allow data to be marked for eventual cleanup without immediate physical removal, which further reduces the number of physical write operations and mitigates write amplification.Performance Implications of Many Small Files vs. 
Fewer Large Files:A significant challenge when dealing with a "large amount of JSON files" is the inherent performance overhead associated with managing numerous small files on a traditional filesystem. Transferring or processing a large number of small files incurs substantial operating system overhead due to the repeated execution of metadata operations such as find(), open(), and close() for each file.56 These metadata calls are often processed serially rather than in parallel, leading to performance bottlenecks.56 In contrast, large files benefit from contiguous operations, larger read/write sizes, and network optimizations like SMB3 Large MTU or Multi-channel capabilities.56 Simple strategies like zipping small files together before transfer can significantly improve performance by reducing the number of individual file operations.57The "small file problem" poses a direct threat to the efficiency of a system designed to manage a large collection of JSON files. Directly storing each JSON file as a separate entity on a standard filesystem would lead to severe performance degradation and inefficient storage utilization. This is where the strategic adoption of an embedded key-value store becomes critical. An embedded database, such as redb or grebedb, addresses this problem by abstracting away the individual JSON files. 
Instead of managing millions of discrete files, the database organizes data within larger, optimized internal structures like B-trees, which are stored within one or a few large files on the disk.37 This approach allows the application to interact with a single logical database, or a limited number of large files, rather than incurring the overhead of thousands or millions of small file operations, thereby centralizing metadata and significantly improving I/O patterns.Merkle Trees for Data Integrity and VersioningA Merkle tree, also known as a hash tree, is a cryptographic data structure that plays a crucial role in ensuring the integrity, deduplication, and efficient verification of large datasets.59 In a Merkle tree, every leaf node is labeled with the cryptographic hash of a data block, and every non-leaf node (branch or internal node) is labeled with the cryptographic hash of the concatenated hashes of its child nodes.59 This recursive hashing process culminates in a single "Merkle Root" hash at the top of the tree, which uniquely identifies the state of the entire dataset.60The benefits of applying Merkle trees to this obsolescence management system are multifaceted:Integrity Verification: Merkle trees enable efficient and secure verification of large datasets. If the Merkle root hash changes, it immediately signals that data within the dataset has been altered or corrupted. By traversing the tree from the root, specific corrupted parts or modified data blocks can be quickly identified.60Deduplication: By design, Merkle trees inherently support data deduplication. 
Identical content blocks or files will produce the same hash, allowing the system to store them only once while referencing them multiple times within the tree structure.60Efficient Versioning and Snapshots: Merkle trees are foundational for efficient version control systems, enabling "time travel" by capturing the exact state of data at a specific moment.63 Git, for example, heavily relies on Merkle trees for its snapshot-based version control, where each commit effectively creates a new Merkle root representing the project's state.60 When only parts of a dataset change, only the hashes along the path from the modified leaves up to the root need to be recomputed, minimizing computational overhead.60Distributed Systems: Merkle trees are widely used in distributed systems (e.g., BitTorrent, IPFS, Bitcoin, Ethereum, Cassandra) to verify data integrity across peer-to-peer networks and distributed databases.60For this application, the entire collection of JSON obsolescence files can be represented as a Merkle tree. Each JSON file (or a logical block of JSONs) would serve as a leaf node, while logical groupings or directories would form internal nodes, with their hashes derived from their children.60 The root hash of this Merkle tree would then act as a unique "version ID" for the entire obsolescence dataset at any given point in time.Several Rust crates are available to facilitate the implementation of Merkle trees: rs-merkletree 62 and mt-rs 34 provide basic Merkle tree construction and verification. rs-merkle offers more advanced features, including transactional changes and the ability to roll back to previously committed tree states, similar to Git.66 Additionally, exonum-merkledb 69 and merkle-tree-db 70 are crates that integrate Merkle tree functionality with database backends, providing features like snapshots and forks, which could be valuable for managing historical states within the obsolescence data.5. 
Architectural Design for the Rust ApplicationChoosing an Embedded Key-Value StoreThe decision to utilize an embedded key-value store is central to addressing the "many small files" problem and establishing a robust, high-performance data storage layer directly within the Rust application. Embedded databases eliminate the overhead of network latency and simplify deployment by integrating directly with the application's process.58 They manage data in optimized internal structures, such as B-trees, which are typically stored within one or a few large files, rather than exposing numerous small files to the operating system, thereby significantly improving I/O efficiency.37An evaluation of prominent Rust-native embedded databases, redb and grebedb, reveals distinct capabilities:Comparison of Rust Embedded Key-Value StoresFeatureredbgrebedbCore DesignPure Rust, inspired by lmdb; Copy-on-Write B-trees.37Lightweight, B+ tree where each node is a file.53ACID ComplianceFully ACID-compliant transactions (Atomicity, Consistency, Isolation, Durability).71No traditional transactions; flush() provides atomic saving. Internal consistency via COW, revision counters, atomic renames.53Concurrency ModelMulti-Version Concurrency Control (MVCC): multiple concurrent readers, single writer without blocking.37Designed for single-process applications; no threads for background work, no async support. File locking supported.53Crash-SafetyCrash-safe by default; uses checksums and "god byte" for atomic commits and recovery.37Internal consistency via COW, atomic renames. Not explicitly "crash-safe by default" in the same robust manner as redb.53PerformanceBenchmarks similar to lmdb/rocksdb. Optimized for mixed read-write. Zero-copy reads.71Performance dependent on underlying filesystem. 
Inserting sorted keys is best.53Write AmplificationAs a COW B-tree, experiences WA on flash, but aims to minimize by only writing affected blocks and internal compaction.38Implemented as B+ tree with nodes saved to individual files, which can contribute to WA, especially with small, random writes.38Development StatusStable file format, beta quality; not widely deployed in production.71Usable but not extensively tested or used in production; use with caution.53Rationale for Selecting redb:The need for "enabling client access" for a "large amount of JSON files" strongly implies a requirement for concurrent read access, and potentially concurrent, though minimized, writes. redb stands out as the superior choice due to its explicit support for Multi-Version Concurrency Control (MVCC), which allows multiple concurrent readers to access the database without blocking, even while a single writer is active.71 This capability is fundamental for a responsive, multi-client system. Furthermore, redb's full ACID compliance and its design for crash-safety by default 71 provide the necessary data reliability for managing critical obsolescence metadata. While grebedb is lightweight, its explicit lack of concurrency support and focus on single-process applications 53 make it less suitable for the stated requirements. Although redb is currently considered "beta quality" and not yet widely deployed in production 71, its feature set aligns much more closely with the implicit requirements for a robust and scalable system.Implementing the Content-Addressable Storage Layer in RustThe redb database will serve as the primary index, storing the mapping from a canonicalized FMRI string to its corresponding content_hash (SHA-256 string). The actual JSON content, or "blob," will be stored in a separate CAS filesystem layer, addressed directly by its content_hash. 
This architectural separation optimizes database size and access patterns by keeping large binary data out of the primary index.For computing the content_hash, the sha2 crate is a suitable choice for SHA-256 hashing of the JSON content.36 If performance profiling indicates that hashing is a bottleneck, particularly for very large JSON documents or extremely high ingestion rates, the blake3 crate (often used via libraries like mt-rs 34) could be considered. Blake3 is designed for parallel computation, which can offer speed advantages for large data.33When writing new JSON blobs to the CAS filesystem (e.g., to a path like data//.json), the atomic-write-file crate should be utilized.44 This crate ensures that the JSON file is fully written and flushed to disk before it is atomically renamed to its final destination. This process is critical for preventing partial or corrupted files from being left on disk in the event of a system interruption or crash. In a CAS, where content is immutable, this primarily ensures that a new, valid blob is fully present before it is referenced.While the user specified JSON files, for optimal storage efficiency and faster I/O operations, it is highly recommended to convert the JSON data into a compact binary format before writing it to the CAS. JSON, while human-readable, is verbose and inefficient for large-scale storage and processing. Binary serialization formats are significantly more compact and faster to parse and serialize, directly contributing to minimizing physical data written to disk and improving overall I/O bandwidth.81 The serde framework in Rust is the standard for this, allowing efficient serialization and deserialization of Rust structs (representing the defined JSON data model) to various formats.81 Recommended binary formats include:MessagePack: Implemented by the rmp_serde crate. 
This format is an efficient binary representation that closely resembles a compact JSON.81CBOR (Concise Binary Object Representation): Implemented by the ciborium crate. CBOR is designed for small message sizes and avoids the need for version negotiation, making it robust for data exchange.81The implication of this approach is that the JSON files will serve as the logical representation for external clients or human inspection, but internally, the data will be stored in a highly optimized binary format. This separation ensures both human readability when needed and maximum efficiency for storage and I/O operations.Designing for Client Access and Data RetrievalEfficient client access is a primary requirement, necessitating rapid lookup and retrieval mechanisms.Indexing Strategies for Rapid FMRI Lookup: The redb database, with its BTreeMap-based API 71, provides highly efficient key-value lookups, typically with O(log N) complexity, where N is the number of records. The canonicalized FMRI will serve as the key, and the content_hash will be the value stored in redb. redb's support for zero-copy reads 73 means that when a content_hash is retrieved from the database's internal cache, it can be accessed directly without unnecessary data copying, further minimizing CPU overhead and improving read performance.Implementing Snapshotting and Versioning Mechanisms: The need to track "obsolete" packages inherently implies a requirement for historical data and versioning. A layered approach to versioning can provide both granular control and comprehensive data integrity.redb's MVCC and Savepoints: redb's Multi-Version Concurrency Control (MVCC) design 37 naturally provides read isolation and logical snapshots of the FMRI-to-hash mapping. Each ReadTransaction in redb offers a consistent view of the database as it existed at the moment the transaction was opened. 
Furthermore, redb supports savepoints (both ephemeral and persistent) 37, which can be used to explicitly capture and revert to past states of the FMRI-to-hash index.Merkle Tree for Content Versioning: To track the history and ensure the integrity of the entire collection of obsolescence records, a Merkle tree can be constructed over the content hashes of the binary JSON blobs stored in the CAS. Each time a new batch of obsolescence records is added or existing ones are updated (resulting in new content hashes), a new Merkle root can be computed for the entire dataset and stored as a version identifier.60 This Merkle root provides a verifiable snapshot of the underlying data blobs. Rust crates such as rs-merkle 66 support transactional changes to the tree and rolling back to previous tree states, analogous to Git's version control.This combination of redb's transactional history for the index and Merkle trees for content-based versioning offers a powerful and verifiable layered versioning system. redb's savepoints allow point-in-time recovery of the FMRI-to-hash index, while the Merkle tree root provides a cryptographically verifiable snapshot of the underlying data blobs themselves. This approach effectively avoids the storage inefficiency of "full data duplication" for versioning 54, allowing clients to query either the latest state or a specific historical state of the obsolescence data with confidence in its integrity.Handling Concurrent Read and Write Access Patterns: redb's MVCC architecture 71 is designed to allow multiple concurrent readers without blocking, which is ideal for a system with many clients querying obsolescence data. However, redb enforces a single active WriteTransaction at any given time.76 For applications requiring high-throughput write scenarios, all write operations should be funneled through a single, dedicated writer thread or an asynchronous worker pool. 
In Rust, this can be efficiently managed using tokio::task::spawn_blocking to offload blocking redb write operations from the main asynchronous runtime thread.83 This ensures that the application remains responsive for read clients even during periods of heavy write activity.6. Proposed Solution ArchitectureThe proposed architecture integrates a content-addressable storage layer with a high-performance embedded key-value store to manage package obsolescence data efficiently.Conceptual Diagram (Textual Representation)+------------------------------------------------------------------+ + +| Client Applications (Rust, other languages via FFI/API) | +|------------------------------------------------------------------| +| Requests for Obsolescence Data (FMRI lookup, history queries) | ++--------------------------|---------------------------------------+ +| + V ++--------------------------|---------------------------------------+ + +| Rust Application Layer | +| (API Endpoints, Business Logic, Data Access Orchestration) | ++--------------------------|---------------------------------------+ +| +| (FMRI Lookup, Content Hash Retrieval) + V ++--------------------------|---------------------------------------+ + +| Data Storage Layer (Embedded KV Store + CAS) | +| | +| +---------------------------------------+ | +| | redb (Embedded Key-Value Store) |<---------------------+ +| | (Canonicalized FMRI -> Content Hash) | | +| | (MVCC for concurrent reads) | | +| +---------------------------------------+ | +| | | +| | (Content Hash -> Physical Path) | +| V | +| +---------------------------------------+ | +| | Content-Addressable Storage (Filesystem) | +| | (Binary JSON Blobs stored by Hash) | | +| | (Atomic Writes, Copy-on-Write principles) | +| +---------------------------------------+ | ++--------------------------|---------------------------------------+ +| +| (Physical I/O Operations) + V ++--------------------------|---------------------------------------+ + +| Underlying 
Disk (SSD/HDD, OS Filesystem) | ++------------------------------------------------------------------+ +Detailed Breakdown of ProcessesThe architecture facilitates efficient data flow and management through defined processes for ingestion, updates, retrieval, and historical access.Data Ingestion (New Obsolescence Record):Receive Data: The system receives new JSON data representing a package's obsolescence status.FMRI Canonicalization: The FMRI string from the JSON is strictly canonicalized to ensure a consistent representation, which is crucial for reliable hashing and key lookups.Content Hashing: A SHA-256 content_hash is computed for the entire JSON document. This hash uniquely identifies the content.Binary Serialization: The JSON data is serialized into a compact binary format, such as MessagePack or CBOR, using the serde framework. This significantly reduces storage footprint and improves I/O performance compared to raw JSON.Atomic Blob Write: The binary blob is atomically written to the Content-Addressable Storage (CAS) filesystem. The file path is derived directly from the content_hash (e.g., data//.bin). The atomic-write-file crate ensures that this write operation is robust against interruptions, guaranteeing either the full new content is written or the old state is preserved. If an identical blob already exists (due to content deduplication), no physical write operation is performed, saving disk I/O.Index Update in redb: Within a redb write transaction, the mapping from the canonicalized FMRI to the content_hash is inserted or updated.redb Transaction Commit: The redb transaction is committed, making the new FMRI-to-hash mapping durable and visible to readers.(Optional) Merkle Tree Update: For comprehensive dataset versioning, the new content_hash is incorporated into a Merkle tree representing the entire collection of obsolescence records. 
A new Merkle root is then computed and stored as a version identifier for the dataset.Data Update (Existing Obsolescence Record Changes):Receive Modified Data: A request to update an existing obsolescence record is received.New Content Hash: A new content_hash is generated for the modified JSON content after canonicalization and binary serialization. This reflects the immutability principle of CAS.New Blob Write: The new binary blob is written to the CAS filesystem, similar to ingestion.redb Index Update: The redb entry for the FMRI is updated to point to the new content_hash. The old content_hash and its corresponding binary blob remain in the CAS, leveraging Copy-on-Write principles to implicitly preserve historical data.redb Transaction Commit: The redb transaction is committed.(Optional) Merkle Tree Update: The Merkle tree is updated with the new content hash, and a new Merkle root is computed and stored.Data Retrieval (Client Access):Client Request: A client requests obsolescence data for a specific FMRI.FMRI Canonicalization: The requested FMRI is canonicalized.redb Read Transaction: A redb read transaction is opened. redb's MVCC allows multiple concurrent read transactions without blocking.Content Hash Lookup: The canonicalized FMRI is looked up in redb to retrieve its associated content_hash.Blob Retrieval: The content_hash is used to locate and read the binary JSON blob from the CAS filesystem.Binary Deserialization: The retrieved binary blob is deserialized back into its original JSON format for the client.Historical Data Access (Snapshotting):Historical Query: A client requests obsolescence data as of a specific historical point, identified either by a redb savepoint ID or a Merkle tree root hash.redb Savepoint Access: If a redb savepoint ID is provided, a redb read transaction is opened at that specific savepoint. 
This allows the system to retrieve the FMRI-to-hash mapping as it existed at that precise historical moment.Merkle Tree Version Access: If a Merkle tree root hash is provided, the system uses this root to logically reconstruct the filesystem view of the obsolescence data at that historical point and retrieve the relevant content_hash for the requested FMRI.Blob Retrieval and Deserialization: The content_hash (from either redb savepoint or Merkle tree) is used to retrieve and deserialize the corresponding JSON blob from the CAS.7. Implementation Considerations and Best Practices in RustImplementing this architecture in Rust requires careful consideration of specific crates, concurrency patterns, error handling, and performance optimizations to fully leverage the language's strengths.Specific Rust Crates and Their Applicationredb: This crate will form the core embedded key-value store. Its API for database creation (Database::create), initiating write and read transactions (begin_write, begin_read), opening tables (open_table), inserting and retrieving data (insert, get), committing changes (commit), and creating savepoints (savepoint) will be central to managing the FMRI-to-content-hash mapping.37atomic-write-file: This crate is crucial for ensuring the integrity of individual JSON blobs written to the CAS filesystem. It guarantees that files are written atomically, preventing partial or corrupted data from being stored on disk in the event of system interruptions.44serde with rmp_serde (MessagePack) or ciborium (CBOR): The serde framework is indispensable for efficient binary serialization and deserialization of the JSON data. 
rmp_serde for MessagePack or ciborium for CBOR will provide compact, performant binary representations of the obsolescence records for storage, significantly reducing disk space and I/O overhead compared to raw JSON.81sha2 or blake3: For computing the cryptographic content_hash of the JSON data, the sha2 crate (for SHA-256) offers strong security. If higher hashing throughput is required, blake3 (which can be integrated via crates like mt-rs) provides parallelizable hashing capabilities.34rs-merkle or mt-rs: If a full Merkle tree is implemented for comprehensive dataset versioning and integrity proofs, rs-merkle 66 or mt-rs 34 would be used. These libraries enable building the tree, generating proofs, and managing tree state changes.Strategies for Managing Concurrency and Thread SafetyRust's strong type system and ownership model inherently promote thread safety. For this architecture, specific strategies are key:redb's MVCC: The core of concurrency management relies on redb's Multi-Version Concurrency Control (MVCC).71 This design allows multiple ReadTransaction instances to operate concurrently without blocking each other, providing a consistent view of the database for each reader.Single Writer Pattern: redb enforces a single active WriteTransaction at any given time.76 To handle multiple concurrent write requests from clients without blocking the main application thread, all write operations should be funneled through a single, dedicated writer thread or an asynchronous worker pool. 
In an asynchronous Rust application (e.g., using Tokio), tokio::task::spawn_blocking can be used to offload these blocking redb write operations to a separate thread pool, ensuring that the main event loop remains responsive.83Arc/Mutex for Shared State: Any application-level state that needs to be shared and potentially mutated across multiple threads (e.g., the redb::Database instance itself, if not managed by a dedicated actor) should be wrapped in Arc<Mutex<T>> or Arc<RwLock<T>> for safe shared ownership and controlled mutation. However, redb handles its internal locking, reducing the need for extensive manual mutex management at the application level for database operations.Robust Error Handling and Crash Recovery MechanismsRobustness is paramount for a system managing critical metadata. Rust's Result type is fundamental for explicit error handling.redb's Crash-Safety: The system should primarily rely on redb's default crash-safety mechanisms and atomic commit strategies.37redb uses checksums and a "god byte" to ensure that transactions are either fully committed or fully rolled back, even after power failures or abrupt process termination. Its built-in repair mechanisms 37 are designed to restore consistency after an unclean shutdown.atomic-write-file: This crate provides atomicity for individual file writes, preventing corrupted files on disk. It handles temporary file creation and ensures that these temporary files are automatically cleaned up on normal program exit or Rust panic.44Comprehensive Error Handling: Implement Rust's Result type and the ? operator throughout the application to gracefully propagate and handle errors.
Custom error types can be defined to provide more specific context and enable more precise error recovery or reporting.Performance Tuning and Optimization TechniquesAchieving high performance requires continuous optimization and careful configuration.Batching Writes: For bulk ingestion or updates of obsolescence records, it is highly beneficial to batch multiple redb inserts or updates into a single WriteTransaction.74 This significantly reduces the overhead associated with transaction commits, as redb's WriteStrategy::Throughput can be particularly effective for large transactions.74Optimal redb Configuration: Experimentation with redb's page size and region size might be necessary if default performance is not sufficient for specific workloads.37 These parameters can influence I/O patterns and memory usage.Minimizing Write Amplification: While redb's Copy-on-Write B-trees inherently help manage writes, it is important to understand that high random write workloads on Solid-State Drives (SSDs) can still lead to increased write amplification.38 The choice of underlying filesystem (e.g., ZFS or Btrfs, which implement COW at the filesystem level) can also influence the overall write amplification factor.Zero-Copy Reads: redb supports zero-copy reads 73, which is a significant performance advantage. This capability minimizes CPU cycles spent on copying data from the database's internal buffers to application memory, directly improving read throughput.Caching: redb incorporates an internal cache 52 to minimize disk I/O for frequently accessed data. Ensuring that the operating system's page cache is effectively utilized by the underlying filesystem is also critical.Data Locality: When performing initial bulk loading into redb, inserting keys in sorted order can yield significant performance benefits by optimizing B-tree node writes and reducing random disk access.538. 
Conclusions and RecommendationsThe efficient organization of filesystem space for a large volume of JSON files marking package obsolescence, including replacement information, while minimizing write operations and enabling client access in a Rust application, presents a complex challenge. The proposed hybrid architecture, leveraging Content-Addressable Storage (CAS) with redb as the primary index and optional Merkle trees for comprehensive versioning, offers a robust and scalable solution.Summary of Key Recommendations for ImplementationAdopt the Hybrid CAS + redb Architecture: This approach provides optimal storage efficiency through deduplication, ensures data integrity, and delivers high performance for both reads and writes.Implement Strict FMRI Canonicalization: A well-defined and consistently applied canonicalization process for FMRIs is fundamental for accurate content hashing and reliable key lookups within redb.Utilize Binary Serialization for Storage: Convert JSON content into a compact binary format (e.g., MessagePack or CBOR) using serde before storage. This significantly reduces disk I/O and storage space, directly addressing the goals of minimizing writes and optimizing space.Leverage redb's MVCC and Single Writer Pattern: Employ redb's Multi-Version Concurrency Control for efficient concurrent read access. Funnel all write operations through a single, dedicated writer thread or an asynchronous worker pool (using tokio::task::spawn_blocking) to manage redb's single-writer constraint effectively and maintain application responsiveness.Integrate atomic-write-file for Robustness: Use this crate for all individual JSON blob writes to the CAS filesystem to guarantee atomicity and prevent data corruption in the event of system failures.Implement Merkle Trees for Comprehensive Versioning: Incorporate Merkle trees over the content hashes of the stored JSON blobs. 
This provides a verifiable, immutable history of the entire obsolescence dataset, allowing for efficient snapshots and integrity checks.Considerations for Scalability Beyond a Single NodeWhile the proposed architecture is highly efficient for a single-node deployment, future growth to petabyte-scale datasets or requirements for geo-distribution would necessitate further architectural evolution:Distributed CAS: For extreme scale or distributed environments, exploring dedicated distributed CAS systems like IPFS 30, Arvados Keep 30, or Infinit 30 would be a logical next step. These systems inherently manage data distribution, replication, and deduplication across multiple nodes.Sharding redb: For exceptionally high read/write throughput on the FMRI index, the redb database could be sharded across multiple instances or nodes. This would involve partitioning the FMRI key space and distributing the redb instances accordingly. However, this introduces significant complexity related to distributed transactions, consistency models, and operational management.Potential for Advanced Garbage Collection and Data Retention PoliciesEffective long-term management of obsolescence data involves not only efficient storage but also intelligent data lifecycle management:Time-to-Live (TTL) Policies: Implement TTL policies for obsolescence records or historical snapshots that are no longer actively needed.51 This would allow for periodic, automated cleanup of underlying CAS blobs that are no longer referenced by any active redb version or Merkle tree root, reclaiming disk space.redb Compaction: While redb has internal compaction mechanisms 76 to reclaim space and minimize fragmentation, explicit application-level policies might be needed to trigger or manage these operations for long-term data retention and storage optimization.Transitive Obsolescence Graph AnalysisThe replaces and obsoleted_by relationships within the proposed data model form a rich graph structure. 
Developing a dedicated service or module to build and query the full transitive obsolescence graph would add significant analytical value.25 This could involve:In-Memory Graph Representation: For smaller or frequently accessed portions of the graph.Specialized Graph Database Integration: For very large or complex transitive queries, integrating with a dedicated graph database (e.g., Neo4j, Dgraph) would provide powerful querying capabilities.Such a component would enable advanced queries, such as "What is the effective replacement for package X, considering its entire dependency chain?" or "Which currently active packages are transitively dependent on an obsolete package?" This capability extends beyond the immediate scope of efficient storage but is critical for comprehensive package lifecycle management and proactive risk mitigation. \ No newline at end of file diff --git a/doc/obsoleted_packages.md b/doc/obsoleted_packages.md new file mode 100644 index 0000000..1132335 --- /dev/null +++ b/doc/obsoleted_packages.md @@ -0,0 +1,432 @@ +# Obsoleted Packages in IPS + +This document describes the handling of obsoleted packages in the Image Packaging System (IPS). + +## Overview + +Obsoleted packages are packages that are no longer maintained or have been replaced by other packages. In previous versions of IPS, obsoleted packages were marked with the `pkg.obsolete` attribute in their manifest, but they remained in the main package repository. This approach had several drawbacks: + +1. Obsoleted packages cluttered the repository and catalog +2. They were still visible in package listings and searches +3. There was no structured way to store metadata about why a package was obsoleted or what replaced it + +The new approach stores obsoleted packages in a dedicated directory structure, separate from the main package repository. This provides several benefits: + +1. Keeps the main repository clean and focused on active packages +2. 
Provides a structured way to store metadata about obsoleted packages +3. Allows for better organization and management of obsoleted packages +4. Preserves the original manifest for reference + +## Directory Structure + +Obsoleted packages are stored in the following directory structure: + +``` +/obsoleted/ + <publisher>/ + <package-name>/ + <encoded-version>.json # Metadata about the obsoleted package + <encoded-version>.manifest # Original manifest of the obsoleted package +``` + +For example, an obsoleted package `pkg://openindiana.org/library/perl-5/postgres-dbi-5100@2.19.3,5.11-2014.0.1.1:20250628T100651Z` would be stored as: + +``` +/obsoleted/ + openindiana.org/ + library/perl-5/postgres-dbi-5100/ + 2.19.3%2C5.11-2014.0.1.1%3A20250628T100651Z.json + 2.19.3%2C5.11-2014.0.1.1%3A20250628T100651Z.manifest +``` + +## Metadata Format + +The metadata for an obsoleted package is stored in a JSON file with the following structure: + +```json +{ + "fmri": "pkg://openindiana.org/library/perl-5/postgres-dbi-5100@2.19.3,5.11-2014.0.1.1:20250628T100651Z", + "status": "obsolete", + "obsolescence_date": "2025-07-29T12:22:00Z", + "deprecation_message": "This package is deprecated. Use library/perl-5/postgres-dbi instead.", + "obsoleted_by": [ + "pkg://openindiana.org/library/perl-5/postgres-dbi@3.0.0" + ], + "metadata_version": 1, + "content_hash": "sha256-abc123def456..." 
+} +``` + +The fields in the metadata are: + +- `fmri`: The full FMRI (Fault Management Resource Identifier) of the obsoleted package +- `status`: Always "obsolete" for obsoleted packages +- `obsolescence_date`: The date when the package was marked as obsoleted +- `deprecation_message`: Optional message explaining why the package was obsoleted +- `obsoleted_by`: Optional list of FMRIs that replace this package +- `metadata_version`: Version of the metadata schema (currently 1) +- `content_hash`: Hash of the original manifest content for integrity verification + +## CLI Commands + +The following CLI commands are available for managing obsoleted packages: + +### Mark a Package as Obsoleted + +```bash +pkg6repo obsolete-package -s <repo_path> -p <publisher> -f <fmri> [-m <deprecation_message>] [-r <replacement_fmri> ...] +``` + +This command: +1. Moves the package from the main repository to the obsoleted directory +2. Creates metadata for the obsoleted package +3. Removes the package from the catalog +4. Rebuilds the repository metadata + +**Example:** +```bash +# Mark a package as obsoleted with a deprecation message and replacement package +pkg6repo obsolete-package -s /path/to/repo -p openindiana.org -f "pkg://openindiana.org/library/perl-5/postgres-dbi-5100@2.19.3" \ + -m "This package is deprecated. Use library/perl-5/postgres-dbi instead." \ + -r "pkg://openindiana.org/library/perl-5/postgres-dbi@3.0.0" +``` + +### List Obsoleted Packages + +```bash +pkg6repo list-obsoleted -s <repo_path> -p <publisher> [-F <format>] [-H] [--page <page>] [--page-size <page_size>] +``` + +This command lists obsoleted packages for a publisher with optional pagination. The output format can be: +- `table` (default): Tabular format with columns for name, version, and publisher +- `json`: JSON format +- `tsv`: Tab-separated values + +Pagination parameters: +- `--page`: Page number (1-based, defaults to 1) +- `--page-size`: Number of packages per page (defaults to 100, use 0 for all packages) + +The output includes pagination information (current page, total pages, total count) in all formats. 
+ +**Example:** +```bash +# List all obsoleted packages for a publisher in JSON format +pkg6repo list-obsoleted -s /path/to/repo -p openindiana.org -F json + +# List all obsoleted packages for a publisher in table format without headers +pkg6repo list-obsoleted -s /path/to/repo -p openindiana.org -H + +# List obsoleted packages with pagination (page 2, 20 packages per page) +pkg6repo list-obsoleted -s /path/to/repo -p openindiana.org --page 2 --page-size 20 + +# List all obsoleted packages in a single page +pkg6repo list-obsoleted -s /path/to/repo -p openindiana.org --page-size 0 +``` + +### Search Obsoleted Packages + +```bash +pkg6repo search-obsoleted -s <repo_path> -p <publisher> -q <pattern> [-F <format>] [-H] [-n <limit>] +``` + +This command searches for obsoleted packages that match a pattern. The pattern can be a simple substring or a regular expression. + +**Example:** +```bash +# Search for obsoleted packages containing "perl" in the name or FMRI +pkg6repo search-obsoleted -s /path/to/repo -p openindiana.org -q "perl" + +# Search with a regular expression and limit results to 10 +pkg6repo search-obsoleted -s /path/to/repo -p openindiana.org -q "^library/.*" -n 10 +``` + +### Show Obsoleted Package Details + +```bash +pkg6repo show-obsoleted -s <repo_path> -p <publisher> -f <fmri> [-F <format>] +``` + +This command shows detailed information about an obsoleted package, including: +- FMRI +- Status +- Obsolescence date +- Deprecation message (if any) +- Replacement packages (if any) +- Metadata version +- Content hash + +**Example:** +```bash +# Show details of an obsoleted package in JSON format +pkg6repo show-obsoleted -s /path/to/repo -p openindiana.org \ + -f "pkg://openindiana.org/library/perl-5/postgres-dbi-5100@2.19.3" -F json +``` + +### Restore an Obsoleted Package + +```bash +pkg6repo restore-obsoleted -s <repo_path> -p <publisher> -f <fmri> [--no-rebuild] +``` + +This command restores an obsoleted package to the main repository: +1. Retrieves the original manifest from the obsoleted package +2. Creates a transaction in the main repository +3. 
Adds the package to the transaction +4. Commits the transaction +5. Removes the obsoleted package from the obsoleted packages directory +6. Rebuilds the catalog (unless `--no-rebuild` is specified) + +**Example:** +```bash +# Restore an obsoleted package to the main repository +pkg6repo restore-obsoleted -s /path/to/repo -p openindiana.org \ + -f "pkg://openindiana.org/library/perl-5/postgres-dbi-5100@2.19.3" +``` + +### Export Obsoleted Packages + +```bash +pkg6repo export-obsoleted -s <repo_path> -p <publisher> -o <output_file> [-q <pattern>] +``` + +This command exports obsoleted packages to a JSON file that can be imported into another repository. + +**Example:** +```bash +# Export all obsoleted packages for a publisher +pkg6repo export-obsoleted -s /path/to/repo -p openindiana.org -o /path/to/export.json + +# Export only obsoleted packages matching a pattern +pkg6repo export-obsoleted -s /path/to/repo -p openindiana.org -o /path/to/export.json -q "perl" +``` + +### Import Obsoleted Packages + +```bash +pkg6repo import-obsoleted -s <repo_path> -i <input_file> [-p <publisher>] +``` + +This command imports obsoleted packages from a JSON file created by `export-obsoleted`. + +**Example:** +```bash +# Import obsoleted packages from a file +pkg6repo import-obsoleted -s /path/to/repo -i /path/to/export.json + +# Import obsoleted packages and override the publisher +pkg6repo import-obsoleted -s /path/to/repo -i /path/to/export.json -p new-publisher +``` + +## Importing Obsoleted Packages + +When importing packages from a pkg5 repository, packages with the `pkg.obsolete` attribute are automatically detected and stored in the obsoleted directory instead of the main repository. This ensures that obsoleted packages are properly handled during import. 
+ +## API + +The following classes and methods are available for programmatically managing obsoleted packages: + +### ObsoletedPackageManager + +This class manages obsoleted packages in the repository: + +``` +pub struct ObsoletedPackageManager { + base_path: PathBuf, +} + +impl ObsoletedPackageManager { + // Create a new ObsoletedPackageManager + pub fn new<P: AsRef<Path>>(repo_path: P) -> Self; + + // Initialize the obsoleted packages directory structure + pub fn init(&self) -> Result<()>; + + // Store an obsoleted package + pub fn store_obsoleted_package( + &self, + publisher: &str, + fmri: &Fmri, + manifest_content: &str, + obsoleted_by: Option<Vec<String>>, + deprecation_message: Option<String>, + ) -> Result<ObsoletedPackageMetadata>; + + // Check if a package is obsoleted + pub fn is_obsoleted(&self, publisher: &str, fmri: &Fmri) -> bool; + + // Get metadata for an obsoleted package + pub fn get_obsoleted_package_metadata( + &self, + publisher: &str, + fmri: &Fmri, + ) -> Result<Option<ObsoletedPackageMetadata>>; + + // List all obsoleted packages for a publisher + pub fn list_obsoleted_packages(&self, publisher: &str) -> Result<Vec<ObsoletedPackageMetadata>>; + + // Search for obsoleted packages by pattern + pub fn search_obsoleted_packages( + &self, + publisher: &str, + pattern: &str, + ) -> Result<Vec<ObsoletedPackageMetadata>>; + + // Get and remove an obsoleted package + pub fn get_and_remove_obsoleted_package( + &self, + publisher: &str, + fmri: &Fmri, + ) -> Result<String>; + + // Export obsoleted packages to a file + pub fn export_obsoleted_packages( + &self, + publisher: &str, + pattern: Option<&str>, + output_file: &Path, + ) -> Result<usize>; + + // Import obsoleted packages from a file + pub fn import_obsoleted_packages( + &self, + input_file: &Path, + override_publisher: Option<&str>, + ) -> Result<usize>; +} +``` + +### ObsoletedPackageMetadata + +This struct represents metadata for an obsoleted package: + +``` +pub struct ObsoletedPackageMetadata { + pub fmri: String, + pub status: String, + pub obsolescence_date: String, + pub deprecation_message: Option<String>, + pub obsoleted_by: Option<Vec<String>>, + pub metadata_version: u32, + 
pub content_hash: String, +} +``` + +## Integration with Repository Operations + +The obsoleted package system is integrated with the following repository operations: + +1. **Package Listing**: Obsoleted packages are excluded from regular package listings +2. **Catalog Building**: Obsoleted packages are excluded from the catalog +3. **Search**: Obsoleted packages are excluded from search results + +This ensures that obsoleted packages don't clutter the repository and are properly managed. + +## Best Practices for Managing Obsoleted Packages + +Here are some best practices for managing obsoleted packages: + +### When to Mark a Package as Obsoleted + +- **Package is no longer maintained**: When a package is no longer being maintained or updated +- **Package has been replaced**: When a package has been replaced by a newer version or a different package +- **Package is deprecated**: When a package is deprecated and should not be used in new installations +- **Package has security vulnerabilities**: When a package has security vulnerabilities and should not be used + +### Providing Useful Metadata + +- **Always include a deprecation message**: Explain why the package is obsoleted and what users should do instead +- **Specify replacement packages**: If the package has been replaced, specify the replacement package(s) +- **Be specific about versions**: If only certain versions are obsoleted, be clear about which ones + +### Managing Large Numbers of Obsoleted Packages + +- **Use batch operations**: Use the export/import commands to manage large numbers of obsoleted packages +- **Use search to find related packages**: Use the search command to find related packages that might also need to be obsoleted +- **Organize by publisher**: Keep obsoleted packages organized by publisher + +### Repository Maintenance + +- **Regularly clean up obsoleted packages**: Remove obsoleted packages that are no longer needed +- **Export obsoleted packages before repository cleanup**: Export 
obsoleted packages before cleaning up a repository +- **Rebuild catalogs after bulk operations**: Rebuild catalogs after bulk operations to ensure consistency + +## Troubleshooting + +Here are solutions to common issues when working with obsoleted packages: + +### Package Not Found in Obsoleted Directory + +**Issue**: A package that was marked as obsoleted cannot be found in the obsoleted directory. + +**Solution**: +1. Check that the FMRI is correct, including the version and timestamp +2. Verify that the publisher name is correct +3. Use the `search-obsoleted` command with a broader pattern to find similar packages +4. Check the repository logs for any errors during the obsolete operation + +### Errors During Import/Export + +**Issue**: Errors occur when importing or exporting obsoleted packages. + +**Solution**: +1. Ensure the input/output file paths are correct and writable +2. Check that the repository exists and is accessible +3. Verify that the publisher exists in the repository +4. For import errors, check that the JSON file is valid and has the correct format + +### Catalog Issues After Restoring Packages + +**Issue**: Catalog issues after restoring obsoleted packages to the main repository. + +**Solution**: +1. Rebuild the catalog manually using `pkg6repo rebuild` +2. Check for any errors during the rebuild process +3. Verify that the package was properly restored to the main repository +4. Check for any conflicts with existing packages + +### Performance Issues with Large Repositories + +**Issue**: Performance issues when working with large repositories with many obsoleted packages. + +**Solution**: +1. Use the search command with specific patterns to limit the number of packages processed +2. Use pagination when listing or searching for obsoleted packages +3. Export obsoleted packages to separate files by category or pattern +4. 
Consider using a more powerful machine for repository operations + +## Workflow Diagram + +Here's a simplified workflow for managing obsoleted packages: + +``` + +-------------------+ + | Active Repository | + +-------------------+ + | + | obsolete-package + v + +-------------------+ + | Obsoleted Storage | + +-------------------+ + | + | (manage) + v + +------------------------------------------+ + | | + +-----------+-----------+ +-----------+-----------+ + | list-obsoleted | | search-obsoleted | + | show-obsoleted | | export-obsoleted | + +-----------------------+ +-----------------------+ + | | + v v + +-----------------------+ +-----------------------+ + | restore-obsoleted | | import-obsoleted | + +-----------------------+ +-----------------------+ + | | + v v + +-----------------------+ +-----------------------+ + | Back to Active Repo | | Different Repository | + +-----------------------+ +-----------------------+ +``` + +This diagram illustrates the flow of packages between the active repository and the obsoleted storage, as well as the various commands used to manage obsoleted packages. 
\ No newline at end of file diff --git a/libips/Cargo.toml b/libips/Cargo.toml index 0ffac0d..855522e 100644 --- a/libips/Cargo.toml +++ b/libips/Cargo.toml @@ -36,3 +36,9 @@ diff-struct = "0.5.3" chrono = "0.4.41" tempfile = "3.20.0" walkdir = "2.4.0" +redb = "1.5.0" +bincode = "1.3.3" + +[features] +default = ["redb-index"] +redb-index = [] # Enable redb-based index for obsoleted packages diff --git a/libips/src/repository/file_backend.rs b/libips/src/repository/file_backend.rs index 52891da..4b84ef7 100644 --- a/libips/src/repository/file_backend.rs +++ b/libips/src/repository/file_backend.rs @@ -221,6 +221,8 @@ pub struct FileBackend { /// Catalog manager for handling catalog operations /// Uses RefCell for interior mutability to allow mutation through immutable references catalog_manager: Option>, + /// Manager for obsoleted packages + obsoleted_manager: Option>, } /// Format a SystemTime as an ISO 8601 timestamp string @@ -616,6 +618,7 @@ impl ReadableRepository for FileBackend { path: path.to_path_buf(), config, catalog_manager: None, + obsoleted_manager: None, }) } @@ -1295,6 +1298,7 @@ impl WritableRepository for FileBackend { path: path.to_path_buf(), config, catalog_manager: None, + obsoleted_manager: None, }; // Create the repository directories @@ -1582,19 +1586,27 @@ impl FileBackend { } // If the publisher is not set in the FMRI, use the current publisher - if parsed_fmri.publisher.is_none() { + let final_fmri = if parsed_fmri.publisher.is_none() { let mut fmri_with_publisher = parsed_fmri.clone(); fmri_with_publisher.publisher = Some(publisher.to_string()); - - // Create a PackageInfo struct and add it to the list - packages.push(PackageInfo { - fmri: fmri_with_publisher, - }); + fmri_with_publisher } else { + parsed_fmri.clone() + }; + + // Check if the package is obsoleted + let is_obsoleted = if let Some(obsoleted_manager) = &self.obsoleted_manager { + obsoleted_manager.borrow().is_obsoleted(publisher, &final_fmri) + } else { + false + }; + + // 
Only add the package if it's not obsoleted + if !is_obsoleted { // Create a PackageInfo struct and add it to the list packages.push(PackageInfo { - fmri: parsed_fmri.clone(), + fmri: final_fmri, }); } @@ -1635,6 +1647,7 @@ impl FileBackend { fs::create_dir_all(self.path.join("index"))?; fs::create_dir_all(self.path.join("pkg"))?; fs::create_dir_all(self.path.join("trans"))?; + fs::create_dir_all(self.path.join("obsoleted"))?; Ok(()) } @@ -1960,6 +1973,23 @@ impl FileBackend { // This is safe because we just checked that catalog_manager is Some Ok(self.catalog_manager.as_ref().unwrap().borrow_mut()) } + + /// Get or initialize the obsoleted package manager + /// + /// This method returns a mutable reference to the obsoleted package manager. + /// It uses interior mutability with RefCell to allow mutation through an immutable reference. + pub fn get_obsoleted_manager( + &mut self, + ) -> Result> { + if self.obsoleted_manager.is_none() { + let manager = crate::repository::obsoleted::ObsoletedPackageManager::new(&self.path); + let refcell = std::cell::RefCell::new(manager); + self.obsoleted_manager = Some(refcell); + } + + // This is safe because we just checked that obsoleted_manager is Some + Ok(self.obsoleted_manager.as_ref().unwrap().borrow_mut()) + } /// URL encode a string for use in a filename fn url_encode(s: &str) -> String { diff --git a/libips/src/repository/mod.rs b/libips/src/repository/mod.rs index 1b0e79d..1c60232 100644 --- a/libips/src/repository/mod.rs +++ b/libips/src/repository/mod.rs @@ -155,8 +155,52 @@ impl From for RepositoryError { } } +// Implement From for redb error types +impl From for RepositoryError { + fn from(err: redb::Error) -> Self { + RepositoryError::Other(format!("Database error: {}", err)) + } +} + +impl From for RepositoryError { + fn from(err: redb::DatabaseError) -> Self { + RepositoryError::Other(format!("Database error: {}", err)) + } +} + +impl From for RepositoryError { + fn from(err: redb::TransactionError) -> Self { + 
RepositoryError::Other(format!("Transaction error: {}", err)) + } +} + +impl From<redb::TableError> for RepositoryError { + fn from(err: redb::TableError) -> Self { + RepositoryError::Other(format!("Table error: {}", err)) + } +} + +impl From<redb::StorageError> for RepositoryError { + fn from(err: redb::StorageError) -> Self { + RepositoryError::Other(format!("Storage error: {}", err)) + } +} + +impl From<redb::CommitError> for RepositoryError { + fn from(err: redb::CommitError) -> Self { + RepositoryError::Other(format!("Commit error: {}", err)) + } +} + +impl From<bincode::Error> for RepositoryError { + fn from(err: bincode::Error) -> Self { + RepositoryError::Other(format!("Serialization error: {}", err)) + } +} + mod catalog; mod file_backend; +mod obsoleted; mod rest_backend; #[cfg(test)] mod tests; @@ -167,6 +211,7 @@ pub use catalog::{ CatalogAttrs, CatalogError, CatalogManager, CatalogOperationType, CatalogPart, UpdateLog, }; pub use file_backend::FileBackend; +pub use obsoleted::{ObsoletedPackageManager, ObsoletedPackageMetadata}; pub use rest_backend::RestBackend; /// Repository configuration filename diff --git a/libips/src/repository/obsoleted.rs b/libips/src/repository/obsoleted.rs new file mode 100644 index 0000000..684485b --- /dev/null +++ b/libips/src/repository/obsoleted.rs @@ -0,0 +1,2399 @@ +use crate::fmri::Fmri; +use crate::repository::{Result, RepositoryError}; +use bincode::{deserialize, serialize}; +use miette::Diagnostic; +use regex::Regex; +use redb::{Database, ReadableTable, TableDefinition}; +use serde::{Deserialize, Serialize}; +use std::fs; +use std::path::{Path, PathBuf}; +use std::sync::RwLock; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; +use sha2::Digest; +use thiserror::Error; +use tracing::{debug, error, info, warn}; + +/// Format a SystemTime as an ISO 8601 timestamp string +fn format_timestamp(time: &SystemTime) -> String { + let duration = time + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap_or_else(|_| Duration::from_secs(0)); + + let secs = duration.as_secs(); + let micros = 
duration.subsec_micros(); + + // Format as ISO 8601 with microsecond precision + format!( + "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}.{:06}Z", + // Convert seconds to date and time components + 1970 + secs / 31536000, // year (approximate) + (secs % 31536000) / 2592000 + 1, // month (approximate) + (secs % 2592000) / 86400 + 1, // day (approximate) + (secs % 86400) / 3600, // hour + (secs % 3600) / 60, // minute + secs % 60, // second + micros // microseconds + ) +} + +/// Represents an obsoleted package in an export file +#[derive(Debug, Clone, Serialize, Deserialize)] +struct ObsoletedPackageExport { + /// The publisher of the package + pub publisher: String, + /// The FMRI of the package + pub fmri: String, + /// The metadata for the package + pub metadata: ObsoletedPackageMetadata, + /// The manifest content + pub manifest: String, +} + +/// Represents a collection of obsoleted packages in an export file +#[derive(Debug, Clone, Serialize, Deserialize)] +struct ObsoletedPackagesExport { + /// The version of the export format + pub version: u32, + /// The date when the export was created + pub export_date: String, + /// The packages in the export + pub packages: Vec, +} + +/// Errors that can occur in obsoleted package operations +#[derive(Debug, Error, Diagnostic)] +pub enum ObsoletedPackageError { + #[error("obsoleted package not found: {0}")] + #[diagnostic( + code(ips::obsoleted_package_error::not_found), + help("Check that the package exists in the obsoleted packages directory") + )] + NotFound(String), + + #[error("failed to read obsoleted package metadata: {0}")] + #[diagnostic( + code(ips::obsoleted_package_error::metadata_read), + help("Check that the metadata file exists and is valid JSON") + )] + MetadataReadError(String), + + #[error("failed to read obsoleted package manifest: {0}")] + #[diagnostic( + code(ips::obsoleted_package_error::manifest_read), + help("Check that the manifest file exists and is readable") + )] + ManifestReadError(String), + + 
#[error("failed to parse obsoleted package metadata: {0}")] + #[diagnostic( + code(ips::obsoleted_package_error::metadata_parse), + help("Check that the metadata file contains valid JSON") + )] + MetadataParseError(String), + + #[error("failed to parse FMRI: {0}")] + #[diagnostic( + code(ips::obsoleted_package_error::fmri_parse), + help("Check that the FMRI is valid") + )] + FmriParseError(String), + + #[error("I/O error: {0}")] + #[diagnostic( + code(ips::obsoleted_package_error::io), + help("Check system resources and permissions") + )] + IoError(String), + + #[error("failed to remove obsoleted package: {0}")] + #[diagnostic( + code(ips::obsoleted_package_error::remove), + help("Check that the package exists and is not in use") + )] + RemoveError(String), + + #[error("invalid pagination parameters: {0}")] + #[diagnostic( + code(ips::obsoleted_package_error::pagination), + help("Check that the page number and page size are valid") + )] + PaginationError(String), + + #[error("search pattern error: {0}")] + #[diagnostic( + code(ips::obsoleted_package_error::search), + help("Check that the search pattern is valid") + )] + SearchPatternError(String), + + #[error("index error: {0}")] + #[diagnostic( + code(ips::obsoleted_package_error::index), + help("An error occurred with the obsoleted package index") + )] + IndexError(String), + + #[error("cache error: {0}")] + #[diagnostic( + code(ips::obsoleted_package_error::cache), + help("An error occurred with the obsoleted package cache") + )] + CacheError(String), + + #[error("database error: {0}")] + #[diagnostic( + code(ips::obsoleted_package_error::database), + help("An error occurred with the obsoleted package database") + )] + DatabaseError(String), + + #[error("serialization error: {0}")] + #[diagnostic( + code(ips::obsoleted_package_error::serialization), + help("An error occurred while serializing or deserializing data") + )] + SerializationError(String), +} + +// Implement From for common error types to make error 
conversion easier +impl From<std::io::Error> for ObsoletedPackageError { + fn from(err: std::io::Error) -> Self { + ObsoletedPackageError::IoError(err.to_string()) + } +} + +impl From<serde_json::Error> for ObsoletedPackageError { + fn from(err: serde_json::Error) -> Self { + ObsoletedPackageError::MetadataParseError(err.to_string()) + } +} + +impl From<crate::fmri::FmriError> for ObsoletedPackageError { + fn from(err: crate::fmri::FmriError) -> Self { + ObsoletedPackageError::FmriParseError(err.to_string()) + } +} + +impl From<redb::Error> for ObsoletedPackageError { + fn from(err: redb::Error) -> Self { + ObsoletedPackageError::DatabaseError(err.to_string()) + } +} + +impl From<redb::DatabaseError> for ObsoletedPackageError { + fn from(err: redb::DatabaseError) -> Self { + ObsoletedPackageError::DatabaseError(err.to_string()) + } +} + +impl From<redb::TransactionError> for ObsoletedPackageError { + fn from(err: redb::TransactionError) -> Self { + ObsoletedPackageError::DatabaseError(err.to_string()) + } +} + +impl From<redb::TableError> for ObsoletedPackageError { + fn from(err: redb::TableError) -> Self { + ObsoletedPackageError::DatabaseError(err.to_string()) + } +} + +impl From<redb::StorageError> for ObsoletedPackageError { + fn from(err: redb::StorageError) -> Self { + ObsoletedPackageError::DatabaseError(err.to_string()) + } +} + +impl From<redb::CommitError> for ObsoletedPackageError { + fn from(err: redb::CommitError) -> Self { + ObsoletedPackageError::DatabaseError(err.to_string()) + } +} + +impl From<bincode::Error> for ObsoletedPackageError { + fn from(err: bincode::Error) -> Self { + ObsoletedPackageError::SerializationError(err.to_string()) + } +} + +// Implement From for RepositoryError to allow conversion +// This makes it easier to use ObsoletedPackageError with the existing Result type +impl From<ObsoletedPackageError> for RepositoryError { + fn from(err: ObsoletedPackageError) -> Self { + match err { + ObsoletedPackageError::NotFound(msg) => RepositoryError::NotFound(msg), + ObsoletedPackageError::IoError(msg) => RepositoryError::IoError(std::io::Error::new(std::io::ErrorKind::Other, msg)), + _ => RepositoryError::Other(err.to_string()), + } + } +} + 
+/// Represents a paginated result of obsoleted packages +#[derive(Debug, Clone)] +pub struct PaginatedObsoletedPackages { + /// The list of obsoleted packages for the current page + pub packages: Vec, + /// The total number of obsoleted packages + pub total_count: usize, + /// The current page number (1-based) + pub page: usize, + /// The number of packages per page + pub page_size: usize, + /// The total number of pages + pub total_pages: usize, +} + +/// Key used for indexing obsoleted packages +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +struct ObsoletedPackageKey { + /// Publisher name + publisher: String, + /// Package stem (name) + stem: String, + /// Package version + version: String, +} + +impl ObsoletedPackageKey { + /// Create a new ObsoletedPackageKey from a publisher and FMRI + fn new(publisher: &str, fmri: &Fmri) -> Self { + Self { + publisher: publisher.to_string(), + stem: fmri.stem().to_string(), + version: fmri.version().to_string(), + } + } + + /// Create a new ObsoletedPackageKey from components + fn from_components(publisher: &str, stem: &str, version: &str) -> Self { + Self { + publisher: publisher.to_string(), + stem: stem.to_string(), + version: version.to_string(), + } + } +} + + +// Table definitions for the redb database +// Table for mapping FMRI to content hash +static FMRI_TO_HASH_TABLE: TableDefinition<&[u8], &str> = TableDefinition::new("fmri_to_hash"); +// Table for mapping content hash to metadata +static HASH_TO_METADATA_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("hash_to_metadata"); +// Table for mapping content hash to manifest +static HASH_TO_MANIFEST_TABLE: TableDefinition<&str, &str> = TableDefinition::new("hash_to_manifest"); + +/// Index of obsoleted packages using redb for faster lookups and content-addressable storage +#[derive(Debug)] +struct RedbObsoletedPackageIndex { + /// The redb database + db: Database, + /// Last time the index was accessed + last_accessed: Instant, + /// 
Whether the index is dirty and needs to be rebuilt + dirty: bool, + /// Maximum age of the index before it needs to be rebuilt (in seconds) + max_age: Duration, +} + +impl RedbObsoletedPackageIndex { + /// Create a new RedbObsoletedPackageIndex + fn new<P: AsRef<Path>>(base_path: P) -> Result<Self> { + let db_path = base_path.as_ref().join("index.redb"); + debug!("Creating redb database at {}", db_path.display()); + + // Create the database + let db = Database::create(&db_path)?; + + // Create the tables if they don't exist + let write_txn = db.begin_write()?; + { + write_txn.open_table(FMRI_TO_HASH_TABLE)?; + write_txn.open_table(HASH_TO_METADATA_TABLE)?; + write_txn.open_table(HASH_TO_MANIFEST_TABLE)?; + } + write_txn.commit()?; + + Ok(Self { + db, + last_accessed: Instant::now(), + dirty: false, + max_age: Duration::from_secs(300), // 5 minutes + }) + } + + /// Check if the index is stale and needs to be rebuilt + fn is_stale(&self) -> bool { + self.dirty || self.last_accessed.elapsed() > self.max_age + } + + /// Create an empty temporary file-based RedbObsoletedPackageIndex + /// + /// This is used as a fallback when the database creation fails. + /// It creates a database in a temporary directory that can be used temporarily. 
+ fn empty() -> Self { + debug!("Creating empty temporary file-based redb database"); + + // Create a temporary directory + let temp_dir = tempfile::tempdir().unwrap_or_else(|e| { + error!("Failed to create temporary directory: {}", e); + panic!("Failed to create temporary directory: {}", e); + }); + + // Create a database file in the temporary directory + let db_path = temp_dir.path().join("empty.redb"); + + // Create the database + let db = Database::create(&db_path).unwrap_or_else(|e| { + error!("Failed to create temporary database: {}", e); + panic!("Failed to create temporary database: {}", e); + }); + + // Create the tables + let write_txn = db.begin_write().unwrap(); + { + let _ = write_txn.open_table(FMRI_TO_HASH_TABLE).unwrap(); + let _ = write_txn.open_table(HASH_TO_METADATA_TABLE).unwrap(); + let _ = write_txn.open_table(HASH_TO_MANIFEST_TABLE).unwrap(); + } + write_txn.commit().unwrap(); + + Self { + db, + last_accessed: Instant::now(), + dirty: false, + max_age: Duration::from_secs(300), // 5 minutes + } + } + + /// Open an existing RedbObsoletedPackageIndex + fn open<P: AsRef<Path>>(base_path: P) -> Result<Self> { + let db_path = base_path.as_ref().join("index.redb"); + debug!("Opening redb database at {}", db_path.display()); + + // Open the database + let db = Database::open(&db_path)?; + + Ok(Self { + db, + last_accessed: Instant::now(), + dirty: false, + max_age: Duration::from_secs(300), // 5 minutes + }) + } + + /// Create or open a RedbObsoletedPackageIndex + fn create_or_open<P: AsRef<Path>>(base_path: P) -> Result<Self> { + let db_path = base_path.as_ref().join("index.redb"); + + if db_path.exists() { + Self::open(base_path) + } else { + Self::new(base_path) + } + } + + /// Add an entry to the index + fn add_entry(&self, key: &ObsoletedPackageKey, metadata: &ObsoletedPackageMetadata, manifest: &str) -> Result<()> { + // Calculate content hash if not already present + let content_hash = if metadata.content_hash.is_empty() { + let mut hasher = sha2::Sha256::new(); + 
hasher.update(manifest.as_bytes()); + format!("sha256-{:x}", hasher.finalize()) + } else { + metadata.content_hash.clone() + }; + + // Serialize the key and metadata + let key_bytes = serialize(key)?; + let metadata_bytes = serialize(metadata)?; + + // Begin write transaction + let write_txn = self.db.begin_write()?; + { + // Open the tables + let mut fmri_to_hash = write_txn.open_table(FMRI_TO_HASH_TABLE)?; + let mut hash_to_metadata = write_txn.open_table(HASH_TO_METADATA_TABLE)?; + let mut hash_to_manifest = write_txn.open_table(HASH_TO_MANIFEST_TABLE)?; + + // Insert the entries + fmri_to_hash.insert(key_bytes.as_slice(), content_hash.as_str())?; + hash_to_metadata.insert(content_hash.as_str(), metadata_bytes.as_slice())?; + hash_to_manifest.insert(content_hash.as_str(), manifest)?; + } + write_txn.commit()?; + + Ok(()) + } + + /// Remove an entry from the index + fn remove_entry(&self, key: &ObsoletedPackageKey) -> Result { + // Serialize the key + let key_bytes = serialize(key)?; + + // First, check if the key exists and get the content hash + let content_hash_option = { + let read_txn = self.db.begin_read()?; + let fmri_to_hash = read_txn.open_table(FMRI_TO_HASH_TABLE)?; + + // Get the hash value and convert it to a string before the transaction is dropped + let result = match fmri_to_hash.get(key_bytes.as_slice())? 
{ + Some(hash) => { + let hash_str = hash.value().to_string(); + Some(hash_str) + }, + None => None, + }; + + // Return the result, which doesn't depend on the transaction anymore + result + }; + + // If the key doesn't exist, return early + if content_hash_option.is_none() { + return Ok(false); + } + + let content_hash = content_hash_option.unwrap(); + + // Check if there are any other entries pointing to the same content hash + let has_other_references = { + let read_txn = self.db.begin_read()?; + let fmri_to_hash = read_txn.open_table(FMRI_TO_HASH_TABLE)?; + + // Create a vector to store all keys and hashes + let mut entries = Vec::new(); + let mut iter = fmri_to_hash.iter()?; + + // Collect all entries + while let Some(entry) = iter.next() { + let (entry_key, hash) = entry?; + entries.push((entry_key.value().to_vec(), hash.value().to_string())); + } + + // Drop the transaction and iterator before processing the entries + drop(iter); + drop(fmri_to_hash); + drop(read_txn); + + // Now check if there are any other entries with the same hash + let mut has_refs = false; + for (entry_key, hash) in entries { + // Skip the key we're removing + if entry_key != key_bytes.as_slice() && hash == content_hash { + has_refs = true; + break; + } + } + + has_refs + }; + + // Now perform the actual removal + let write_txn = self.db.begin_write()?; + { + // Remove the entry from fmri_to_hash + let mut fmri_to_hash = write_txn.open_table(FMRI_TO_HASH_TABLE)?; + fmri_to_hash.remove(key_bytes.as_slice())?; + + // If there are no other references to the content hash, remove the metadata and manifest + if !has_other_references { + let mut hash_to_metadata = write_txn.open_table(HASH_TO_METADATA_TABLE)?; + let mut hash_to_manifest = write_txn.open_table(HASH_TO_MANIFEST_TABLE)?; + + hash_to_metadata.remove(content_hash.as_str())?; + hash_to_manifest.remove(content_hash.as_str())?; + } + } + + write_txn.commit()?; + + Ok(true) + } + + /// Get an entry from the index + fn get_entry(&self, 
key: &ObsoletedPackageKey) -> Result> { + // Serialize the key + let key_bytes = serialize(key)?; + + // First, get the content hash + let content_hash = { + let read_txn = self.db.begin_read()?; + let fmri_to_hash = read_txn.open_table(FMRI_TO_HASH_TABLE)?; + + // Get the hash and convert to a string before the transaction is dropped + let result = match fmri_to_hash.get(key_bytes.as_slice())? { + Some(hash) => Some(hash.value().to_string()), + None => None, + }; + + // Return the result, which doesn't depend on the transaction anymore + result + }; + + // If the content hash is not found, return None + let content_hash = match content_hash { + Some(hash) => hash, + None => return Ok(None), + }; + + // Now get the metadata and manifest + let (metadata_bytes, manifest_str) = { + let read_txn = self.db.begin_read()?; + let hash_to_metadata = read_txn.open_table(HASH_TO_METADATA_TABLE)?; + let hash_to_manifest = read_txn.open_table(HASH_TO_MANIFEST_TABLE)?; + + // Get the metadata bytes + let metadata_bytes = match hash_to_metadata.get(content_hash.as_str())? { + Some(bytes) => bytes.value().to_vec(), + None => return Ok(None), + }; + + // Get the manifest string + let manifest_str = match hash_to_manifest.get(content_hash.as_str())? 
{ + Some(manifest) => manifest.value().to_string(), + None => return Ok(None), + }; + + // Return the results, which don't depend on the transaction anymore + (metadata_bytes, manifest_str) + }; + + // Deserialize the metadata + let metadata: ObsoletedPackageMetadata = deserialize(&metadata_bytes)?; + + Ok(Some((metadata, manifest_str))) + } + + /// Get all entries in the index + fn get_all_entries(&self) -> Result> { + // First, collect all key-hash pairs + let key_hash_pairs = { + let read_txn = self.db.begin_read()?; + let fmri_to_hash = read_txn.open_table(FMRI_TO_HASH_TABLE)?; + + let mut pairs = Vec::new(); + let mut iter = fmri_to_hash.iter()?; + + while let Some(entry) = iter.next() { + let (key_bytes, hash) = entry?; + // Convert to owned types before the transaction is dropped + let key_data = key_bytes.value().to_vec(); + let hash_str = hash.value().to_string(); + pairs.push((key_data, hash_str)); + } + + pairs + }; + + let mut entries = Vec::new(); + + // Process each key-hash pair + for (key_data, content_hash) in key_hash_pairs { + // Deserialize the key + let key: ObsoletedPackageKey = deserialize(&key_data)?; + + // Get the metadata and manifest for this content hash + let (metadata_bytes, manifest_str) = { + let read_txn = self.db.begin_read()?; + let hash_to_metadata = read_txn.open_table(HASH_TO_METADATA_TABLE)?; + let hash_to_manifest = read_txn.open_table(HASH_TO_MANIFEST_TABLE)?; + + // Get the metadata bytes + let metadata_bytes = match hash_to_metadata.get(content_hash.as_str())? { + Some(bytes) => bytes.value().to_vec(), + None => { + // Metadata not found, skip this entry + continue; + } + }; + + // Get the manifest string + let manifest_str = match hash_to_manifest.get(content_hash.as_str())? 
{ + Some(manifest) => manifest.value().to_string(), + None => { + // Manifest isn't found, skip this entry + continue; + } + }; + + (metadata_bytes, manifest_str) + }; + + // Deserialize the metadata + let metadata: ObsoletedPackageMetadata = deserialize(&metadata_bytes)?; + + entries.push((key, metadata, manifest_str)); + } + + Ok(entries) + } + + /// Get entries matching a publisher + fn get_entries_by_publisher(&self, publisher: &str) -> Result> { + // Get all entries and filter by publisher + // This is more efficient than implementing a separate method with similar logic + let all_entries = self.get_all_entries()?; + + // Filter entries by publisher + let filtered_entries = all_entries + .into_iter() + .filter(|(key, _, _)| key.publisher == publisher) + .collect(); + + Ok(filtered_entries) + } + + /// Search for entries matching a pattern + #[allow(dead_code)] + fn search_entries(&self, publisher: &str, pattern: &str) -> Result> { + // Get entries for the publisher + let publisher_entries = self.get_entries_by_publisher(publisher)?; + + // Try to compile the pattern as a regex + let regex_result = Regex::new(pattern); + + // Filter entries based on the pattern + let filtered_entries = match regex_result { + Ok(regex) => { + // Filter entries using regex + publisher_entries + .into_iter() + .filter(|(key, metadata, _)| { + // Match against the FMRI string + regex.is_match(&metadata.fmri) || + // Match against the package name + regex.is_match(&key.stem) + }) + .collect() + }, + Err(_) => { + // If regex compilation fails, fall back to simple substring matching + publisher_entries + .into_iter() + .filter(|(key, metadata, _)| { + // Match against the FMRI string + metadata.fmri.contains(pattern) || + // Match against the package name + key.stem.contains(pattern) + }) + .collect() + } + }; + + Ok(filtered_entries) + } + + /// Clear the index + fn clear(&self) -> Result<()> { + // Begin a write transaction + let write_txn = self.db.begin_write()?; + { + // Clear 
all tables by removing all entries + // Since redb doesn't have a clear() method, we need to iterate and remove each key + + // Clear fmri_to_hash table + { + let mut fmri_to_hash = write_txn.open_table(FMRI_TO_HASH_TABLE)?; + let keys_to_remove = { + // First collect all keys in a separate scope + let read_txn = self.db.begin_read()?; + let fmri_to_hash_read = read_txn.open_table(FMRI_TO_HASH_TABLE)?; + let mut keys = Vec::new(); + let mut iter = fmri_to_hash_read.iter()?; + while let Some(entry) = iter.next() { + let (key, _) = entry?; + keys.push(key.value().to_vec()); + } + keys + }; + + // Then remove all keys + for key in keys_to_remove { + fmri_to_hash.remove(key.as_slice())?; + } + } + + // Clear hash_to_metadata table + { + let mut hash_to_metadata = write_txn.open_table(HASH_TO_METADATA_TABLE)?; + let keys_to_remove = { + // First collect all keys in a separate scope + let read_txn = self.db.begin_read()?; + let hash_to_metadata_read = read_txn.open_table(HASH_TO_METADATA_TABLE)?; + let mut keys = Vec::new(); + let mut iter = hash_to_metadata_read.iter()?; + while let Some(entry) = iter.next() { + let (key, _) = entry?; + keys.push(key.value().to_string()); + } + keys + }; + + // Then remove all keys + for key in keys_to_remove { + hash_to_metadata.remove(key.as_str())?; + } + } + + // Clear hash_to_manifest table + { + let mut hash_to_manifest = write_txn.open_table(HASH_TO_MANIFEST_TABLE)?; + let keys_to_remove = { + // First collect all keys in a separate scope + let read_txn = self.db.begin_read()?; + let hash_to_manifest_read = read_txn.open_table(HASH_TO_MANIFEST_TABLE)?; + let mut keys = Vec::new(); + let mut iter = hash_to_manifest_read.iter()?; + while let Some(entry) = iter.next() { + let (key, _) = entry?; + keys.push(key.value().to_string()); + } + keys + }; + + // Then remove all keys + for key in keys_to_remove { + hash_to_manifest.remove(key.as_str())?; + } + } + } + write_txn.commit()?; + + Ok(()) + } + + /// Get the number of entries in 
the index + fn len(&self) -> Result { + // Begin a read transaction + let read_txn = self.db.begin_read()?; + + // Open the fmri_to_hash table + let fmri_to_hash = read_txn.open_table(FMRI_TO_HASH_TABLE)?; + + // Count the entries + let mut count = 0; + let mut iter = fmri_to_hash.iter()?; + + // Iterate through all entries and count them + while let Some(entry_result) = iter.next() { + // Just check if the entry exists, we don't need to access its value + entry_result?; + count += 1; + } + + // Drop the iterator and table before returning + drop(iter); + drop(fmri_to_hash); + drop(read_txn); + + Ok(count) + } + + /// Check if the index is empty + #[allow(dead_code)] + fn is_empty(&self) -> Result { + Ok(self.len()? == 0) + } +} + + + +/// Represents metadata for an obsoleted package +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ObsoletedPackageMetadata { + /// The FMRI of the obsoleted package + pub fmri: String, + + /// The status of the package (always "obsolete") + pub status: String, + + /// The date when the package was obsoleted + pub obsolescence_date: String, + + /// A message explaining why the package was obsoleted + #[serde(skip_serializing_if = "Option::is_none")] + pub deprecation_message: Option, + + /// List of FMRIs that replace this package + #[serde(skip_serializing_if = "Option::is_none")] + pub obsoleted_by: Option>, + + /// Version of the metadata schema + pub metadata_version: u32, + + /// Hash of the original manifest content + pub content_hash: String, +} + + +impl ObsoletedPackageMetadata { + /// Create a new ObsoletedPackageMetadata instance + pub fn new( + fmri: &str, + content_hash: &str, + obsoleted_by: Option>, + deprecation_message: Option, + ) -> Self { + // Get the current time for obsolescence_date + let now = SystemTime::now(); + let since_epoch = now.duration_since(UNIX_EPOCH).unwrap(); + let obsolescence_date = format!( + "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z", + 1970 + since_epoch.as_secs() / 31_536_000, + 
(since_epoch.as_secs() % 31_536_000) / 2_592_000 + 1, + ((since_epoch.as_secs() % 2_592_000) / 86_400) + 1, + (since_epoch.as_secs() % 86_400) / 3600, + (since_epoch.as_secs() % 3600) / 60, + since_epoch.as_secs() % 60 + ); + + Self { + fmri: fmri.to_string(), + status: "obsolete".to_string(), + obsolescence_date, + deprecation_message, + obsoleted_by, + metadata_version: 1, + content_hash: content_hash.to_string(), + } + } +} + +/// Manages obsoleted packages in the repository +pub struct ObsoletedPackageManager { + /// Base path for obsoleted packages + base_path: PathBuf, + /// Index of obsoleted packages for faster lookups using redb + index: RwLock, +} + +impl ObsoletedPackageManager { + /// Create a new ObsoletedPackageManager + pub fn new>(repo_path: P) -> Self { + let base_path = repo_path.as_ref().join("obsoleted"); + + let index = { + // Create or open the redb-based index + let redb_index = RedbObsoletedPackageIndex::create_or_open(&base_path).unwrap_or_else(|e| { + // Log the error and create an empty redb index + error!("Failed to create or open redb-based index: {}", e); + RedbObsoletedPackageIndex::empty() + }); + RwLock::new(redb_index) + }; + + Self { + base_path, + index, + } + } + + /// Initialize the obsoleted packages directory structure + pub fn init(&self) -> Result<()> { + debug!("Initializing obsoleted packages directory: {}", self.base_path.display()); + fs::create_dir_all(&self.base_path)?; + + // Initialize the index + self.build_index()?; + + Ok(()) + } + + /// Build the index of obsoleted packages + fn build_index(&self) -> Result<()> { + debug!("Building index of obsoleted packages"); + + // Get a write lock on the index + let index = self.index.write().map_err(|e| ObsoletedPackageError::IndexError(format!( + "Failed to acquire write lock on index: {}", e + )))?; + + // Clear the index + let _ = index.clear(); + + // Check if the base path exists + if !self.base_path.exists() { + debug!("Obsoleted packages directory does not exist: 
{}", self.base_path.display()); + return Ok(()); + } + + // Walk through the directory structure to find all obsoleted packages + for publisher_entry in fs::read_dir(&self.base_path) + .map_err(|e| ObsoletedPackageError::IoError(format!( + "Failed to read obsoleted packages directory {}: {}", + self.base_path.display(), e + )))? + { + let publisher_entry = publisher_entry.map_err(|e| ObsoletedPackageError::IoError(format!( + "Failed to read publisher entry: {}", e + )))?; + + let publisher_path = publisher_entry.path(); + if !publisher_path.is_dir() { + continue; + } + + let publisher = publisher_path.file_name() + .ok_or_else(|| ObsoletedPackageError::IoError(format!( + "Failed to get publisher name from path: {}", + publisher_path.display() + )))? + .to_string_lossy() + .to_string(); + + debug!("Indexing obsoleted packages for publisher: {}", publisher); + + // Walk through the package directories + for pkg_entry in fs::read_dir(&publisher_path) + .map_err(|e| ObsoletedPackageError::IoError(format!( + "Failed to read publisher directory {}: {}", + publisher_path.display(), e + )))? + { + let pkg_entry = pkg_entry.map_err(|e| ObsoletedPackageError::IoError(format!( + "Failed to read package entry: {}", e + )))?; + + let pkg_path = pkg_entry.path(); + if !pkg_path.is_dir() { + continue; + } + + let stem = pkg_path.file_name() + .ok_or_else(|| ObsoletedPackageError::IoError(format!( + "Failed to get package stem from path: {}", + pkg_path.display() + )))? + .to_string_lossy() + .to_string(); + + debug!("Indexing obsoleted package: {}", stem); + + // Walk through the version files + for version_entry in fs::read_dir(&pkg_path) + .map_err(|e| ObsoletedPackageError::IoError(format!( + "Failed to read package directory {}: {}", + pkg_path.display(), e + )))? 
+ { + let version_entry = version_entry.map_err(|e| ObsoletedPackageError::IoError(format!( + "Failed to read version entry: {}", e + )))?; + + let version_path = version_entry.path(); + if !version_path.is_file() { + continue; + } + + // Check if this is a metadata file + if let Some(extension) = version_path.extension() { + if extension != "json" { + continue; + } + + // Extract the version from the filename + let filename = version_path.file_stem() + .ok_or_else(|| ObsoletedPackageError::IoError(format!( + "Failed to get version from path: {}", + version_path.display() + )))? + .to_string_lossy() + .to_string(); + + // Construct the manifest path + let manifest_path = pkg_path.join(format!("{}.manifest", filename)); + + // Get the last modified time of the metadata file + let metadata = fs::metadata(&version_path) + .map_err(|e| ObsoletedPackageError::IoError(format!( + "Failed to get metadata for file {}: {}", + version_path.display(), e + )))?; + + let _last_modified = metadata.modified() + .map_err(|e| ObsoletedPackageError::IoError(format!( + "Failed to get last modified time for file {}: {}", + version_path.display(), e + )))?; + + // Create an index entry + let key = ObsoletedPackageKey::from_components(&publisher, &stem, &filename); + + // Read the metadata file + let metadata_json = fs::read_to_string(&version_path) + .map_err(|e| ObsoletedPackageError::IoError(format!( + "Failed to read metadata file {}: {}", + version_path.display(), e + )))?; + + // Parse the metadata + let metadata: ObsoletedPackageMetadata = serde_json::from_str(&metadata_json) + .map_err(|e| ObsoletedPackageError::MetadataParseError(format!( + "Failed to parse metadata from {}: {}", + version_path.display(), e + )))?; + + // Read the manifest file + let manifest_content = fs::read_to_string(&manifest_path) + .map_err(|e| ObsoletedPackageError::ManifestReadError(format!( + "Failed to read manifest file {}: {}", + manifest_path.display(), e + )))?; + + // Add the entry to the index + 
index.add_entry(&key, &metadata, &manifest_content)?; + } + } + } + } + + // Get the count of indexed packages, handling the Result + match index.len() { + Ok(count) => debug!("Indexed {} obsoleted packages", count), + Err(e) => warn!("Failed to get count of indexed packages: {}", e), + } + + Ok(()) + } + + /// Ensure the index is fresh, rebuilding it if necessary + fn ensure_index_is_fresh(&self) -> Result<()> { + // Get a read lock on the index to check if it's stale + let is_stale = { + let index = self.index.read().map_err(|e| ObsoletedPackageError::IndexError(format!( + "Failed to acquire read lock on index: {}", e + )))?; + + index.is_stale() + }; + + // If the index is stale, rebuild it + if is_stale { + debug!("Index is stale, rebuilding"); + self.build_index()?; + } + + Ok(()) + } + + /// Update an entry in the index + fn update_index_entry(&self, publisher: &str, fmri: &Fmri, metadata_path: &Path, manifest_path: &Path) -> Result<()> { + // Get a write lock on the index + let index = self.index.write().map_err(|e| ObsoletedPackageError::IndexError(format!( + "Failed to acquire write lock on index: {}", e + )))?; + + // Create the key + let key = ObsoletedPackageKey::new(publisher, fmri); + + // Read the metadata file + let metadata_json = fs::read_to_string(metadata_path) + .map_err(|e| ObsoletedPackageError::MetadataReadError(format!( + "Failed to read metadata file {}: {}", + metadata_path.display(), e + )))?; + + // Parse the metadata + let metadata: ObsoletedPackageMetadata = serde_json::from_str(&metadata_json) + .map_err(|e| ObsoletedPackageError::MetadataParseError(format!( + "Failed to parse metadata from {}: {}", + metadata_path.display(), e + )))?; + + // Read the manifest file + let manifest_content = fs::read_to_string(manifest_path) + .map_err(|e| ObsoletedPackageError::ManifestReadError(format!( + "Failed to read manifest file {}: {}", + manifest_path.display(), e + )))?; + + // Add the entry to the index + index.add_entry(&key, &metadata, 
&manifest_content)?; + + Ok(()) + } + + /// Store an obsoleted package + pub fn store_obsoleted_package( + &self, + publisher: &str, + fmri: &Fmri, + manifest_content: &str, + obsoleted_by: Option>, + deprecation_message: Option, + ) -> Result { + // Create a publisher directory if it doesn't exist + let publisher_dir = self.base_path.join(publisher); + fs::create_dir_all(&publisher_dir)?; + + // Calculate content hash + let mut hasher = sha2::Sha256::new(); + hasher.update(manifest_content.as_bytes()); + let content_hash = format!("sha256-{:x}", hasher.finalize()); + + // Create metadata + let metadata = ObsoletedPackageMetadata::new( + &fmri.to_string(), + &content_hash, + obsoleted_by, + deprecation_message, + ); + + // Construct path for the obsoleted package + let stem = fmri.stem(); + let version = fmri.version(); + let pkg_dir = publisher_dir.join(stem); + fs::create_dir_all(&pkg_dir)?; + + // URL encode the version to use as filename + let encoded_version = url_encode(&version); + let metadata_path = pkg_dir.join(format!("{}.json", encoded_version)); + + // Write metadata to file + let metadata_json = serde_json::to_string_pretty(&metadata)?; + fs::write(&metadata_path, metadata_json)?; + + // Store the original manifest alongside the metadata + let manifest_path = pkg_dir.join(format!("{}.manifest", encoded_version)); + fs::write(&manifest_path, manifest_content)?; + + // Update the index with this package + if let Ok(index) = self.index.write() { + let key = ObsoletedPackageKey::new(publisher, fmri); + if let Err(e) = index.add_entry(&key, &metadata, manifest_content) { + warn!("Failed to add package to index: {}", e); + } + } else { + warn!("Failed to acquire write lock on index, package not added to index: {}", fmri); + } + + info!("Stored obsoleted package: {}", fmri); + Ok(metadata_path) + } + + /// Check if a package is obsoleted + pub fn is_obsoleted(&self, publisher: &str, fmri: &Fmri) -> bool { + // First check the filesystem directly for faster 
results in tests + let stem = fmri.stem(); + let version = fmri.version(); + let encoded_version = url_encode(&version); + let metadata_path = self.base_path.join(publisher).join(stem).join(format!("{}.json", encoded_version)); + + if metadata_path.exists() { + return true; + } + + // Ensure the index is fresh + if let Err(e) = self.ensure_index_is_fresh() { + warn!("Failed to ensure index is fresh: {}", e); + // Already checked the filesystem above, so return false + return false; + } + + // Check the index + let key = ObsoletedPackageKey::new(publisher, fmri); + match self.index.read() { + Ok(index) => { + // Properly handle the Result returned by get_entry + match index.get_entry(&key) { + Ok(Some(_)) => true, + Ok(None) => false, + Err(e) => { + warn!("Error checking if package is obsoleted in index: {}", e); + false + } + } + }, + Err(e) => { + warn!("Failed to acquire read lock on index: {}", e); + // Already checked the filesystem above, so return false + false + } + } + } + + /// Get metadata for an obsoleted package + pub fn get_obsoleted_package_metadata( + &self, + publisher: &str, + fmri: &Fmri, + ) -> Result> { + // Ensure the index is fresh + if let Err(e) = self.ensure_index_is_fresh() { + warn!("Failed to ensure index is fresh: {}", e); + // Fall back to the filesystem check if the index is not available + return self.get_obsoleted_package_metadata_from_filesystem(publisher, fmri); + } + + // Check the index + let key = ObsoletedPackageKey::new(publisher, fmri); + + // Try to get a read lock on the index + let index_read_result = self.index.read(); + if let Err(e) = index_read_result { + warn!("Failed to acquire read lock on index: {}", e); + // Fall back to the filesystem check if the index is not available + return self.get_obsoleted_package_metadata_from_filesystem(publisher, fmri); + } + + let index = index_read_result.unwrap(); + + // Check if the package is in the index + match index.get_entry(&key) { + Ok(Some((metadata, _))) => { + // Return 
the metadata directly from the index + Ok(Some(metadata)) + }, + Ok(None) => { + // Package not found in the index, fall back to the filesystem check + self.get_obsoleted_package_metadata_from_filesystem(publisher, fmri) + }, + Err(e) => { + warn!("Failed to get entry from index: {}", e); + // Fall back to the filesystem check if there's an error + self.get_obsoleted_package_metadata_from_filesystem(publisher, fmri) + } + } + } + + /// Get metadata for an obsoleted package from the filesystem + fn get_obsoleted_package_metadata_from_filesystem( + &self, + publisher: &str, + fmri: &Fmri, + ) -> Result> { + let stem = fmri.stem(); + let version = fmri.version(); + let encoded_version = url_encode(&version); + let metadata_path = self.base_path.join(publisher).join(stem).join(format!("{}.json", encoded_version)); + let manifest_path = self.base_path.join(publisher).join(stem).join(format!("{}.manifest", encoded_version)); + + if !metadata_path.exists() { + debug!("Metadata file not found: {}", metadata_path.display()); + return Ok(None); + } + + // Read the metadata file + let metadata_json = fs::read_to_string(&metadata_path) + .map_err(|e| ObsoletedPackageError::MetadataReadError(format!( + "Failed to read metadata file {}: {}", + metadata_path.display(), e + )))?; + + // Parse the metadata JSON + let metadata: ObsoletedPackageMetadata = serde_json::from_str(&metadata_json) + .map_err(|e| ObsoletedPackageError::MetadataParseError(format!( + "Failed to parse metadata from {}: {}", + metadata_path.display(), e + )))?; + + // Update the index with this package + if metadata_path.exists() && manifest_path.exists() { + if let Err(e) = self.update_index_entry(publisher, fmri, &metadata_path, &manifest_path) { + warn!("Failed to update index entry: {}", e); + } + } + + Ok(Some(metadata)) + } + + /// Get the manifest content for an obsoleted package + /// + /// This method retrieves the original manifest content for an obsoleted package. 
+ /// It can be used to restore the package to the main repository. + /// + /// # Arguments + /// + /// * `publisher` - The publisher of the obsoleted package + /// * `fmri` - The FMRI of the obsoleted package + /// + /// # Returns + /// + /// The manifest content as a string, or None if the package is not found + pub fn get_obsoleted_package_manifest( + &self, + publisher: &str, + fmri: &Fmri, + ) -> Result> { + // Ensure the index is fresh + if let Err(e) = self.ensure_index_is_fresh() { + warn!("Failed to ensure index is fresh: {}", e); + // Fall back to the filesystem check if the index is not available + return self.get_obsoleted_package_manifest_from_filesystem(publisher, fmri); + } + + // Check the index + let key = ObsoletedPackageKey::new(publisher, fmri); + + // Try to get a read lock on the index + let index_read_result = self.index.read(); + if let Err(e) = index_read_result { + warn!("Failed to acquire read lock on index: {}", e); + // Fall back to the filesystem check if the index is not available + return self.get_obsoleted_package_manifest_from_filesystem(publisher, fmri); + } + + let index = index_read_result.unwrap(); + + // Check if the package is in the index + match index.get_entry(&key) { + Ok(Some((_, manifest))) => { + // Return the manifest content directly from the index + Ok(Some(manifest)) + }, + Ok(None) => { + // Package not found in the index, fall back to the filesystem check + self.get_obsoleted_package_manifest_from_filesystem(publisher, fmri) + }, + Err(e) => { + warn!("Failed to get entry from index: {}", e); + // Fall back to the filesystem check if there's an error + self.get_obsoleted_package_manifest_from_filesystem(publisher, fmri) + } + } + } + + /// Get the manifest content for an obsoleted package from the filesystem + fn get_obsoleted_package_manifest_from_filesystem( + &self, + publisher: &str, + fmri: &Fmri, + ) -> Result> { + let stem = fmri.stem(); + let version = fmri.version(); + let encoded_version = 
url_encode(&version); + let metadata_path = self.base_path.join(publisher).join(stem).join(format!("{}.json", encoded_version)); + let manifest_path = self.base_path.join(publisher).join(stem).join(format!("{}.manifest", encoded_version)); + + if !manifest_path.exists() { + debug!("Manifest file not found: {}", manifest_path.display()); + return Ok(None); + } + + // Read the manifest file + let manifest_content = fs::read_to_string(&manifest_path) + .map_err(|e| ObsoletedPackageError::ManifestReadError(format!( + "Failed to read manifest file {}: {}", + manifest_path.display(), e + )))?; + + // Update the index with this package + if metadata_path.exists() && manifest_path.exists() { + if let Err(e) = self.update_index_entry(publisher, fmri, &metadata_path, &manifest_path) { + warn!("Failed to update index entry: {}", e); + } + } + + Ok(Some(manifest_content)) + } + + /// Get manifest content and remove an obsoleted package + /// + /// This method retrieves the manifest content of an obsoleted package and removes it + /// from the obsoleted packages directory. It's used as part of the process to restore + /// an obsoleted package to the main repository. + /// + /// # Arguments + /// + /// * `publisher` - The publisher of the obsoleted package + /// * `fmri` - The FMRI of the obsoleted package + /// + /// # Returns + /// + /// The manifest content if the package was found, or an error if the operation failed + pub fn get_and_remove_obsoleted_package( + &self, + publisher: &str, + fmri: &Fmri, + ) -> Result { + debug!("Getting and removing obsoleted package: {} (publisher: {})", fmri, publisher); + + // Get the manifest content + let manifest_content = match self.get_obsoleted_package_manifest(publisher, fmri)? 
{ + Some(content) => content, + None => { + return Err(ObsoletedPackageError::NotFound(format!( + "Obsoleted package not found: {}", + fmri + )).into()); + } + }; + + // Remove the obsoleted package from the obsoleted packages directory + self.remove_obsoleted_package(publisher, fmri)?; + + info!("Retrieved and removed obsoleted package: {}", fmri); + Ok(manifest_content) + } + + /// Remove an obsoleted package + /// + /// This method removes an obsoleted package from the obsoleted packages directory. + /// It can be used after restoring a package to the main repository. + /// + /// # Arguments + /// + /// * `publisher` - The publisher of the obsoleted package + /// * `fmri` - The FMRI of the obsoleted package + /// + /// # Returns + /// + /// `true` if the package was removed, `false` if it was not found + pub fn remove_obsoleted_package( + &self, + publisher: &str, + fmri: &Fmri, + ) -> Result { + let stem = fmri.stem(); + let version = fmri.version(); + let encoded_version = url_encode(&version); + let metadata_path = self.base_path.join(publisher).join(stem).join(format!("{}.json", encoded_version)); + let manifest_path = self.base_path.join(publisher).join(stem).join(format!("{}.manifest", encoded_version)); + + debug!("Removing obsoleted package: {} (publisher: {})", fmri, publisher); + debug!("Metadata path: {}", metadata_path.display()); + debug!("Manifest path: {}", manifest_path.display()); + + if !metadata_path.exists() && !manifest_path.exists() { + // Package not found + debug!("Obsoleted package not found: {}", fmri); + return Ok(false); + } + + // Remove the metadata file if it exists + if metadata_path.exists() { + debug!("Removing metadata file: {}", metadata_path.display()); + fs::remove_file(&metadata_path) + .map_err(|e| ObsoletedPackageError::RemoveError(format!( + "Failed to remove metadata file {}: {}", + metadata_path.display(), e + )))?; + } + + // Remove the manifest file if it exists + if manifest_path.exists() { + debug!("Removing 
manifest file: {}", manifest_path.display()); + fs::remove_file(&manifest_path) + .map_err(|e| ObsoletedPackageError::RemoveError(format!( + "Failed to remove manifest file {}: {}", + manifest_path.display(), e + )))?; + } + + // Check if the package directory is empty and remove it if it is + let pkg_dir = self.base_path.join(publisher).join(stem); + if pkg_dir.exists() { + debug!("Checking if package directory is empty: {}", pkg_dir.display()); + let is_empty = fs::read_dir(&pkg_dir) + .map_err(|e| ObsoletedPackageError::IoError(format!( + "Failed to read directory {}: {}", + pkg_dir.display(), e + )))?.next().is_none(); + + if is_empty { + debug!("Removing empty package directory: {}", pkg_dir.display()); + fs::remove_dir(&pkg_dir) + .map_err(|e| ObsoletedPackageError::RemoveError(format!( + "Failed to remove directory {}: {}", + pkg_dir.display(), e + )))?; + } + } + + // Remove the package from the index + let key = ObsoletedPackageKey::new(publisher, fmri); + + // Try to get a write lock on the index + match self.index.write() { + Ok(index) => { + // Try to remove the entry from the index + match index.remove_entry(&key) { + Ok(true) => { + debug!("Removed package from index: {}", fmri); + }, + Ok(false) => { + debug!("Package not found in index: {}", fmri); + // If the package is not in the index, we need to rebuild the index + // This is a fallback in case the index is out of sync with the filesystem + if let Err(e) = self.build_index() { + warn!("Failed to rebuild index after package not found: {}", e); + } + }, + Err(e) => { + warn!("Failed to remove package from index: {}: {}", fmri, e); + // If there's an error removing the entry, rebuild the index + if let Err(e) = self.build_index() { + warn!("Failed to rebuild index after error: {}", e); + } + } + } + }, + Err(e) => { + warn!("Failed to acquire write lock on index, package not removed from index: {}: {}", fmri, e); + // If we can't get a write lock, mark the index as dirty so it will be rebuilt next 
time + if let Ok(index) = self.index.write() { + // This is a new write attempt, so it might succeed even if the previous one failed + if let Err(e) = index.clear() { + warn!("Failed to clear index: {}", e); + } + } + } + } + + info!("Removed obsoleted package: {}", fmri); + Ok(true) + } + + /// List all obsoleted packages for a publisher + /// + /// This method returns all obsoleted packages for a publisher without pagination. + /// For large repositories, consider using `list_obsoleted_packages_paginated` instead. + pub fn list_obsoleted_packages(&self, publisher: &str) -> Result> { + // Ensure the index is fresh + if let Err(e) = self.ensure_index_is_fresh() { + warn!("Failed to ensure index is fresh: {}", e); + // Fall back to the filesystem check if the index is not available + return self.list_obsoleted_packages_from_filesystem(publisher); + } + + // Try to get a read lock on the index + let index_read_result = self.index.read(); + if let Err(e) = index_read_result { + warn!("Failed to acquire read lock on index: {}", e); + // Fall back to the filesystem check if the index is not available + return self.list_obsoleted_packages_from_filesystem(publisher); + } + + let index = index_read_result.unwrap(); + + // Use get_entries_by_publisher to get all entries for the specified publisher + let entries = match index.get_entries_by_publisher(publisher) { + Ok(entries) => entries, + Err(e) => { + warn!("Failed to get entries for publisher from index: {}", e); + // Fall back to the filesystem check if there's an error + return self.list_obsoleted_packages_from_filesystem(publisher); + } + }; + + // Convert entries to FMRIs + let mut packages = Vec::new(); + for (key, _, _) in entries { + // Try to parse the FMRI from the components + let fmri_str = format!("pkg://{}/{}@{}", key.publisher, key.stem, key.version); + if let Ok(fmri) = Fmri::parse(&fmri_str) { + packages.push(fmri); + } + } + + Ok(packages) + } + + /// List all obsoleted packages for a publisher from the 
filesystem + /// + /// This method is used as a fallback when the index is not available. + fn list_obsoleted_packages_from_filesystem(&self, publisher: &str) -> Result> { + let publisher_dir = self.base_path.join(publisher); + if !publisher_dir.exists() { + return Ok(Vec::new()); + } + + let mut obsoleted_packages = Vec::new(); + + // Walk through the publisher directory + for entry in walkdir::WalkDir::new(&publisher_dir) + .min_depth(2) // Skip the publisher directory itself + .into_iter() + .filter_map(|e| e.ok()) + { + let path = entry.path(); + if path.is_file() && path.extension().map_or(false, |ext| ext == "json") { + // Read the metadata file + if let Ok(metadata_json) = fs::read_to_string(path) { + if let Ok(metadata) = serde_json::from_str::(&metadata_json) { + // Parse the FMRI + if let Ok(fmri) = Fmri::parse(&metadata.fmri) { + obsoleted_packages.push(fmri); + } + } + } + } + } + + Ok(obsoleted_packages) + } + + + /// List obsoleted packages for a publisher with pagination + /// + /// This method returns a paginated list of obsoleted packages for a publisher. + /// It's useful when dealing with repositories that have many obsoleted packages. 
+ /// + /// # Arguments + /// + /// * `publisher` - The publisher to list packages for + /// * `page` - The page number (1-based, defaults to 1) + /// * `page_size` - The number of packages per page (defaults to 100) + /// + /// # Returns + /// + /// A paginated result containing the packages for the requested page + pub fn list_obsoleted_packages_paginated( + &self, + publisher: &str, + page: Option, + page_size: Option, + ) -> Result { + let publisher_dir = self.base_path.join(publisher); + if !publisher_dir.exists() { + return Ok(PaginatedObsoletedPackages { + packages: Vec::new(), + total_count: 0, + page: 1, + page_size: page_size.unwrap_or(100), + total_pages: 0, + }); + } + + let mut all_packages = Vec::new(); + + // Walk through the publisher directory + for entry in walkdir::WalkDir::new(&publisher_dir) + .min_depth(2) // Skip the publisher directory itself + .into_iter() + .filter_map(|e| e.ok()) + { + let path = entry.path(); + if path.is_file() && path.extension().map_or(false, |ext| ext == "json") { + // Read the metadata file + if let Ok(metadata_json) = fs::read_to_string(path) { + if let Ok(metadata) = serde_json::from_str::(&metadata_json) { + // Parse the FMRI + if let Ok(fmri) = Fmri::parse(&metadata.fmri) { + all_packages.push(fmri); + } + } + } + } + } + + // Sort packages by name and version for consistent pagination + all_packages.sort_by(|a, b| { + let name_cmp = a.stem().cmp(b.stem()); + if name_cmp == std::cmp::Ordering::Equal { + a.version().cmp(&b.version()) + } else { + name_cmp + } + }); + + // Calculate pagination + let page = page.unwrap_or(1).max(1); // Ensure page is at least 1 + let page_size = page_size.unwrap_or(100); + let total_count = all_packages.len(); + let total_pages = if total_count == 0 { + 0 + } else { + (total_count + page_size - 1) / page_size + }; + + // If no pagination is requested or there's only one page, return all packages + if page_size == 0 || total_pages <= 1 { + return Ok(PaginatedObsoletedPackages { + 
packages: all_packages, + total_count, + page: 1, + page_size, + total_pages, + }); + } + + // Calculate start and end indices for the requested page + let start_idx = (page - 1) * page_size; + let end_idx = start_idx + page_size; + + // Get packages for the requested page + let packages = if start_idx >= total_count { + // If start index is beyond the total count, return an empty page + Vec::new() + } else { + all_packages[start_idx..end_idx.min(total_count)].to_vec() + }; + + Ok(PaginatedObsoletedPackages { + packages, + total_count, + page, + page_size, + total_pages, + }) + } + + /// Search for obsoleted packages matching a pattern + /// + /// This method searches for obsoleted packages that match the given pattern. + /// The pattern can be a simple substring or a regular expression. + /// + /// # Arguments + /// + /// * `publisher` - The publisher to search in + /// * `pattern` - The pattern to search for (substring or regex) + /// + /// # Returns + /// + /// A list of FMRIs for obsoleted packages that match the pattern + pub fn search_obsoleted_packages(&self, publisher: &str, pattern: &str) -> Result> { + // Ensure the index is fresh + if let Err(e) = self.ensure_index_is_fresh() { + warn!("Failed to ensure index is fresh: {}", e); + // Fall back to the filesystem-based search + return self.search_obsoleted_packages_fallback(publisher, pattern); + } + + // Try to get a read lock on the index + let index_read_result = self.index.read(); + if let Err(e) = index_read_result { + warn!("Failed to acquire read lock on index: {}", e); + // Fall back to the filesystem-based search + return self.search_obsoleted_packages_fallback(publisher, pattern); + } + + let index = index_read_result.unwrap(); + + // Get all entries from the index + let entries = match index.get_all_entries() { + Ok(entries) => entries, + Err(e) => { + warn!("Failed to get entries from index: {}", e); + // Fall back to the filesystem-based search + return 
self.search_obsoleted_packages_fallback(publisher, pattern); + } + }; + + // Check if the pattern looks like a version number + if pattern.chars().all(|c| c.is_digit(10) || c == '.') { + // This looks like a version number, so match only against the version part + let mut packages = Vec::new(); + for (key, _, _) in entries { + if key.publisher == publisher && key.version.contains(pattern) { + // Construct the FMRI string + let fmri_str = format!("pkg://{}/{}@{}", key.publisher, key.stem, key.version); + + // Parse the FMRI + if let Ok(fmri) = Fmri::parse(&fmri_str) { + packages.push(fmri); + } + } + } + return Ok(packages); + } + + // Try to compile the pattern as a regex + let result = match Regex::new(pattern) { + Ok(regex) => { + // Collect packages from the index that match the regex + let mut packages = Vec::new(); + for (key, _, _) in entries { + if key.publisher == publisher { + // Construct the FMRI string for regex matching + let fmri_str = format!("pkg://{}/{}@{}", key.publisher, key.stem, key.version); + + // Match against the FMRI string or the package name + if regex.is_match(&fmri_str) || regex.is_match(&key.stem) { + // Parse the FMRI + if let Ok(fmri) = Fmri::parse(&fmri_str) { + packages.push(fmri); + } + } + } + } + packages + }, + Err(_) => { + // Fall back to simple substring matching + let mut packages = Vec::new(); + for (key, _, _) in entries { + if key.publisher == publisher { + // Construct the FMRI string + let fmri_str = format!("pkg://{}/{}@{}", key.publisher, key.stem, key.version); + + // Match against the FMRI string or the package name + // For "package-" pattern, we want to match only packages that start with "package-" + if pattern.ends_with("-") && key.stem.starts_with(pattern) { + // Parse the FMRI + if let Ok(fmri) = Fmri::parse(&fmri_str) { + packages.push(fmri); + } + } + // For version searches like "2.0", match only the version part + else if pattern.chars().all(|c| c.is_digit(10) || c == '.') { + // This looks like a 
version number, so match only against the version part + if key.version.contains(pattern) { + // Parse the FMRI + if let Ok(fmri) = Fmri::parse(&fmri_str) { + packages.push(fmri); + } + } + } + else if fmri_str.contains(pattern) || key.stem.contains(pattern) { + // Parse the FMRI + if let Ok(fmri) = Fmri::parse(&fmri_str) { + packages.push(fmri); + } + } + } + } + packages + } + }; + + Ok(result) + } + + /// Fallback implementation of search_obsoleted_packages that uses the filesystem + fn search_obsoleted_packages_fallback(&self, publisher: &str, pattern: &str) -> Result> { + // Get all obsoleted packages for the publisher + let all_packages = self.list_obsoleted_packages(publisher)?; + + // Check if the pattern looks like a version number + if pattern.chars().all(|c| c.is_digit(10) || c == '.') { + // This looks like a version number, so match only against the version part + return Ok(all_packages + .into_iter() + .filter(|fmri| fmri.version().contains(pattern)) + .collect()); + } + + // Try to compile the pattern as a regex + let result = match Regex::new(pattern) { + Ok(regex) => { + // Filter packages using regex + all_packages + .into_iter() + .filter(|fmri| { + // Match against the FMRI string + regex.is_match(&fmri.to_string()) || + // Match against the package name + regex.is_match(fmri.stem()) + }) + .collect() + }, + Err(_) => { + // If regex compilation fails, fall back to simple substring matching + all_packages + .into_iter() + .filter(|fmri| { + // Match against the FMRI string or the package name + // For "package-" pattern, we want to match only packages that start with "package-" + if pattern.ends_with("-") && fmri.stem().starts_with(pattern) { + true + } + // For version searches like "2.0", match only the version part + else if pattern.chars().all(|c| c.is_digit(10) || c == '.') { + // This looks like a version number, so match only against the version part + fmri.version().contains(pattern) + } + else { + // Match against the FMRI string + 
fmri.to_string().contains(pattern) || + // Match against the package name + fmri.stem().contains(pattern) + } + }) + .collect() + } + }; + + Ok(result) + } + + /// Export obsoleted packages to a file + /// + /// This method exports obsoleted packages to a JSON file that can be imported into another repository. + /// + /// # Arguments + /// + /// * `publisher` - The publisher to export packages for + /// * `pattern` - Optional pattern to filter packages by + /// * `output_file` - Path to the output file + /// + /// # Returns + /// + /// The number of packages exported + pub fn export_obsoleted_packages( + &self, + publisher: &str, + pattern: Option<&str>, + output_file: &Path, + ) -> Result { + info!("Exporting obsoleted packages for publisher: {}", publisher); + + // Get the packages to export + let packages = if let Some(pattern) = pattern { + self.search_obsoleted_packages(publisher, pattern)? + } else { + self.list_obsoleted_packages(publisher)? + }; + + if packages.is_empty() { + info!("No packages found to export"); + return Ok(0); + } + + info!("Found {} packages to export", packages.len()); + + // Create the export structure + let mut export = ObsoletedPackagesExport { + version: 1, + export_date: format_timestamp(&SystemTime::now()), + packages: Vec::new(), + }; + + // Add each package to the export + for fmri in packages { + // Get the metadata + let metadata = match self.get_obsoleted_package_metadata(publisher, &fmri)? { + Some(metadata) => metadata, + None => { + warn!("Metadata not found for package: {}", fmri); + continue; + } + }; + + // Get the manifest content + let manifest = match self.get_obsoleted_package_manifest(publisher, &fmri)? 
{ + Some(manifest) => manifest, + None => { + warn!("Manifest not found for package: {}", fmri); + continue; + } + }; + + // Add the package to the export + export.packages.push(ObsoletedPackageExport { + publisher: publisher.to_string(), + fmri: fmri.to_string(), + metadata, + manifest, + }); + } + + // Write the export to the output file + let file = fs::File::create(output_file).map_err(|e| ObsoletedPackageError::IoError(format!( + "Failed to create output file {}: {}", + output_file.display(), e + )))?; + + let writer = std::io::BufWriter::new(file); + serde_json::to_writer_pretty(writer, &export).map_err(|e| ObsoletedPackageError::IoError(format!( + "Failed to write export to file {}: {}", + output_file.display(), e + )))?; + + info!("Exported {} packages to {}", export.packages.len(), output_file.display()); + Ok(export.packages.len()) + } + + /// Import obsoleted packages from a file + /// + /// This method imports obsoleted packages from a JSON file created by `export_obsoleted_packages`. 
+ /// + /// # Arguments + /// + /// * `input_file` - Path to the input file + /// * `override_publisher` - Optional publisher to use instead of the one in the export file + /// + /// # Returns + /// + /// The number of packages imported + pub fn import_obsoleted_packages( + &self, + input_file: &Path, + override_publisher: Option<&str>, + ) -> Result { + info!("Importing obsoleted packages from {}", input_file.display()); + + // Read the export file + let file = fs::File::open(input_file).map_err(|e| ObsoletedPackageError::IoError(format!( + "Failed to open input file {}: {}", + input_file.display(), e + )))?; + + let reader = std::io::BufReader::new(file); + let export: ObsoletedPackagesExport = serde_json::from_reader(reader).map_err(|e| ObsoletedPackageError::IoError(format!( + "Failed to parse export from file {}: {}", + input_file.display(), e + )))?; + + info!("Found {} packages to import", export.packages.len()); + + // Import each package + let mut imported_count = 0; + for package in export.packages { + // Determine the publisher to use + let publisher = override_publisher.unwrap_or(&package.publisher); + + // Parse the FMRI + let fmri = match Fmri::parse(&package.fmri) { + Ok(fmri) => fmri, + Err(e) => { + warn!("Failed to parse FMRI '{}': {}", package.fmri, e); + continue; + } + }; + + // Store the obsoleted package + match self.store_obsoleted_package( + publisher, + &fmri, + &package.manifest, + package.metadata.obsoleted_by, + package.metadata.deprecation_message, + ) { + Ok(_) => { + info!("Imported obsoleted package: {}", fmri); + imported_count += 1; + }, + Err(e) => { + warn!("Failed to import obsoleted package {}: {}", fmri, e); + } + } + } + + info!("Imported {} packages", imported_count); + Ok(imported_count) + } + + /// Batch process multiple obsoleted packages + /// + /// This method applies a function to multiple obsoleted packages in batch. + /// It's useful for operations that need to be performed on many packages at once. 
+ /// + /// # Arguments + /// + /// * `publisher` - The publisher of the obsoleted packages + /// * `fmris` - A list of FMRIs to process + /// * `batch_size` - The number of packages to process in each batch (default: 100) + /// * `processor` - A function that takes a publisher and an FMRI and returns a result + /// + /// # Returns + /// + /// A list of results, one for each input FMRI + pub fn batch_process(&self, + publisher: &str, + fmris: &[Fmri], + batch_size: Option, + processor: F + ) -> Result>> + where + F: Fn(&str, &Fmri) -> std::result::Result, + E: std::fmt::Debug, + { + let batch_size = batch_size.unwrap_or(100); + let mut results = Vec::with_capacity(fmris.len()); + + // Process packages in batches + for chunk in fmris.chunks(batch_size) { + for fmri in chunk { + let result = processor(publisher, fmri); + results.push(result); + } + } + + Ok(results) + } +} + +/// URL encode a string +fn url_encode(s: &str) -> String { + let mut encoded = String::new(); + for c in s.chars() { + match c { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' 
/// Percent-encode a string.
///
/// RFC 3986 unreserved characters (ASCII alphanumerics and `-`, `_`, `.`,
/// `~`) are passed through unchanged; every other character is expanded to
/// the uppercase `%XX` form of each of its UTF-8 bytes.
fn url_encode(s: &str) -> String {
    use std::fmt::Write as _;

    // Output is at least as long as the input; reserve that up front.
    let mut encoded = String::with_capacity(s.len());
    let mut utf8 = [0u8; 4];
    for c in s.chars() {
        match c {
            'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' | '~' => encoded.push(c),
            _ => {
                // Encode the character's UTF-8 bytes without allocating a
                // temporary String per character (the previous version did
                // `c.to_string()` and `format!` for every byte).
                for &b in c.encode_utf8(&mut utf8).as_bytes() {
                    // Writing into a String cannot fail.
                    let _ = write!(encoded, "%{:02X}", b);
                }
            }
        }
    }
    encoded
}
temp_dir = tempdir().unwrap(); + let manager = ObsoletedPackageManager::new(temp_dir.path()); + manager.init().unwrap(); + + // Create multiple test FMRIs + let fmri1 = Fmri::parse("pkg://test/package-one@1.0,5.11-0.1:20250101T000000Z").unwrap(); + let fmri2 = Fmri::parse("pkg://test/package-two@2.0,5.11-0.1:20250101T000000Z").unwrap(); + let fmri3 = Fmri::parse("pkg://test/other-package@3.0,5.11-0.1:20250101T000000Z").unwrap(); + + // Store obsoleted packages + let manifest_template = r#"{ + "attributes": [ + { + "key": "pkg.fmri", + "values": ["%s"] + }, + { + "key": "pkg.obsolete", + "values": ["true"] + } + ] + }"#; + + let manifest1 = manifest_template.replace("%s", &fmri1.to_string()); + let manifest2 = manifest_template.replace("%s", &fmri2.to_string()); + let manifest3 = manifest_template.replace("%s", &fmri3.to_string()); + + manager.store_obsoleted_package("test", &fmri1, &manifest1, None, None).unwrap(); + manager.store_obsoleted_package("test", &fmri2, &manifest2, None, None).unwrap(); + manager.store_obsoleted_package("test", &fmri3, &manifest3, None, None).unwrap(); + + // Test search with substring + let results = manager.search_obsoleted_packages("test", "package-").unwrap(); + assert_eq!(results.len(), 2); + assert!(results.iter().any(|f| f.to_string() == fmri1.to_string())); + assert!(results.iter().any(|f| f.to_string() == fmri2.to_string())); + + // Test search with regex + let results = manager.search_obsoleted_packages("test", "package-.*").unwrap(); + assert_eq!(results.len(), 2); + + // Test search for specific version + let results = manager.search_obsoleted_packages("test", "2.0").unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].to_string(), fmri2.to_string()); + + // Test search with no matches + let results = manager.search_obsoleted_packages("test", "nonexistent").unwrap(); + assert_eq!(results.len(), 0); + } + + #[test] + fn test_obsoleted_package_manager_pagination() { + // Create a temporary directory for testing + 
let temp_dir = tempdir().unwrap(); + let manager = ObsoletedPackageManager::new(temp_dir.path()); + manager.init().unwrap(); + + // Create 10 test FMRIs + let mut fmris = Vec::new(); + let manifest_template = r#"{ + "attributes": [ + { + "key": "pkg.fmri", + "values": ["%s"] + }, + { + "key": "pkg.obsolete", + "values": ["true"] + } + ] + }"#; + + for i in 1..=10 { + let fmri = Fmri::parse(&format!("pkg://test/package-{:02}@1.0,5.11-0.1:20250101T000000Z", i)).unwrap(); + let manifest = manifest_template.replace("%s", &fmri.to_string()); + manager.store_obsoleted_package("test", &fmri, &manifest, None, None).unwrap(); + fmris.push(fmri); + } + + // Test pagination with page size 3 + let page1 = manager.list_obsoleted_packages_paginated("test", Some(1), Some(3)).unwrap(); + assert_eq!(page1.packages.len(), 3); + assert_eq!(page1.total_count, 10); + assert_eq!(page1.page, 1); + assert_eq!(page1.page_size, 3); + assert_eq!(page1.total_pages, 4); + + let page2 = manager.list_obsoleted_packages_paginated("test", Some(2), Some(3)).unwrap(); + assert_eq!(page2.packages.len(), 3); + assert_eq!(page2.page, 2); + + let page4 = manager.list_obsoleted_packages_paginated("test", Some(4), Some(3)).unwrap(); + assert_eq!(page4.packages.len(), 1); // Last page has only 1 item + + // Test pagination with page beyond total + let empty_page = manager.list_obsoleted_packages_paginated("test", Some(5), Some(3)).unwrap(); + assert_eq!(empty_page.packages.len(), 0); + assert_eq!(empty_page.total_count, 10); + assert_eq!(empty_page.page, 5); + + // Test with no pagination + let all_packages = manager.list_obsoleted_packages_paginated("test", None, None).unwrap(); + assert_eq!(all_packages.packages.len(), 10); + } + + #[test] + fn test_obsoleted_package_manager_remove() { + // Create a temporary directory for testing + let temp_dir = tempdir().unwrap(); + let manager = ObsoletedPackageManager::new(temp_dir.path()); + manager.init().unwrap(); + + // Create a test FMRI + let fmri = 
Fmri::parse("pkg://test/package@1.0,5.11-0.1:20250101T000000Z").unwrap(); + + // Store an obsoleted package + let manifest_content = r#"{ + "attributes": [ + { + "key": "pkg.fmri", + "values": ["pkg://test/package@1.0,5.11-0.1:20250101T000000Z"] + }, + { + "key": "pkg.obsolete", + "values": ["true"] + } + ] + }"#; + + manager.store_obsoleted_package("test", &fmri, manifest_content, None, None).unwrap(); + + // Verify the package exists + assert!(manager.is_obsoleted("test", &fmri)); + + // Remove the package + let removed = manager.remove_obsoleted_package("test", &fmri).unwrap(); + assert!(removed); + + // Verify the package no longer exists + assert!(!manager.is_obsoleted("test", &fmri)); + + // Try to remove a non-existent package + let not_removed = manager.remove_obsoleted_package("test", &fmri).unwrap(); + assert!(!not_removed); + } + + #[test] + fn test_obsoleted_package_manager_batch_processing() { + // Create a temporary directory for testing + let temp_dir = tempdir().unwrap(); + let manager = ObsoletedPackageManager::new(temp_dir.path()); + manager.init().unwrap(); + + // Create multiple test FMRIs + let fmri1 = Fmri::parse("pkg://test/package-one@1.0,5.11-0.1:20250101T000000Z").unwrap(); + let fmri2 = Fmri::parse("pkg://test/package-two@2.0,5.11-0.1:20250101T000000Z").unwrap(); + let fmri3 = Fmri::parse("pkg://test/package-three@3.0,5.11-0.1:20250101T000000Z").unwrap(); + + // Store obsoleted packages + let manifest_template = r#"{ + "attributes": [ + { + "key": "pkg.fmri", + "values": ["%s"] + }, + { + "key": "pkg.obsolete", + "values": ["true"] + } + ] + }"#; + + let manifest1 = manifest_template.replace("%s", &fmri1.to_string()); + let manifest2 = manifest_template.replace("%s", &fmri2.to_string()); + let manifest3 = manifest_template.replace("%s", &fmri3.to_string()); + + manager.store_obsoleted_package("test", &fmri1, &manifest1, None, None).unwrap(); + manager.store_obsoleted_package("test", &fmri2, &manifest2, None, None).unwrap(); + 
manager.store_obsoleted_package("test", &fmri3, &manifest3, None, None).unwrap(); + + // Test batch processing with is_obsoleted + let fmris = vec![fmri1.clone(), fmri2.clone(), fmri3.clone()]; + let results: Vec> = + manager.batch_process("test", &fmris, Some(2), |pub_name, fmri| { + Ok(manager.is_obsoleted(pub_name, fmri)) + }).unwrap(); + + assert_eq!(results.len(), 3); + assert!(results[0].as_ref().unwrap()); + assert!(results[1].as_ref().unwrap()); + assert!(results[2].as_ref().unwrap()); + + // Test batch processing with remove + let results: Vec> = + manager.batch_process("test", &fmris, Some(2), |pub_name, fmri| { + manager.remove_obsoleted_package(pub_name, fmri) + }).unwrap(); + + assert_eq!(results.len(), 3); + assert!(results[0].as_ref().unwrap()); + assert!(results[1].as_ref().unwrap()); + assert!(results[2].as_ref().unwrap()); + + // Verify all packages are removed + assert!(!manager.is_obsoleted("test", &fmri1)); + assert!(!manager.is_obsoleted("test", &fmri2)); + assert!(!manager.is_obsoleted("test", &fmri3)); + } +} \ No newline at end of file diff --git a/pkg6repo/src/e2e_tests.rs b/pkg6repo/src/e2e_tests.rs index b0270dd..2d77da1 100644 --- a/pkg6repo/src/e2e_tests.rs +++ b/pkg6repo/src/e2e_tests.rs @@ -389,4 +389,150 @@ mod e2e_tests { // Clean up cleanup_test_dir(&test_dir); } + + #[test] + fn test_e2e_obsoleted_packages() { + // Run the setup script to prepare the test environment + let (prototype_dir, manifest_dir) = run_setup_script(); + + // Create a test directory + let test_dir = create_test_dir("e2e_obsoleted_packages"); + let repo_path = test_dir.join("repo"); + + // Create a repository using pkg6repo + let result = run_pkg6repo(&["create", "--repo-version", "4", repo_path.to_str().unwrap()]); + assert!( + result.is_ok(), + "Failed to create repository: {:?}", + result.err() + ); + + // Add a publisher using pkg6repo + let result = run_pkg6repo(&["add-publisher", "-s", repo_path.to_str().unwrap(), "test"]); + assert!( + result.is_ok(), 
+ "Failed to add publisher: {:?}", + result.err() + ); + + // Publish a package using pkg6dev + let manifest_path = manifest_dir.join("example.p5m"); + let result = run_pkg6dev(&[ + "publish", + "--manifest-path", + manifest_path.to_str().unwrap(), + "--prototype-dir", + prototype_dir.to_str().unwrap(), + "--repo-path", + repo_path.to_str().unwrap(), + ]); + assert!( + result.is_ok(), + "Failed to publish package: {:?}", + result.err() + ); + + // Get the FMRI of the published package + let result = run_pkg6repo(&["list", "-s", repo_path.to_str().unwrap(), "-F", "json"]); + assert!( + result.is_ok(), + "Failed to list packages: {:?}", + result.err() + ); + + let output = result.unwrap(); + let packages: serde_json::Value = serde_json::from_str(&output).expect("Failed to parse JSON output"); + + // The FMRI in the JSON is an object with scheme, publisher, name, and version fields + // We need to extract these fields and construct the FMRI string + let fmri_obj = &packages["packages"][0]["fmri"]; + let scheme = fmri_obj["scheme"].as_str().expect("Failed to get scheme"); + let publisher = fmri_obj["publisher"].as_str().expect("Failed to get publisher"); + let name = fmri_obj["name"].as_str().expect("Failed to get name"); + let version_obj = &fmri_obj["version"]; + let release = version_obj["release"].as_str().expect("Failed to get release"); + + // Construct the FMRI string in the format "pkg://publisher/name@version" + let fmri = format!("{}://{}/{}", scheme, publisher, name); + + // Add version if available + let fmri = if !release.is_empty() { + format!("{}@{}", fmri, release) + } else { + fmri + }; + + // Mark the package as obsoleted + let result = run_pkg6repo(&[ + "obsolete-package", + "-s", repo_path.to_str().unwrap(), + "-p", "test", + "-f", &fmri, + "-m", "This package is obsoleted for testing purposes", + "-r", "pkg://test/example2@1.0" + ]); + assert!( + result.is_ok(), + "Failed to mark package as obsoleted: {:?}", + result.err() + ); + + // Verify the 
package is no longer in the main repository + let result = run_pkg6repo(&["list", "-s", repo_path.to_str().unwrap()]); + assert!( + result.is_ok(), + "Failed to list packages: {:?}", + result.err() + ); + + let output = result.unwrap(); + assert!( + !output.contains("example"), + "Package still found in repository after being marked as obsoleted" + ); + + // List obsoleted packages + let result = run_pkg6repo(&["list-obsoleted", "-s", repo_path.to_str().unwrap(), "-p", "test"]); + assert!( + result.is_ok(), + "Failed to list obsoleted packages: {:?}", + result.err() + ); + + let output = result.unwrap(); + assert!( + output.contains("example"), + "Obsoleted package not found in obsoleted packages list" + ); + + // Show details of the obsoleted package + let result = run_pkg6repo(&[ + "show-obsoleted", + "-s", repo_path.to_str().unwrap(), + "-p", "test", + "-f", &fmri + ]); + assert!( + result.is_ok(), + "Failed to show obsoleted package details: {:?}", + result.err() + ); + + let output = result.unwrap(); + assert!( + output.contains("Status: obsolete"), + "Package not marked as obsolete in details" + ); + assert!( + output.contains("This package is obsoleted for testing purposes"), + "Deprecation message not found in details" + ); + assert!( + output.contains("pkg://test/example2@1.0"), + "Replacement package not found in details" + ); + + // Clean up + cleanup_test_dir(&test_dir); + } } \ No newline at end of file diff --git a/pkg6repo/src/error.rs b/pkg6repo/src/error.rs index 6f2857e..2b73c64 100644 --- a/pkg6repo/src/error.rs +++ b/pkg6repo/src/error.rs @@ -1,4 +1,5 @@ use libips::actions::ActionError; +use libips::fmri::FmriError; use libips::repository; use miette::Diagnostic; use thiserror::Error; @@ -73,3 +74,10 @@ impl From<&str> for Pkg6RepoError { Pkg6RepoError::Other(s.to_string()) } } + +/// Convert a FmriError to a Pkg6RepoError +impl From for Pkg6RepoError { + fn from(err: FmriError) -> Self { + Pkg6RepoError::Other(format!("FMRI error: {}", err)) + 
/// Percent-encode a string for use in a repository filename.
///
/// This must produce the same encoding that the libips repository backend
/// uses when it stores a manifest under its URL-encoded version string —
/// the `obsolete-package` command uses this function to reconstruct that
/// filename, so any divergence makes the manifest lookup fail. RFC 3986
/// unreserved characters (ASCII alphanumerics and `-`, `_`, `.`, `~`) pass
/// through unchanged; every other character is expanded to the uppercase
/// `%XX` form of each of its UTF-8 bytes.
///
/// Fixes two defects in the previous version: a space was form-encoded as
/// `+` (which the libips encoder does not emit), and non-ASCII characters
/// were truncated with `c as u8`, producing invalid percent-sequences.
fn url_encode(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut utf8 = [0u8; 4];
    for c in s.chars() {
        match c {
            'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' | '~' => result.push(c),
            _ => {
                // Encode each UTF-8 byte of the character as %XX.
                for &b in c.encode_utf8(&mut utf8).as_bytes() {
                    result.push('%');
                    result.push_str(&format!("{:02X}", b));
                }
            }
        }
    }
    result
}
ListObsoleted { + /// Path or URI of the repository + #[clap(short = 's')] + repo_uri_or_path: String, + + /// Output format + #[clap(short = 'F')] + format: Option, + + /// Omit headers + #[clap(short = 'H')] + omit_headers: bool, + + /// Publisher to list obsoleted packages for + #[clap(short = 'p')] + publisher: String, + + /// Page number (1-based, defaults to 1) + #[clap(long = "page")] + page: Option, + + /// Number of packages per page (defaults to 100, 0 for all) + #[clap(long = "page-size")] + page_size: Option, + }, + + /// Show details of an obsoleted package + ShowObsoleted { + /// Path or URI of the repository + #[clap(short = 's')] + repo_uri_or_path: String, + + /// Output format + #[clap(short = 'F')] + format: Option, + + /// Publisher of the package + #[clap(short = 'p')] + publisher: String, + + /// FMRI of the obsoleted package to show + #[clap(short = 'f')] + fmri: String, + }, + + /// Search for obsoleted packages + SearchObsoleted { + /// Path or URI of the repository + #[clap(short = 's')] + repo_uri_or_path: String, + + /// Output format + #[clap(short = 'F')] + format: Option, + + /// Omit headers + #[clap(short = 'H')] + omit_headers: bool, + + /// Publisher to search obsoleted packages for + #[clap(short = 'p')] + publisher: String, + + /// Search pattern (supports glob patterns) + #[clap(short = 'q')] + pattern: String, + + /// Maximum number of results to return + #[clap(short = 'n', long = "limit")] + limit: Option, + }, + + /// Restore an obsoleted package to the main repository + RestoreObsoleted { + /// Path or URI of the repository + #[clap(short = 's')] + repo_uri_or_path: String, + + /// Publisher of the package + #[clap(short = 'p')] + publisher: String, + + /// FMRI of the obsoleted package to restore + #[clap(short = 'f')] + fmri: String, + + /// Skip rebuilding the catalog after restoration + #[clap(long = "no-rebuild")] + no_rebuild: bool, + }, + + /// Export obsoleted packages to a file + ExportObsoleted { + /// Path or 
URI of the repository + #[clap(short = 's')] + repo_uri_or_path: String, + + /// Publisher to export obsoleted packages for + #[clap(short = 'p')] + publisher: String, + + /// Output file path + #[clap(short = 'o')] + output_file: String, + + /// Optional search pattern to filter packages + #[clap(short = 'q')] + pattern: Option, + }, + + /// Import obsoleted packages from a file + ImportObsoleted { + /// Path or URI of the repository + #[clap(short = 's')] + repo_uri_or_path: String, + + /// Input file path + #[clap(short = 'i')] + input_file: String, + + /// Override publisher (use this instead of the one in the export file) + #[clap(short = 'p')] + publisher: Option, + }, } fn main() -> Result<()> { @@ -1089,6 +1270,491 @@ fn main() -> Result<()> { info!("Repository imported successfully"); Ok(()) + }, + + Commands::ObsoletePackage { + repo_uri_or_path, + publisher, + fmri, + message, + replaced_by, + } => { + info!("Marking package as obsoleted: {}", fmri); + + // Open the repository + let mut repo = FileBackend::open(repo_uri_or_path)?; + + // Parse the FMRI + let parsed_fmri = libips::fmri::Fmri::parse(fmri)?; + + // Get the manifest for the package + let pkg_dir = repo.path.join("pkg").join(publisher).join(parsed_fmri.stem()); + let encoded_version = url_encode(&parsed_fmri.version()); + let manifest_path = pkg_dir.join(&encoded_version); + + if !manifest_path.exists() { + return Err(Pkg6RepoError::from(format!( + "Package not found: {}", + parsed_fmri + ))); + } + + // Read the manifest content + let manifest_content = std::fs::read_to_string(&manifest_path)?; + + // Create a new scope for the obsoleted_manager to ensure it's dropped before we call repo.rebuild() + { + // Get the obsoleted package manager + let obsoleted_manager = repo.get_obsoleted_manager()?; + + // Store the obsoleted package + obsoleted_manager.store_obsoleted_package( + publisher, + &parsed_fmri, + &manifest_content, + replaced_by.clone(), + message.clone(), + )?; + } // 
obsoleted_manager is dropped here, releasing the mutable borrow on repo + + // Remove the original package from the repository + std::fs::remove_file(&manifest_path)?; + + // Rebuild the catalog to reflect the changes + repo.rebuild(Some(publisher), false, false)?; + + info!("Package marked as obsoleted successfully: {}", parsed_fmri); + Ok(()) + }, + + Commands::ListObsoleted { + repo_uri_or_path, + format, + omit_headers, + publisher, + page, + page_size, + } => { + info!("Listing obsoleted packages for publisher: {}", publisher); + + // Open the repository + let mut repo = FileBackend::open(repo_uri_or_path)?; + + // Get the obsoleted packages in a new scope to avoid borrowing issues + let paginated_result = { + // Get the obsoleted package manager + let obsoleted_manager = repo.get_obsoleted_manager()?; + + // List obsoleted packages with pagination + obsoleted_manager.list_obsoleted_packages_paginated(publisher, page.clone(), page_size.clone())? + }; // obsoleted_manager is dropped here, releasing the mutable borrow on repo + + // Determine the output format + let output_format = format.as_deref().unwrap_or("table"); + + match output_format { + "table" => { + // Print headers if not omitted + if !omit_headers { + println!("{:<30} {:<15} {:<10}", "NAME", "VERSION", "PUBLISHER"); + } + + // Print packages + for fmri in &paginated_result.packages { + // Format version and publisher, handling optional fields + let version_str = fmri.version(); + + let publisher_str = match &fmri.publisher { + Some(publisher) => publisher.clone(), + None => String::new(), + }; + + println!( + "{:<30} {:<15} {:<10}", + fmri.stem(), + version_str, + publisher_str + ); + } + + // Print pagination information + println!("\nPage {} of {} (Total: {} packages)", + paginated_result.page, + paginated_result.total_pages, + paginated_result.total_count); + }, + "json" => { + // Create a JSON representation of the obsoleted packages with pagination info + #[derive(Serialize)] + struct 
PaginatedOutput { + packages: Vec, + page: usize, + page_size: usize, + total_pages: usize, + total_count: usize, + } + + let packages_str: Vec = paginated_result.packages.iter().map(|f| f.to_string()).collect(); + let paginated_output = PaginatedOutput { + packages: packages_str, + page: paginated_result.page, + page_size: paginated_result.page_size, + total_pages: paginated_result.total_pages, + total_count: paginated_result.total_count, + }; + + // Serialize to pretty-printed JSON + let json_output = serde_json::to_string_pretty(&paginated_output) + .unwrap_or_else(|e| format!("{{\"error\": \"{}\"}}", e)); + + println!("{}", json_output); + }, + "tsv" => { + // Print headers if not omitted + if !omit_headers { + println!("NAME\tVERSION\tPUBLISHER"); + } + + // Print packages as tab-separated values + for fmri in &paginated_result.packages { + // Format version and publisher, handling optional fields + let version_str = fmri.version(); + + let publisher_str = match &fmri.publisher { + Some(publisher) => publisher.clone(), + None => String::new(), + }; + + println!( + "{}\t{}\t{}", + fmri.stem(), + version_str, + publisher_str + ); + } + + // Print pagination information + println!("\nPAGE\t{}\nTOTAL_PAGES\t{}\nTOTAL_COUNT\t{}", + paginated_result.page, + paginated_result.total_pages, + paginated_result.total_count); + }, + _ => { + return Err(Pkg6RepoError::UnsupportedOutputFormat( + output_format.to_string(), + )); + } + } + + Ok(()) + }, + + Commands::ShowObsoleted { + repo_uri_or_path, + format, + publisher, + fmri, + } => { + info!("Showing details of obsoleted package: {}", fmri); + + // Open the repository + let mut repo = FileBackend::open(repo_uri_or_path)?; + + // Parse the FMRI + let parsed_fmri = libips::fmri::Fmri::parse(fmri)?; + + // Get the obsoleted package metadata in a new scope to avoid borrowing issues + let metadata = { + // Get the obsoleted package manager + let obsoleted_manager = repo.get_obsoleted_manager()?; + + // Get the obsoleted 
package metadata + match obsoleted_manager.get_obsoleted_package_metadata(publisher, &parsed_fmri)? { + Some(metadata) => metadata, + None => { + return Err(Pkg6RepoError::from(format!( + "Obsoleted package not found: {}", + parsed_fmri + ))); + } + } + }; // obsoleted_manager is dropped here, releasing the mutable borrow on repo + + // Determine the output format + let output_format = format.as_deref().unwrap_or("table"); + + match output_format { + "table" => { + println!("FMRI: {}", metadata.fmri); + println!("Status: {}", metadata.status); + println!("Obsolescence Date: {}", metadata.obsolescence_date); + + if let Some(msg) = &metadata.deprecation_message { + println!("Deprecation Message: {}", msg); + } + + if let Some(replacements) = &metadata.obsoleted_by { + println!("Replaced By:"); + for replacement in replacements { + println!(" {}", replacement); + } + } + + println!("Metadata Version: {}", metadata.metadata_version); + println!("Content Hash: {}", metadata.content_hash); + }, + "json" => { + // Create a JSON representation of the obsoleted package details + let details_output = ObsoletedPackageDetailsOutput { + fmri: metadata.fmri, + status: metadata.status, + obsolescence_date: metadata.obsolescence_date, + deprecation_message: metadata.deprecation_message, + obsoleted_by: metadata.obsoleted_by, + metadata_version: metadata.metadata_version, + content_hash: metadata.content_hash, + }; + + // Serialize to pretty-printed JSON + let json_output = serde_json::to_string_pretty(&details_output) + .unwrap_or_else(|e| format!("{{\"error\": \"{}\"}}", e)); + + println!("{}", json_output); + }, + "tsv" => { + println!("FMRI\t{}", metadata.fmri); + println!("Status\t{}", metadata.status); + println!("ObsolescenceDate\t{}", metadata.obsolescence_date); + + if let Some(msg) = &metadata.deprecation_message { + println!("DeprecationMessage\t{}", msg); + } + + if let Some(replacements) = &metadata.obsoleted_by { + for (i, replacement) in 
replacements.iter().enumerate() { + println!("ReplacedBy{}\t{}", i + 1, replacement); + } + } + + println!("MetadataVersion\t{}", metadata.metadata_version); + println!("ContentHash\t{}", metadata.content_hash); + }, + _ => { + return Err(Pkg6RepoError::UnsupportedOutputFormat( + output_format.to_string(), + )); + } + } + + Ok(()) + }, + + Commands::SearchObsoleted { + repo_uri_or_path, + format, + omit_headers, + publisher, + pattern, + limit, + } => { + info!("Searching for obsoleted packages: {} (publisher: {})", pattern, publisher); + + // Open the repository + let mut repo = FileBackend::open(repo_uri_or_path)?; + + // Get the obsoleted packages in a new scope to avoid borrowing issues + let obsoleted_packages = { + // Get the obsoleted package manager + let obsoleted_manager = repo.get_obsoleted_manager()?; + + // Search for obsoleted packages + let mut packages = obsoleted_manager.search_obsoleted_packages(publisher, pattern)?; + + // Apply limit if specified + if let Some(max_results) = limit { + packages.truncate(*max_results); + } + + packages + }; // obsoleted_manager is dropped here, releasing the mutable borrow on repo + + // Determine the output format + let output_format = format.as_deref().unwrap_or("table"); + + match output_format { + "table" => { + // Print headers if not omitted + if !omit_headers { + println!("{:<30} {:<15} {:<10}", "NAME", "VERSION", "PUBLISHER"); + } + + // Print packages + for fmri in obsoleted_packages { + // Format version and publisher, handling optional fields + let version_str = fmri.version(); + + let publisher_str = match &fmri.publisher { + Some(publisher) => publisher.clone(), + None => String::new(), + }; + + println!( + "{:<30} {:<15} {:<10}", + fmri.stem(), + version_str, + publisher_str + ); + } + }, + "json" => { + // Create a JSON representation of the obsoleted packages + let packages_str: Vec = obsoleted_packages.iter().map(|f| f.to_string()).collect(); + let packages_output = ObsoletedPackagesOutput { + 
packages: packages_str, + }; + + // Serialize to pretty-printed JSON + let json_output = serde_json::to_string_pretty(&packages_output) + .unwrap_or_else(|e| format!("{{\"error\": \"{}\"}}", e)); + + println!("{}", json_output); + }, + "tsv" => { + // Print headers if not omitted + if !omit_headers { + println!("NAME\tVERSION\tPUBLISHER"); + } + + // Print packages as tab-separated values + for fmri in obsoleted_packages { + // Format version and publisher, handling optional fields + let version_str = fmri.version(); + + let publisher_str = match &fmri.publisher { + Some(publisher) => publisher.clone(), + None => String::new(), + }; + + println!( + "{}\t{}\t{}", + fmri.stem(), + version_str, + publisher_str + ); + } + }, + _ => { + return Err(Pkg6RepoError::UnsupportedOutputFormat( + output_format.to_string(), + )); + } + } + + Ok(()) + }, + + Commands::RestoreObsoleted { + repo_uri_or_path, + publisher, + fmri, + no_rebuild, + } => { + info!("Restoring obsoleted package: {} (publisher: {})", fmri, publisher); + + // Parse the FMRI + let parsed_fmri = libips::fmri::Fmri::parse(fmri)?; + + // Open the repository + let mut repo = FileBackend::open(repo_uri_or_path)?; + + // Get the manifest content and remove the obsoleted package + let manifest_content = { + // Get the obsoleted package manager + let obsoleted_manager = repo.get_obsoleted_manager()?; + + // Get the manifest content and remove the obsoleted package + obsoleted_manager.get_and_remove_obsoleted_package(publisher, &parsed_fmri)? 
+ }; // obsoleted_manager is dropped here, releasing the mutable borrow on repo + + // Parse the manifest + let manifest = libips::actions::Manifest::parse_string(manifest_content)?; + + // Begin a transaction + let mut transaction = repo.begin_transaction()?; + + // Set the publisher for the transaction + transaction.set_publisher(publisher); + + // Update the manifest in the transaction + transaction.update_manifest(manifest); + + // Commit the transaction + transaction.commit()?; + + // Rebuild the catalog if not disabled + if !no_rebuild { + info!("Rebuilding catalog..."); + repo.rebuild(Some(publisher), false, false)?; + } + + info!("Package restored successfully: {}", parsed_fmri); + Ok(()) + }, + + Commands::ExportObsoleted { + repo_uri_or_path, + publisher, + output_file, + pattern, + } => { + info!("Exporting obsoleted packages for publisher: {}", publisher); + + // Open the repository + let mut repo = FileBackend::open(repo_uri_or_path)?; + + // Export the obsoleted packages + let count = { + // Get the obsoleted package manager + let obsoleted_manager = repo.get_obsoleted_manager()?; + + // Export the obsoleted packages + let output_path = PathBuf::from(output_file); + obsoleted_manager.export_obsoleted_packages( + publisher, + pattern.as_deref(), + &output_path, + )? 
+ }; // obsoleted_manager is dropped here, releasing the mutable borrow on repo + + info!("Exported {} obsoleted packages to {}", count, output_file); + Ok(()) + }, + + Commands::ImportObsoleted { + repo_uri_or_path, + input_file, + publisher, + } => { + info!("Importing obsoleted packages from {}", input_file); + + // Open the repository + let mut repo = FileBackend::open(repo_uri_or_path)?; + + // Import the obsoleted packages + let count = { + // Get the obsoleted package manager + let obsoleted_manager = repo.get_obsoleted_manager()?; + + // Import the obsoleted packages + let input_path = PathBuf::from(input_file); + obsoleted_manager.import_obsoleted_packages( + &input_path, + publisher.as_deref(), + )? + }; // obsoleted_manager is dropped here, releasing the mutable borrow on repo + + info!("Imported {} obsoleted packages", count); + Ok(()) } } } diff --git a/pkg6repo/src/pkg5_import.rs b/pkg6repo/src/pkg5_import.rs index 09ca7a0..3d4bd1f 100644 --- a/pkg6repo/src/pkg5_import.rs +++ b/pkg6repo/src/pkg5_import.rs @@ -1,5 +1,6 @@ use crate::error::{Pkg6RepoError, Result}; use libips::actions::Manifest; +use libips::fmri::Fmri; use libips::repository::{FileBackend, ReadableRepository, WritableRepository}; use std::fs::{self, File}; use std::io::{Read, Seek}; @@ -220,14 +221,21 @@ impl Pkg5Importer { } } - // Import packages - self.import_packages(&source_path, &mut dest_repo, publisher_to_import)?; + // Import packages and get counts + let (regular_count, obsoleted_count) = self.import_packages(&source_path, &mut dest_repo, publisher_to_import)?; + let total_count = regular_count + obsoleted_count; // Rebuild catalog and search index info!("Rebuilding catalog and search index..."); dest_repo.rebuild(Some(publisher_to_import), false, false)?; + // Report final statistics info!("Import completed successfully"); + info!("Import summary:"); + info!(" Total packages processed: {}", total_count); + info!(" Regular packages imported: {}", regular_count); + info!(" 
Obsoleted packages stored: {}", obsoleted_count); + Ok(()) } @@ -259,12 +267,14 @@ impl Pkg5Importer { } /// Imports packages from the source repository + /// + /// Returns a tuple of (regular_package_count, obsoleted_package_count) fn import_packages( &self, source_path: &Path, dest_repo: &mut FileBackend, publisher: &str, - ) -> Result<()> { + ) -> Result<(usize, usize)> { let pkg_dir = source_path.join("publisher").join(publisher).join("pkg"); if !pkg_dir.exists() || !pkg_dir.is_dir() { @@ -288,7 +298,8 @@ impl Pkg5Importer { ); // Find package directories - let mut package_count = 0; + let mut regular_package_count = 0; + let mut obsoleted_package_count = 0; for pkg_entry in fs::read_dir(&pkg_dir).map_err(|e| Pkg6RepoError::IoError(e))? { let pkg_entry = pkg_entry.map_err(|e| Pkg6RepoError::IoError(e))?; @@ -316,7 +327,7 @@ impl Pkg5Importer { debug!("Processing version: {}", decoded_ver_name); // Import this package version - self.import_package_version( + let is_obsoleted = self.import_package_version( source_path, dest_repo, publisher, @@ -326,17 +337,26 @@ impl Pkg5Importer { temp_proto_dir.path(), )?; - package_count += 1; + // Increment the appropriate counter + if is_obsoleted { + obsoleted_package_count += 1; + } else { + regular_package_count += 1; + } } } } } - info!("Imported {} packages", package_count); - Ok(()) + let total_package_count = regular_package_count + obsoleted_package_count; + info!("Imported {} packages ({} regular, {} obsoleted)", + total_package_count, regular_package_count, obsoleted_package_count); + Ok((regular_package_count, obsoleted_package_count)) } /// Imports a specific package version + /// + /// Returns a boolean indicating whether the package was obsoleted fn import_package_version( &self, source_path: &Path, @@ -346,7 +366,7 @@ impl Pkg5Importer { pkg_name: &str, _ver_name: &str, proto_dir: &Path, - ) -> Result<()> { + ) -> Result { debug!("Importing package version from {}", manifest_path.display()); // Extract package 
name from FMRI @@ -364,8 +384,67 @@ impl Pkg5Importer { // Parse the manifest using parse_string debug!("Parsing manifest content"); - let manifest = Manifest::parse_string(manifest_content)?; + let manifest = Manifest::parse_string(manifest_content.clone())?; + // Check if this is an obsoleted package + let mut is_obsoleted = false; + let mut fmri_str = String::new(); + + // Extract the FMRI from the manifest + for attr in &manifest.attributes { + if attr.key == "pkg.fmri" && !attr.values.is_empty() { + fmri_str = attr.values[0].clone(); + break; + } + } + + // Check for pkg.obsolete attribute + for attr in &manifest.attributes { + if attr.key == "pkg.obsolete" && !attr.values.is_empty() { + if attr.values[0] == "true" { + is_obsoleted = true; + debug!("Found obsoleted package: {}", fmri_str); + break; + } + } + } + + // If this is an obsoleted package, store it in the obsoleted directory + if is_obsoleted && !fmri_str.is_empty() { + debug!("Handling obsoleted package: {}", fmri_str); + + // Parse the FMRI + let fmri = match Fmri::parse(&fmri_str) { + Ok(fmri) => fmri, + Err(e) => { + warn!("Failed to parse FMRI '{}': {}", fmri_str, e); + return Err(Pkg6RepoError::from(format!( + "Failed to parse FMRI '{}': {}", + fmri_str, e + ))); + } + }; + + // Get the obsoleted package manager + let obsoleted_manager = dest_repo.get_obsoleted_manager()?; + + // Store the obsoleted package + debug!("Storing obsoleted package in dedicated directory"); + obsoleted_manager.store_obsoleted_package( + publisher, + &fmri, + &manifest_content, + None, // No obsoleted_by information available + None, // No deprecation message available + )?; + + info!("Stored obsoleted package: {}", fmri); + return Ok(true); // Return true to indicate this was an obsoleted package + } + + // For non-obsoleted packages, proceed with normal import + debug!("Processing regular (non-obsoleted) package"); + // Begin a transaction debug!("Beginning transaction"); let mut transaction = 
dest_repo.begin_transaction()?; @@ -472,7 +551,7 @@ impl Pkg5Importer { // Commit the transaction transaction.commit()?; - Ok(()) + Ok(false) // Return false to indicate this was a regular (non-obsoleted) package } }