diff --git a/Cargo.lock b/Cargo.lock index f4d548b..249253a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -696,6 +696,17 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +[[package]] +name = "goblin" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b363a30c165f666402fe6a3024d3bec7ebc898f96a4a23bd1c99f8dbf3f4f47" +dependencies = [ + "log", + "plain", + "scroll", +] + [[package]] name = "h2" version = "0.4.12" @@ -1099,6 +1110,7 @@ dependencies = [ "chrono", "diff-struct", "flate2", + "goblin", "lz4", "maplit", "miette", @@ -1605,6 +1617,12 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + [[package]] name = "ports" version = "0.5.1" @@ -2003,6 +2021,26 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "scroll" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ab8598aa408498679922eff7fa985c25d58a90771bd6be794434c5277eab1a6" +dependencies = [ + "scroll_derive", +] + +[[package]] +name = "scroll_derive" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1783eabc414609e28a5ba76aee5ddd52199f7107a0b24c2e9746a1ecc34a683d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "security-framework" version = "2.11.1" diff --git a/doc/rust_docs/pkgdepend-dependency-resolution.txt b/doc/rust_docs/pkgdepend-dependency-resolution.txt new file mode 100644 index 0000000..88f1c56 --- /dev/null +++ b/doc/rust_docs/pkgdepend-dependency-resolution.txt @@ -0,0 +1,254 @@ +pkgdepend dependency resolution overview (ELF, Python, JAR) + +This document describes how pkgdepend analyzes files to infer 
package +dependencies, based on the current source code in the pkg(5) repository. +It is intended to guide a reimplementation of equivalent checks in Rust. + +High-level flow +- File classification: src/modules/portable/os_sunos.py:get_file_type() reads + the first bytes of each payload and classifies as one of: + - ELF for ELF objects (magic 0x7F 'ELF'). + - EXEC for text files starting with a shebang (#!). + - SMF_MANIFEST for XML files recognized as SMF manifests. + - UNFOUND or unknown for other cases. There is no specific JAR type. +- Dispatch: src/modules/publish/dependencies.py:list_implicit_deps_for_manifest() + maps file types to analyzers: + - ELF -> pkg.flavor.elf.process_elf_dependencies + - EXEC -> pkg.flavor.script.process_script_deps + - SMF_MANIFEST -> pkg.flavor.smf_manifest.process_smf_manifest_deps + Unknown types are recorded in a "missing" map but not analyzed. +- The analyzers return a list of PublishingDependency objects (see + src/modules/flavor/base.py) and a list of analysis errors. These are later + resolved to package-level DependencyAction objects. +- Bypass rules: If pkg.depend.bypass-generate is set (manifest or action), + dependency generation can be skipped or filtered (details below). +- Internal pruning: After file-level dependencies are generated, pkgdepend can + drop dependencies that are satisfied by files delivered by the same package. +- Resolution to packages: Finally, dependencies on files are mapped to package + FMRIs by locating which packages (delivered or already installed) provide + the target files, following links where necessary. + +Controlling run paths and bypass +- pkg.depend.runpath (portable.PD_RUN_PATH): A colon-separated string. + - May be set at manifest level (applies to all actions) and/or per action. + - Verified by __verify_run_path(): must be a single string and not empty. + - Per-action value overrides manifest-level value for that action. 
+ - For ELF analysis, the provided runpath interacts with defaults via the + PD_DEFAULT_RUNPATH token (see below). +- pkg.depend.bypass-generate (portable.PD_BYPASS_GENERATE): a string or list of + strings controlling path patterns to ignore when generating dependencies. + - In list_implicit_deps_for_manifest(): + - If bypass contains a match-all pattern ".*" or "^.*$", analysis for that + action is skipped entirely. A debug attribute is recorded: + pkg.debug.depend.bypassed=":.*". + - Otherwise, __bypass_deps() filters out any matching file paths from the + generated dependencies. Patterns are treated as regex; bare filenames + are expanded to ".*/" and patterns are anchored with ^...$. + Matching paths are recorded in pkg.debug.depend.bypassed; dependencies are + updated to only contain the remaining full paths. + +ELF analysis (pkg.flavor.elf) +Reference: src/modules/flavor/elf.py + +Inputs +- Action (file) with attributes: + - path: installed path (no leading slash in manifests; code often prepends "/"). + - portable.PD_LOCAL_PATH: proto/build file to read. + - portable.PD_PROTO_DIR: base dir of the proto area. +- pkg_vars: package variant template (propagated to dependencies). +- dyn_tok_conv: map of dynamic tokens to expansion lists (e.g. $PLATFORM). +- run_paths: optional run path list from pkg.depend.runpath (colon-split). + +Steps +1) Verify file exists and is an ELF object (pkg.elf.is_elf_object). If not, + return no deps. +2) Parse headers and dynamic info: + - elf.get_info(proto_file) -> bits (32/64), arch (i386/sparc). + - elf.get_dynamic(proto_file) -> + - deps: list of DT_NEEDED entries; code uses [d[0] for d in deps]. + - runpath: DT_RUNPATH string (may be empty). +3) Build default search path rp: + - Start with DT_RUNPATH split by ":". Empty string becomes []. + - dyn_tok_conv["$ORIGIN"] is set to ["/" + dirname(installed_path)] so + $ORIGIN can be expanded in paths. 
+ - Kernel modules (installed_path under kernel/, usr/kernel, or + platform//kernel): + - If runpath is set to anything except the specific /usr/gcc//lib case, + raise RuntimeError. Otherwise runpath for kernel modules is derived as: + - For platform paths, append /platform//kernel; otherwise for + each $PLATFORM in dyn_tok_conv append /platform//kernel. + - Append default kernel paths: /kernel and /usr/kernel. + - If 64-bit, a kernel64 subdir is used to assemble candidate paths when + constructing dependencies: arch -> i386 => amd64; sparc => sparcv9. + - Non-kernel ELF: + - Ensure /lib and /usr/lib are present; for 64-bit also add /lib/64 and + /usr/lib/64. +4) Merge caller-provided run_paths: + - If run_paths is provided, base.insert_default_runpath(rp, run_paths) is + used. This replaces any PD_DEFAULT_RUNPATH token in run_paths with the + default rp. If the token is absent, the provided run_paths fully override + rp. Multiple PD_DEFAULT_RUNPATH tokens raise an error. +5) Expand dynamic tokens in rp: + - expand_variables() recursively replaces $TOKENS using dyn_tok_conv. + - Unknown tokens produce UnsupportedDynamicToken errors (non-fatal) which + are returned in the error list. +6) For each DT_NEEDED library name d: + - For each expanded run path p, form a candidate directory by joining p and + d; for kernel64 cases, insert amd64/sparcv9 as appropriate; drop the final + filename to retain only directories (run_paths for this dependency). + - Create an ElfDependency(action, base_name=basename(d), run_paths=dirs, + pkg_vars, proto_dir). + +Semantics of ElfDependency +- Inherits PublishingDependency (see below). It resolves against delivered files + by joining each run_path with base_name to form candidates. +- resolve_internal() is overridden to treat the case where no path resolves but + a file with the same base name is delivered by this package as a WARNING + instead of an ERROR (assumes external runpath will make it available). 
+ That sets pkg.debug.depend.*.severity=warning and marks variants accordingly. + +Python/script analysis (pkg.flavor.script + pkg.flavor.python) +References: +- src/modules/flavor/script.py +- src/modules/flavor/python.py + +Shebang handling (script.py) +- For any file with a shebang (#!) and the executable bit set: + - Extract interpreter path (first token after #!). If not absolute, record + ScriptNonAbsPath error. + - Normalize /bin/... to /usr/bin/... and add a ScriptDependency on that + interpreter path (base_name = last component; run_paths = directory). +- If the shebang line contains the substring "python" (e.g. #!/usr/bin/python3.9), + python-specific analysis is triggered by calling + python.process_python_dependencies(action, pkg_vars, script_path, run_paths), + where script_path is the full shebang line and run_paths is the effective + pkg.depend.runpath for the action. + +Python dependency discovery (python.py) +- Version inference: + - Installed path starting with usr/lib/python./ implies a + version (dir_major/dir_minor). + - Shebang matching ^#!/usr/bin/(/)?python. implies a + version (file_major/file_minor). + - If the file is executable and both imply versions that disagree, record a + PythonMismatchedVersion error and use the directory version for analysis. + - Analysis version selection: + - If installed path implies version, use that. + - Else if shebang implies version, use that. + - Else if executable but no specific version (e.g. #!/usr/bin/python), + record PythonUnspecifiedVersion and skip analysis. + - Else if not executable but installed under usr/lib/pythonX.Y, analyze + with that version. +- Performing analysis: + - If the selected version equals the currently running interpreter + (sys.version_info), use in-process analysis: + - Construct DepthLimitedModuleFinder with the install directory as the + base and pass through run_paths (pkg.depend.runpath). 
The finder executes + the local proto file (action.attrs[PD_LOCAL_PATH]) to discover imports. + - For each loaded module, obtain the list of file names (basenames of the + modules) and the directories searched (m.dirs). Create + PythonDependency(action, base_names=module file names, run_paths=dirs,...). + - Any missing imports are reported as PythonModuleMissingPath errors. + - Syntax errors are reported as PythonSyntaxError. + - If the selected version differs from the running interpreter: + - Spawn a subprocess: "python. depthlimitedmf.py + [run_paths ...]". + - Parse stdout lines: + - "DEP " -> add PythonDependency for those. + - "ERR " -> record PythonModuleMissingPath. + - Anything else -> PythonSubprocessBadLine. + - Nonzero exit -> PythonSubprocessError with return code and stderr. + +About JAR archives +- There is no special handling of JAR files in the current implementation. + - get_file_type() does not classify JARs and there is no flavor/jar module. + - The historical doc/elf-jar-handling.txt mentions the idea of tasting JARs, + but this has not been implemented in pkgdepend. +- Consequently, pkgdepend does not extract dependencies from .jar manifests or + classpaths. Any Java/JAR dependency tracking must be handled out-of-band + (e.g., manual packaging dependencies or future tooling). + +PublishingDependency mechanics (flavor/base.py) +- A PublishingDependency represents a dependency on one or more files located + via a list of run_paths and base_names, or via an explicit full_paths list. +- It stores debug attributes under the pkg.debug.depend.* namespace: + - .file (base names), .path (run paths) or .fullpath (explicit paths) + - .type (elf/python/script/smf/link), .reason, .via-links, .bypassed, etc. +- possibly_delivered(): + - For each candidate path (join of run_path and base_name, or each full_path), + calls resolve_links() to account for symlinks and hardlinks and to find + real provided paths. 
+ - If a path resolves and the resulting path is among delivered files, the + dependency is considered satisfied under the relevant variant combination. +- resolve_internal(): + - Checks if another file delivered by the same package satisfies the + dependency (via possibly_delivered against the package's own files/links). + - If so, the dependency is pruned. Otherwise, the error is recorded, subject + to ELF's special warning downgrade noted above. + +Resolving dependencies to packages (dependencies.py) +- add_fmri_path_mapping(): builds maps from paths to (PFMRI, variant + combinations) for both the currently delivered manifests and the installed + image (if used). +- resolve_links(path, files_dict, links, path_vars, attrs): + - Recursively follows link chains to real paths, accumulating variant + constraints along the way and generating conditional dependencies when a + link from one package points to a file delivered by another. +- find_package_using_delivered_files(): + - For each dependency, computes all candidate paths (make_paths()), resolves + them through links (resolve_links), groups results by variant combinations, + and then constructs either: + - type=require if exactly one provider package resolves the dependency, or + - type=require-any if multiple packages could satisfy it. + - Debug attributes include: + - pkg.debug.depend.file/path/fullpath + - pkg.debug.depend.via-links (colon-separated link chain per resolution) + - pkg.debug.depend.path-id (a stable id grouping related path attempts) + - Link-derived conditional dependencies (type=conditional) are emitted to + encode that a dependency is only needed when a particular link provider is + present. +- find_package(): tries delivered files first; if not fully satisfied and + allowed, tries files installed in the current image. 
+- combine(), __collapse_conditionals(), __remove_unneeded_require_and_require_any(): + - Perform simplification and deduplication of the emitted dependencies and + collapse conditional groups where possible. + +Variants and conversion to actions +- Each dependency carries variant constraints (VariantCombinations). After + generation and internal pruning, convert_to_standard_dep_actions() splits + dependencies by unsatisfied variant combinations, producing standard + actions.depend.DependencyAction instances ready for output. + +Run path insertion rule (PD_DEFAULT_RUNPATH) +- base.insert_default_runpath(default_runpath, run_paths) merges default + analyzer-detected search paths with user-provided run_paths: + - If run_paths includes the PD_DEFAULT_RUNPATH token, the default_runpath is + spliced at that position. + - If the token is absent, run_paths replaces the default entirely. + - Multiple tokens raise MultipleDefaultRunpaths. + +Notes for Rust implementation +- ELF: + - Parse DT_NEEDED and DT_RUNPATH. Handle $ORIGIN (directory of installed + path) and $PLATFORM expansion. Implement kernel module path rules and + 64-bit subdir logic. Merge user run paths via PD_DEFAULT_RUNPATH rules. + - Build dependencies keyed by base name with a directory search list. + - When pruning internal deps, downgrade to warning if base name is delivered + by the same package but no path matches. +- Python: + - Determine Python version from installed path or shebang. Flag mismatches. + - Execute import discovery with a depth-limited module finder; if the target + version differs, spawn the matching interpreter to run a helper script and + parse outputs. Include run_paths in module search. +- JAR: + - No current implementation. Decide whether to add support or retain current + behavior (no automatic JAR dependency extraction). +- General: + - Implement bypass rules and debug attributes to aid diagnostics. + - Implement link resolution and conditional dependency emission. 
+ - Respect variant tracking and final conversion to concrete dependency + actions. + +Cross-reference +- Historical note in doc/elf-jar-handling.txt discusses possible JAR handling, + but the current codebase does not implement JAR dependency analysis. diff --git a/libips/Cargo.toml b/libips/Cargo.toml index 539f298..e4cb2e8 100644 --- a/libips/Cargo.toml +++ b/libips/Cargo.toml @@ -22,6 +22,7 @@ miette = "7.6.0" tracing = "0.1.37" maplit = "1" object = "0.37" +goblin = "0.8" sha2 = "0.10" sha3 = "0.10" pest = "2.1.3" diff --git a/libips/src/depend/mod.rs b/libips/src/depend/mod.rs new file mode 100644 index 0000000..ca857da --- /dev/null +++ b/libips/src/depend/mod.rs @@ -0,0 +1,619 @@ +// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. +// If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::actions::{Dependency as ManifestDependency, Manifest}; +use crate::fmri::Fmri; +use crate::repository::ReadableRepository; +use miette::Diagnostic; +use regex::Regex; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::fs; +use std::path::{Path, PathBuf}; +use std::error::Error as StdError; +use thiserror::Error; +use tracing::{debug, warn}; + +pub type Result = std::result::Result; + +#[derive(Error, Debug, Diagnostic)] +#[error("Dependency generation error: {message}")] +#[diagnostic(code(ips::depend_error), help("Review inputs and file types"))] +pub struct DependError { + pub message: String, + #[source] + pub source: Option>, // keep library crate simple +} + +impl DependError { + fn new(message: impl Into) -> Self { + Self { message: message.into(), source: None } + } + fn with_source(message: impl Into, source: Box) -> Self { + Self { message: message.into(), source: Some(source) } + } +} + +/// Options controlling dependency generation +#[derive(Debug, Clone, Default, Deserialize, Serialize)] +pub struct GenerateOptions { + /// 
Colon-separated runpath override to be applied to all actions (manifest-level).
    /// If it contains the PD_DEFAULT_RUNPATH token, default runpaths will be inserted at that position.
    pub runpath: Option<String>,
    /// Regex patterns to bypass dependency generation (skip matching actions entirely).
    pub bypass_patterns: Vec<String>,
    /// Proto directory base; used to locate local files when only manifest relative paths are known.
    pub proto_dir: Option<PathBuf>,
}

/// Token name used to splice in the analyzer default runpaths.
pub const PD_DEFAULT_RUNPATH: &str = "PD_DEFAULT_RUNPATH";

/// Intermediate file-level dependency representation
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileDepKind {
    Elf {
        /// The base filename needed (from DT_NEEDED)
        base_name: String,
        /// Directories searched to find the base_name
        run_paths: Vec<String>,
        /// Installed path of the object declaring the dependency
        installed_path: String,
    },
    Script {
        /// The base filename of the interpreter (e.g., python3, sh)
        base_name: String,
        /// Directories searched to find the interpreter
        run_paths: Vec<String>,
        /// Installed path of the script declaring the dependency
        installed_path: String,
    },
    Python {
        /// Candidate module file basenames (e.g., foo.py, foo.so)
        base_names: Vec<String>,
        /// Directories searched for Python modules for the selected version
        run_paths: Vec<String>,
        /// Installed path of the script/module declaring the dependency
        installed_path: String,
    },
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileDep {
    pub kind: FileDepKind,
}

/// Convert manifest file actions into FileDep entries.
///
/// For each `file` action this locates the local payload (original-path
/// property and/or `proto_dir`), sniffs its content, and emits:
/// - ELF objects: one FileDep per DT_NEEDED entry;
/// - executable shebang scripts: a dependency on the interpreter, plus
///   Python module analysis when the interpreter mentions "python";
/// - non-shebang files under /usr/lib/pythonX.Y/: Python module analysis;
/// - SMF manifests: dependencies on absolute exec method paths.
///
/// Actions matching `opts.bypass_patterns` are skipped entirely;
/// unlocatable or unreadable payloads are silently ignored.
pub fn generate_file_dependencies_from_manifest(manifest: &Manifest, opts: &GenerateOptions) -> Result<Vec<FileDep>> {
    let mut out = Vec::new();
    let bypass = compile_bypass(&opts.bypass_patterns)?;

    for f in &manifest.files {
        // Installed paths in manifests typically do not start with '/'.
        let installed_path = if f.path.starts_with('/') { f.path.clone() } else { format!("/{}", f.path) };

        if should_bypass(&installed_path, &bypass) {
            debug!("bypassing dependency generation for {} per patterns", installed_path);
            continue;
        }

        // Locate the local file to analyze: prefer the explicit original-path
        // property; resolve relative paths against proto_dir when available.
        let local_path = match f.get_original_path() {
            Some(op) => {
                let p = PathBuf::from(&op);
                if p.is_absolute() {
                    p
                } else if let Some(base) = &opts.proto_dir {
                    let cand = base.join(op.trim_start_matches('/'));
                    if cand.exists() {
                        cand
                    } else {
                        // Fallback to proto_dir + installed_path
                        base.join(installed_path.trim_start_matches('/'))
                    }
                } else {
                    // Relative without proto_dir: try as-is (may be relative to CWD)
                    PathBuf::from(op)
                }
            }
            None => match &opts.proto_dir {
                Some(base) => base.join(installed_path.trim_start_matches('/')),
                None => continue, // no local file to analyze; skip
            },
        };

        // Read local bytes once
        if let Ok(bytes) = fs::read(&local_path) {
            // ELF check (magic 0x7F 'E' 'L' 'F')
            if bytes.len() >= 4 && &bytes[0..4] == b"\x7FELF" {
                let mut deps = process_elf(&bytes, &installed_path, opts);
                out.append(&mut deps);
                continue;
            }

            // Script shebang check
            if let Some(interp) = parse_shebang(&bytes) {
                // Only executable scripts pull in their interpreter; a
                // missing/unparsable mode is assumed executable.
                let exec_ok = is_executable_mode(&f.mode);
                if !exec_ok {
                    // Not executable; skip script dependency
                    continue;
                }
                // Normalize /bin -> /usr/bin
                let interp_path = normalize_bin_path(&interp);
                if !interp_path.starts_with('/') {
                    warn!("Script shebang for {} specifies non-absolute interpreter: {}", installed_path, interp_path);
                } else {
                    // Derive dir and base name
                    let (dir, base) = split_dir_base(&interp_path);
                    if let Some(dir) = dir {
                        out.push(FileDep { kind: FileDepKind::Script { base_name: base.to_string(), run_paths: vec![dir.to_string()], installed_path: installed_path.clone() } });
                        // If Python interpreter, perform Python analysis
                        if interp_path.contains("python") {
                            if let Some((maj, min)) = infer_python_version_from_paths(&installed_path, Some(&interp_path)) {
                                let mut pydeps = process_python(&bytes, &installed_path, (maj, min), opts);
                                out.append(&mut pydeps);
                            }
                        }
                    }
                }
            } else {
                // No shebang: if installed under usr/lib/pythonX.Y/, analyze as module
                if let Some((maj, min)) = infer_python_version_from_paths(&installed_path, None) {
                    let mut pydeps = process_python(&bytes, &installed_path, (maj, min), opts);
                    out.append(&mut pydeps);
                }
            }

            // SMF manifest detection: extract exec paths
            if looks_like_smf_manifest(&bytes) {
                for exec_path in extract_smf_execs(&bytes) {
                    if exec_path.starts_with('/') {
                        let (dir, base) = split_dir_base(&exec_path);
                        if let Some(dir) = dir {
                            out.push(FileDep { kind: FileDepKind::Script { base_name: base.to_string(), run_paths: vec![dir.to_string()], installed_path: installed_path.clone() } });
                        }
                    }
                }
            }
        }
    }

    Ok(out)
}

/// Merge default runpaths into a user-provided list using the
/// PD_DEFAULT_RUNPATH token: the defaults are spliced in at the token's
/// position; with no token the provided list replaces the defaults
/// entirely; more than one token is an error.
fn insert_default_runpath(defaults: &[String], provided: &[String]) -> std::result::Result<Vec<String>, DependError> {
    let mut out = Vec::new();
    let mut token_count = 0;
    for p in provided {
        if p == PD_DEFAULT_RUNPATH {
            token_count += 1;
            if token_count > 1 {
                return Err(DependError::new("Multiple PD_DEFAULT_RUNPATH tokens in runpath override"));
            }
            out.extend_from_slice(defaults);
        } else {
            out.push(p.clone());
        }
    }
    if token_count == 0 {
        // Override replaces defaults
        Ok(provided.to_vec())
    } else {
        Ok(out)
    }
}

/// Compile user-supplied bypass patterns, surfacing invalid regexes as errors.
fn compile_bypass(patterns: &[String]) -> Result<Vec<Regex>> {
    let mut out = Vec::new();
    for p in patterns {
        out.push(Regex::new(p).map_err(|e| DependError::with_source(format!("invalid bypass pattern: {}", p), Box::new(e)))?);
    }
    Ok(out)
}

/// True when the installed path matches any compiled bypass pattern.
fn should_bypass(path: &str, patterns: &[Regex])
-> bool { + patterns.iter().any(|re| re.is_match(path)) +} + +fn process_elf(bytes: &[u8], installed_path: &str, opts: &GenerateOptions) -> Vec { + let mut out = Vec::new(); + match goblin::elf::Elf::parse(bytes) { + Ok(elf) => { + // DT_NEEDED entries + let mut needed: Vec = elf.libraries.iter().map(|s| s.to_string()).collect(); + if needed.is_empty() { + return out; + } + + // Default runpaths + let mut defaults: Vec = vec!["/lib".into(), "/usr/lib".into()]; + // crude bitness check: presence of 64-bit elf class + if elf.is_64 { + defaults.push("/lib/64".into()); + defaults.push("/usr/lib/64".into()); + } + + // DT_RUNPATH + let mut runpaths: Vec = Vec::new(); + if !elf.runpaths.is_empty() { + for rp in &elf.runpaths { + for seg in rp.split(':') { + if !seg.is_empty() { + runpaths.push(seg.to_string()); + } + } + } + } + + // Merge with defaults using PD_DEFAULT_RUNPATH semantics if caller provided runpath override + let effective = if let Some(ref rp) = opts.runpath { + let provided: Vec = rp.split(':').map(|s| s.to_string()).collect(); + match insert_default_runpath(&defaults, &provided) { + Ok(v) => v, + Err(e) => { + warn!("{}", e.message); + provided + } + } + } else { + // If no override, prefer DT_RUNPATH if present else defaults + if runpaths.is_empty() { defaults.clone() } else { runpaths.clone() } + }; + + // Expand $ORIGIN + let origin = Path::new(installed_path).parent().map(|p| p.display().to_string()).unwrap_or_else(|| "/".to_string()); + let expanded: Vec = effective + .into_iter() + .map(|p| p.replace("$ORIGIN", &origin)) + .collect(); + + // Emit FileDep for each DT_NEEDED base name + for bn in needed.drain(..) { + out.push(FileDep { kind: FileDepKind::Elf { base_name: bn, run_paths: expanded.clone(), installed_path: installed_path.to_string() } }); + } + } + Err(err) => warn!("ELF parse error for {}: {}", installed_path, err), + } + out +} + +/// Resolve file-level dependencies into manifest Dependency actions by consulting a repository. 
+pub fn resolve_dependencies( + repo: &R, + publisher: Option<&str>, + file_deps: &[FileDep], +) -> Result> { + // Build a mapping from path -> providers (FMRIs) + let path_map = build_path_provider_map(repo, publisher)?; + + let mut deps: Vec = Vec::new(); + + for fd in file_deps { + match &fd.kind { + FileDepKind::Elf { base_name, run_paths, .. } => { + let mut providers: Vec = Vec::new(); + for dir in run_paths { + let full = normalize_join(dir, base_name); + if let Some(list) = path_map.get(&full) { + for f in list { + if !providers.contains(f) { + providers.push(f.clone()); + } + } + } + } + if providers.len() == 1 { + let fmri = providers.remove(0); + deps.push(ManifestDependency { + fmri: Some(fmri), + dependency_type: "require".to_string(), + predicate: None, + root_image: String::new(), + optional: Vec::new(), + facets: HashMap::new(), + }); + } else if providers.len() > 1 { + // Our model lacks a group for require-any; emit one per FMRI + for fmri in providers.into_iter() { + deps.push(ManifestDependency { + fmri: Some(fmri), + dependency_type: "require-any".to_string(), + predicate: None, + root_image: String::new(), + optional: Vec::new(), + facets: HashMap::new(), + }); + } + } else { + // unresolved -> skip for now; future: emit analysis warnings + } + } + FileDepKind::Script { base_name, run_paths, .. 
} => { + let mut providers: Vec = Vec::new(); + for dir in run_paths { + let full = normalize_join(dir, base_name); + if let Some(list) = path_map.get(&full) { + for f in list { + if !providers.contains(f) { + providers.push(f.clone()); + } + } + } + } + if providers.len() == 1 { + let fmri = providers.remove(0); + deps.push(ManifestDependency { + fmri: Some(fmri), + dependency_type: "require".to_string(), + predicate: None, + root_image: String::new(), + optional: Vec::new(), + facets: HashMap::new(), + }); + } else if providers.len() > 1 { + for fmri in providers.into_iter() { + deps.push(ManifestDependency { + fmri: Some(fmri), + dependency_type: "require-any".to_string(), + predicate: None, + root_image: String::new(), + optional: Vec::new(), + facets: HashMap::new(), + }); + } + } else { + } + } + FileDepKind::Python { base_names, run_paths, .. } => { + let mut providers: Vec = Vec::new(); + for dir in run_paths { + for base in base_names { + let full = normalize_join(dir, base); + if let Some(list) = path_map.get(&full) { + for f in list { + if !providers.contains(f) { + providers.push(f.clone()); + } + } + } + } + } + if providers.len() == 1 { + let fmri = providers.remove(0); + deps.push(ManifestDependency { + fmri: Some(fmri), + dependency_type: "require".to_string(), + predicate: None, + root_image: String::new(), + optional: Vec::new(), + facets: HashMap::new(), + }); + } else if providers.len() > 1 { + for fmri in providers.into_iter() { + deps.push(ManifestDependency { + fmri: Some(fmri), + dependency_type: "require-any".to_string(), + predicate: None, + root_image: String::new(), + optional: Vec::new(), + facets: HashMap::new(), + }); + } + } else { + } + } + } + } + + Ok(deps) +} + +fn normalize_join(dir: &str, base: &str) -> String { + if dir.ends_with('/') { + format!("{}{}", dir.trim_end_matches('/'), format!("/{}", base)) + } else { + format!("{}/{}", dir, base) + } +} + +fn build_path_provider_map(repo: &R, publisher: Option<&str>) -> Result>> { 
+ // Ask repo to show contents for all packages (files only) + let contents = repo + .show_contents(publisher, None, Some(&["file".to_string()])) + .map_err(|e| DependError::with_source("Repository show_contents failed", Box::new(e)))?; + + let mut map: HashMap> = HashMap::new(); + for pc in contents { + let fmri = match pc.package_id.parse::() { + Ok(f) => f, + Err(e) => { + warn!("Skipping package with invalid FMRI {}: {}", pc.package_id, e); + continue; + } + }; + if let Some(files) = pc.files { + for p in files { + // Ensure leading slash + let key = if p.starts_with('/') { p } else { format!("/{}", p) }; + map.entry(key).or_default().push(fmri.clone()); + } + } + } + Ok(map) +} + + +// --- Helpers for script processing --- +fn parse_shebang(bytes: &[u8]) -> Option { + if bytes.len() < 2 || bytes[0] != b'#' || bytes[1] != b'!' { + return None; + } + // Extract first line after #! + let mut end = bytes.len(); + for (i, b) in bytes.iter().enumerate().skip(2) { + if *b == b'\n' || *b == b'\r' { + end = i; + break; + } + } + let line = &bytes[2..end]; + let text = String::from_utf8_lossy(line); + let s = text.trim(); + if s.is_empty() { + return None; + } + // First token is interpreter path + let mut parts = s.split_whitespace(); + parts.next().map(|p| p.to_string()) +} + +fn is_executable_mode(mode_str: &str) -> bool { + // If mode is empty or unparsable, assume executable to avoid missing deps + let ms = mode_str.trim(); + if ms.is_empty() { + return true; + } + // Accept strings like "0755" or "755" + match u32::from_str_radix(ms.trim_start_matches('0'), 8) { + Ok(bits) => bits & 0o111 != 0, + Err(_) => true, + } +} + +fn normalize_bin_path(path: &str) -> String { + if path.starts_with("/bin/") { + path.replacen("/bin/", "/usr/bin/", 1) + } else { + path.to_string() + } +} + +fn split_dir_base<'a>(path: &'a str) -> (Option<&'a str>, &'a str) { + if let Some(idx) = path.rfind('/') { + if idx == 0 { + return (Some("/"), &path[1..]); + } + (Some(&path[..idx]), 
&path[idx + 1..])
    } else {
        (None, path)
    }
}


/// Very lightweight SMF manifest detection: SMF manifests are XML
/// documents whose root element is <service_bundle>; we do a lossy UTF-8
/// conversion and look for that opening tag to avoid a full XML parser.
fn looks_like_smf_manifest(bytes: &[u8]) -> bool {
    let text = String::from_utf8_lossy(bytes);
    // NOTE(review): the original literal was destroyed in transit; matching
    // on the root tag is the documented intent — confirm against pkg(5).
    text.contains("<service_bundle")
}

/// Infer the Python (major, minor) version for a file, preferring the
/// version implied by an installed path under /usr/lib/pythonX.Y, then
/// falling back to the shebang interpreter (e.g. /usr/bin/python3.11).
fn infer_python_version_from_paths(installed_path: &str, shebang_path: Option<&str>) -> Option<(u8, u8)> {
    // Prefer version implied by installed path under /usr/lib/pythonX.Y
    if let Ok(re) = Regex::new(r"^/usr/lib/python(\d+)\.(\d+)(/|$)") {
        if let Some(c) = re.captures(installed_path) {
            if let (Some(ma), Some(mi)) = (c.get(1), c.get(2)) {
                if let (Ok(maj), Ok(min)) = (ma.as_str().parse::<u8>(), mi.as_str().parse::<u8>()) {
                    return Some((maj, min));
                }
            }
        }
    }
    // Else, try to infer from shebang interpreter path (e.g., /usr/bin/python3.11)
    if let Some(sb) = shebang_path {
        if let Ok(re) = Regex::new(r"python(\d+)\.(\d+)") {
            if let Some(c) = re.captures(sb) {
                if let (Some(ma), Some(mi)) = (c.get(1), c.get(2)) {
                    if let (Ok(maj), Ok(min)) = (ma.as_str().parse::<u8>(), mi.as_str().parse::<u8>()) {
                        return Some((maj, min));
                    }
                }
            }
        }
    }
    None
}

/// Default module search directories for the given Python version, merged
/// with any caller-provided runpath via PD_DEFAULT_RUNPATH semantics.
fn compute_python_runpaths(version: (u8, u8), opts: &GenerateOptions) -> Vec<String> {
    let (maj, min) = version;
    let base = format!("/usr/lib/python{}.{}", maj, min);
    let defaults = vec![
        base.clone(),
        format!("{}/vendor-packages", base),
        format!("{}/site-packages", base),
        format!("{}/lib-dynload", base),
    ];
    if let Some(ref rp) = opts.runpath {
        let provided: Vec<String> = rp.split(':').map(|s| s.to_string()).collect();
        match insert_default_runpath(&defaults, &provided) {
            Ok(v) => v,
            Err(_) => provided,
        }
    } else {
        defaults
    }
}

/// Collect top-level module names from `import x` and `from x import ...`
/// statements using simple regexes (no real Python parsing; only the
/// first dotted name per statement is considered).
fn collect_python_imports(src: &str) -> Vec<String> {
    let mut mods = Vec::new();
    // 'import x[.y][, z]' - handle only first module per line for simplicity
    if let Ok(re_imp) = Regex::new(r"(?m)^\s*import\s+([A-Za-z_][A-Za-z0-9_\.]*)") {
        for cap in re_imp.captures_iter(src) {
            if let Some(m) = cap.get(1) {
                let name = m.as_str().split('.').next().unwrap_or("").to_string();
                if !name.is_empty() && !mods.contains(&name) {
                    mods.push(name);
                }
            }
        }
    }
    // 'from x.y import ...'
    if let Ok(re_from) = Regex::new(r"(?m)^\s*from\s+([A-Za-z_][A-Za-z0-9_\.]*)\s+import\s+") {
        for cap in re_from.captures_iter(src) {
            if let Some(m) = cap.get(1) {
                let name = m.as_str().split('.').next().unwrap_or("").to_string();
                if !name.is_empty() && !mods.contains(&name) {
                    mods.push(name);
                }
            }
        }
    }
    mods
}

/// Run regex-based import discovery over a Python payload and emit a
/// single FileDep covering every imported module (as both module.py and
/// module.so candidates) for the selected version's search path.
fn process_python(bytes: &[u8], installed_path: &str, version: (u8, u8), opts: &GenerateOptions) -> Vec<FileDep> {
    let text = String::from_utf8_lossy(bytes);
    let imports = collect_python_imports(&text);
    if imports.is_empty() {
        return Vec::new();
    }
    // Base names to search: module.py and module.so
    let mut base_names: Vec<String> = Vec::new();
    for m in imports {
        let py = format!("{}.py", m);
        let so = format!("{}.so", m);
        if !base_names.contains(&py) { base_names.push(py); }
        if !base_names.contains(&so) { base_names.push(so); }
    }
    let run_paths = compute_python_runpaths(version, opts);
    vec![FileDep { kind: FileDepKind::Python { base_names, run_paths, installed_path: installed_path.to_string() } }]
}

// --- SMF helpers ---

/// Extract exec method paths (exec="..." or exec='...') from an SMF
/// manifest's XML text, deduplicated in document order.
fn extract_smf_execs(bytes: &[u8]) -> Vec<String> {
    let text = String::from_utf8_lossy(bytes);
    let mut out = Vec::new();
    // Match exec="..." or exec='...'
    if let Ok(re) = Regex::new(r#"exec\s*=\s*\"([^\"]+)\"|exec\s*=\s*'([^']+)'"#) {
        for cap in re.captures_iter(&text) {
            let m = cap.get(1).or_else(|| cap.get(2));
            if let Some(v) = m {
                let val = v.as_str().to_string();
                if !out.contains(&val) { out.push(val); }
            }
        }
    }
    out
}
diff --git a/libips/src/lib.rs b/libips/src/lib.rs index 85d6090..a7029f4 100644 --- a/libips/src/lib.rs +++ b/libips/src/lib.rs @@ -13,6 +13,7 @@ pub mod repository; pub mod publisher; pub mod transformer; pub mod solver; +pub mod depend; mod test_json_manifest; #[cfg(test)]