mirror of
https://codeberg.org/Toasterson/ips.git
synced 2026-04-10 13:20:42 +00:00
Add depend module for file-level dependency generation
- Introduced `depend.rs` to handle dependency generation for ELF files, scripts, Python modules, and SMF manifests. - Implemented file classification and analysis logic with configurable bypass rules and runpath handling. - Added utility functions to resolve file dependencies into manifest actions using a provided repository. - Updated `Cargo.toml` with `goblin` dependency for ELF processing. - Enhanced codebase with default runpath insertion, dynamic token expansion, and Python module import detection. - Included `pkgdepend` documentation for dependency resolution overview.
This commit is contained in:
parent
7cffa6c4e6
commit
77f02fdfbd
5 changed files with 913 additions and 0 deletions
38
Cargo.lock
generated
38
Cargo.lock
generated
|
|
@ -696,6 +696,17 @@ version = "0.31.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
|
||||
|
||||
[[package]]
|
||||
name = "goblin"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b363a30c165f666402fe6a3024d3bec7ebc898f96a4a23bd1c99f8dbf3f4f47"
|
||||
dependencies = [
|
||||
"log",
|
||||
"plain",
|
||||
"scroll",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h2"
|
||||
version = "0.4.12"
|
||||
|
|
@ -1099,6 +1110,7 @@ dependencies = [
|
|||
"chrono",
|
||||
"diff-struct",
|
||||
"flate2",
|
||||
"goblin",
|
||||
"lz4",
|
||||
"maplit",
|
||||
"miette",
|
||||
|
|
@ -1605,6 +1617,12 @@ dependencies = [
|
|||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "plain"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
|
||||
|
||||
[[package]]
|
||||
name = "ports"
|
||||
version = "0.5.1"
|
||||
|
|
@ -2003,6 +2021,26 @@ dependencies = [
|
|||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scroll"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6ab8598aa408498679922eff7fa985c25d58a90771bd6be794434c5277eab1a6"
|
||||
dependencies = [
|
||||
"scroll_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scroll_derive"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1783eabc414609e28a5ba76aee5ddd52199f7107a0b24c2e9746a1ecc34a683d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.106",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "security-framework"
|
||||
version = "2.11.1"
|
||||
|
|
|
|||
254
doc/rust_docs/pkgdepend-dependency-resolution.txt
Normal file
254
doc/rust_docs/pkgdepend-dependency-resolution.txt
Normal file
|
|
@ -0,0 +1,254 @@
|
|||
pkgdepend dependency resolution overview (ELF, Python, JAR)
|
||||
|
||||
This document describes how pkgdepend analyzes files to infer package
|
||||
dependencies, based on the current source code in the pkg(5) repository.
|
||||
It is intended to guide a reimplementation of equivalent checks in Rust.
|
||||
|
||||
High-level flow
|
||||
- File classification: src/modules/portable/os_sunos.py:get_file_type() reads
|
||||
the first bytes of each payload and classifies as one of:
|
||||
- ELF for ELF objects (magic 0x7F 'ELF').
|
||||
- EXEC for text files starting with a shebang (#!).
|
||||
- SMF_MANIFEST for XML files recognized as SMF manifests.
|
||||
- UNFOUND or unknown for other cases. There is no specific JAR type.
|
||||
- Dispatch: src/modules/publish/dependencies.py:list_implicit_deps_for_manifest()
|
||||
maps file types to analyzers:
|
||||
- ELF -> pkg.flavor.elf.process_elf_dependencies
|
||||
- EXEC -> pkg.flavor.script.process_script_deps
|
||||
- SMF_MANIFEST -> pkg.flavor.smf_manifest.process_smf_manifest_deps
|
||||
Unknown types are recorded in a "missing" map but not analyzed.
|
||||
- The analyzers return a list of PublishingDependency objects (see
|
||||
src/modules/flavor/base.py) and a list of analysis errors. These are later
|
||||
resolved to package-level DependencyAction objects.
|
||||
- Bypass rules: If pkg.depend.bypass-generate is set (manifest or action),
|
||||
dependency generation can be skipped or filtered (details below).
|
||||
- Internal pruning: After file-level dependencies are generated, pkgdepend can
|
||||
drop dependencies that are satisfied by files delivered by the same package.
|
||||
- Resolution to packages: Finally, dependencies on files are mapped to package
|
||||
FMRIs by locating which packages (delivered or already installed) provide
|
||||
the target files, following links where necessary.
|
||||
|
||||
Controlling run paths and bypass
|
||||
- pkg.depend.runpath (portable.PD_RUN_PATH): A colon-separated string.
|
||||
- May be set at manifest level (applies to all actions) and/or per action.
|
||||
- Verified by __verify_run_path(): must be a single string and not empty.
|
||||
- Per-action value overrides manifest-level value for that action.
|
||||
- For ELF analysis, the provided runpath interacts with defaults via the
|
||||
PD_DEFAULT_RUNPATH token (see below).
|
||||
- pkg.depend.bypass-generate (portable.PD_BYPASS_GENERATE): a string or list of
|
||||
strings controlling path patterns to ignore when generating dependencies.
|
||||
- In list_implicit_deps_for_manifest():
|
||||
- If bypass contains a match-all pattern ".*" or "^.*$", analysis for that
|
||||
action is skipped entirely. A debug attribute is recorded:
|
||||
pkg.debug.depend.bypassed="<action path>:.*".
|
||||
- Otherwise, __bypass_deps() filters out any matching file paths from the
|
||||
generated dependencies. Patterns are treated as regex; bare filenames
|
||||
are expanded to ".*/<name>" and patterns are anchored with ^...$.
|
||||
Matching paths are recorded in pkg.debug.depend.bypassed; dependencies are
|
||||
updated to only contain the remaining full paths.
|
||||
|
||||
ELF analysis (pkg.flavor.elf)
|
||||
Reference: src/modules/flavor/elf.py
|
||||
|
||||
Inputs
|
||||
- Action (file) with attributes:
|
||||
- path: installed path (no leading slash in manifests; code often prepends "/").
|
||||
- portable.PD_LOCAL_PATH: proto/build file to read.
|
||||
- portable.PD_PROTO_DIR: base dir of the proto area.
|
||||
- pkg_vars: package variant template (propagated to dependencies).
|
||||
- dyn_tok_conv: map of dynamic tokens to expansion lists (e.g. $PLATFORM).
|
||||
- run_paths: optional run path list from pkg.depend.runpath (colon-split).
|
||||
|
||||
Steps
|
||||
1) Verify file exists and is an ELF object (pkg.elf.is_elf_object). If not,
|
||||
return no deps.
|
||||
2) Parse headers and dynamic info:
|
||||
- elf.get_info(proto_file) -> bits (32/64), arch (i386/sparc).
|
||||
- elf.get_dynamic(proto_file) ->
|
||||
- deps: list of DT_NEEDED entries; code uses [d[0] for d in deps].
|
||||
- runpath: DT_RUNPATH string (may be empty).
|
||||
3) Build default search path rp:
|
||||
- Start with DT_RUNPATH split by ":". Empty string becomes [].
|
||||
- dyn_tok_conv["$ORIGIN"] is set to ["/" + dirname(installed_path)] so
|
||||
$ORIGIN can be expanded in paths.
|
||||
- Kernel modules (installed_path under kernel/, usr/kernel, or
|
||||
platform/<platform>/kernel):
|
||||
- If runpath is set to anything except the specific /usr/gcc/<n>/lib case,
|
||||
raise RuntimeError. Otherwise runpath for kernel modules is derived as:
|
||||
- For platform paths, append /platform/<platform>/kernel; otherwise for
|
||||
each $PLATFORM in dyn_tok_conv append /platform/<plat>/kernel.
|
||||
- Append default kernel paths: /kernel and /usr/kernel.
|
||||
- If 64-bit, a kernel64 subdir is used to assemble candidate paths when
|
||||
constructing dependencies: arch -> i386 => amd64; sparc => sparcv9.
|
||||
- Non-kernel ELF:
|
||||
- Ensure /lib and /usr/lib are present; for 64-bit also add /lib/64 and
|
||||
/usr/lib/64.
|
||||
4) Merge caller-provided run_paths:
|
||||
- If run_paths is provided, base.insert_default_runpath(rp, run_paths) is
|
||||
used. This replaces any PD_DEFAULT_RUNPATH token in run_paths with the
|
||||
default rp. If the token is absent, the provided run_paths fully override
|
||||
rp. Multiple PD_DEFAULT_RUNPATH tokens raise an error.
|
||||
5) Expand dynamic tokens in rp:
|
||||
- expand_variables() recursively replaces $TOKENS using dyn_tok_conv.
|
||||
- Unknown tokens produce UnsupportedDynamicToken errors (non-fatal) which
|
||||
are returned in the error list.
|
||||
6) For each DT_NEEDED library name d:
|
||||
- For each expanded run path p, form a candidate directory by joining p and
|
||||
d; for kernel64 cases, insert amd64/sparcv9 as appropriate; drop the final
|
||||
filename to retain only directories (run_paths for this dependency).
|
||||
- Create an ElfDependency(action, base_name=basename(d), run_paths=dirs,
|
||||
pkg_vars, proto_dir).
|
||||
|
||||
Semantics of ElfDependency
|
||||
- Inherits PublishingDependency (see below). It resolves against delivered files
|
||||
by joining each run_path with base_name to form candidates.
|
||||
- resolve_internal() is overridden to treat the case where no path resolves but
|
||||
a file with the same base name is delivered by this package as a WARNING
|
||||
instead of an ERROR (assumes external runpath will make it available).
|
||||
That sets pkg.debug.depend.*.severity=warning and marks variants accordingly.
|
||||
|
||||
Python/script analysis (pkg.flavor.script + pkg.flavor.python)
|
||||
References:
|
||||
- src/modules/flavor/script.py
|
||||
- src/modules/flavor/python.py
|
||||
|
||||
Shebang handling (script.py)
|
||||
- For any file with a shebang (#!) and the executable bit set:
|
||||
- Extract interpreter path (first token after #!). If not absolute, record
|
||||
ScriptNonAbsPath error.
|
||||
- Normalize /bin/... to /usr/bin/... and add a ScriptDependency on that
|
||||
interpreter path (base_name = last component; run_paths = directory).
|
||||
- If the shebang line contains the substring "python" (e.g. #!/usr/bin/python3.9),
|
||||
python-specific analysis is triggered by calling
|
||||
python.process_python_dependencies(action, pkg_vars, script_path, run_paths),
|
||||
where script_path is the full shebang line and run_paths is the effective
|
||||
pkg.depend.runpath for the action.
|
||||
|
||||
Python dependency discovery (python.py)
|
||||
- Version inference:
|
||||
- Installed path starting with usr/lib/python<MAJOR>.<MINOR>/ implies a
|
||||
version (dir_major/dir_minor).
|
||||
- Shebang matching ^#!/usr/bin/(<subdir>/)?python<MAJOR>.<MINOR> implies a
|
||||
version (file_major/file_minor).
|
||||
- If the file is executable and both imply versions that disagree, record a
|
||||
PythonMismatchedVersion error and use the directory version for analysis.
|
||||
- Analysis version selection:
|
||||
- If installed path implies version, use that.
|
||||
- Else if shebang implies version, use that.
|
||||
- Else if executable but no specific version (e.g. #!/usr/bin/python),
|
||||
record PythonUnspecifiedVersion and skip analysis.
|
||||
- Else if not executable but installed under usr/lib/pythonX.Y, analyze
|
||||
with that version.
|
||||
- Performing analysis:
|
||||
- If the selected version equals the currently running interpreter
|
||||
(sys.version_info), use in-process analysis:
|
||||
- Construct DepthLimitedModuleFinder with the install directory as the
|
||||
base and pass through run_paths (pkg.depend.runpath). The finder executes
|
||||
the local proto file (action.attrs[PD_LOCAL_PATH]) to discover imports.
|
||||
- For each loaded module, obtain the list of file names (basenames of the
|
||||
modules) and the directories searched (m.dirs). Create
|
||||
PythonDependency(action, base_names=module file names, run_paths=dirs,...).
|
||||
- Any missing imports are reported as PythonModuleMissingPath errors.
|
||||
- Syntax errors are reported as PythonSyntaxError.
|
||||
- If the selected version differs from the running interpreter:
|
||||
- Spawn a subprocess: "python<MAJOR>.<MINOR> depthlimitedmf.py <install_dir>
|
||||
<local_file> [run_paths ...]".
|
||||
- Parse stdout lines:
|
||||
- "DEP <repr((names, dirs))>" -> add PythonDependency for those.
|
||||
- "ERR <module_name>" -> record PythonModuleMissingPath.
|
||||
- Anything else -> PythonSubprocessBadLine.
|
||||
- Nonzero exit -> PythonSubprocessError with return code and stderr.
|
||||
|
||||
About JAR archives
|
||||
- There is no special handling of JAR files in the current implementation.
|
||||
- get_file_type() does not classify JARs and there is no flavor/jar module.
|
||||
- The historical doc/elf-jar-handling.txt mentions the idea of tasting JARs,
|
||||
but this has not been implemented in pkgdepend.
|
||||
- Consequently, pkgdepend does not extract dependencies from .jar manifests or
|
||||
classpaths. Any Java/JAR dependency tracking must be handled out-of-band
|
||||
(e.g., manual packaging dependencies or future tooling).
|
||||
|
||||
PublishingDependency mechanics (flavor/base.py)
|
||||
- A PublishingDependency represents a dependency on one or more files located
|
||||
via a list of run_paths and base_names, or via an explicit full_paths list.
|
||||
- It stores debug attributes under the pkg.debug.depend.* namespace:
|
||||
- .file (base names), .path (run paths) or .fullpath (explicit paths)
|
||||
- .type (elf/python/script/smf/link), .reason, .via-links, .bypassed, etc.
|
||||
- possibly_delivered():
|
||||
- For each candidate path (join of run_path and base_name, or each full_path),
|
||||
calls resolve_links() to account for symlinks and hardlinks and to find
|
||||
real provided paths.
|
||||
- If a path resolves and the resulting path is among delivered files, the
|
||||
dependency is considered satisfied under the relevant variant combination.
|
||||
- resolve_internal():
|
||||
- Checks if another file delivered by the same package satisfies the
|
||||
dependency (via possibly_delivered against the package’s own files/links).
|
||||
- If so, the dependency is pruned. Otherwise, the error is recorded, subject
|
||||
to ELF’s special warning downgrade noted above.
|
||||
|
||||
Resolving dependencies to packages (dependencies.py)
|
||||
- add_fmri_path_mapping(): builds maps from paths to (PFMRI, variant
|
||||
combinations) for both the currently delivered manifests and the installed
|
||||
image (if used).
|
||||
- resolve_links(path, files_dict, links, path_vars, attrs):
|
||||
- Recursively follows link chains to real paths, accumulating variant
|
||||
constraints along the way and generating conditional dependencies when a
|
||||
link from one package points to a file delivered by another.
|
||||
- find_package_using_delivered_files():
|
||||
- For each dependency, computes all candidate paths (make_paths()), resolves
|
||||
them through links (resolve_links), groups results by variant combinations,
|
||||
and then constructs either:
|
||||
- type=require if exactly one provider package resolves the dependency, or
|
||||
- type=require-any if multiple packages could satisfy it.
|
||||
- Debug attributes include:
|
||||
- pkg.debug.depend.file/path/fullpath
|
||||
- pkg.debug.depend.via-links (colon-separated link chain per resolution)
|
||||
- pkg.debug.depend.path-id (a stable id grouping related path attempts)
|
||||
- Link-derived conditional dependencies (type=conditional) are emitted to
|
||||
encode that a dependency is only needed when a particular link provider is
|
||||
present.
|
||||
- find_package(): tries delivered files first; if not fully satisfied and
|
||||
allowed, tries files installed in the current image.
|
||||
- combine(), __collapse_conditionals(), __remove_unneeded_require_and_require_any():
|
||||
- Perform simplification and deduplication of the emitted dependencies and
|
||||
collapse conditional groups where possible.
|
||||
|
||||
Variants and conversion to actions
|
||||
- Each dependency carries variant constraints (VariantCombinations). After
|
||||
generation and internal pruning, convert_to_standard_dep_actions() splits
|
||||
dependencies by unsatisfied variant combinations, producing standard
|
||||
actions.depend.DependencyAction instances ready for output.
|
||||
|
||||
Run path insertion rule (PD_DEFAULT_RUNPATH)
|
||||
- base.insert_default_runpath(default_runpath, run_paths) merges default
|
||||
analyzer-detected search paths with user-provided run_paths:
|
||||
- If run_paths includes the PD_DEFAULT_RUNPATH token, the default_runpath is
|
||||
spliced at that position.
|
||||
- If the token is absent, run_paths replaces the default entirely.
|
||||
- Multiple tokens raise MultipleDefaultRunpaths.
|
||||
|
||||
Notes for Rust implementation
|
||||
- ELF:
|
||||
- Parse DT_NEEDED and DT_RUNPATH. Handle $ORIGIN (directory of installed
|
||||
path) and $PLATFORM expansion. Implement kernel module path rules and
|
||||
64-bit subdir logic. Merge user run paths via PD_DEFAULT_RUNPATH rules.
|
||||
- Build dependencies keyed by base name with a directory search list.
|
||||
- When pruning internal deps, downgrade to warning if base name is delivered
|
||||
by the same package but no path matches.
|
||||
- Python:
|
||||
- Determine Python version from installed path or shebang. Flag mismatches.
|
||||
- Execute import discovery with a depth-limited module finder; if the target
|
||||
version differs, spawn the matching interpreter to run a helper script and
|
||||
parse outputs. Include run_paths in module search.
|
||||
- JAR:
|
||||
- No current implementation. Decide whether to add support or retain current
|
||||
behavior (no automatic JAR dependency extraction).
|
||||
- General:
|
||||
- Implement bypass rules and debug attributes to aid diagnostics.
|
||||
- Implement link resolution and conditional dependency emission.
|
||||
- Respect variant tracking and final conversion to concrete dependency
|
||||
actions.
|
||||
|
||||
Cross-reference
|
||||
- Historical note in doc/elf-jar-handling.txt discusses possible JAR handling,
|
||||
but the current codebase does not implement JAR dependency analysis.
|
||||
|
|
@ -22,6 +22,7 @@ miette = "7.6.0"
|
|||
tracing = "0.1.37"
|
||||
maplit = "1"
|
||||
object = "0.37"
|
||||
goblin = "0.8"
|
||||
sha2 = "0.10"
|
||||
sha3 = "0.10"
|
||||
pest = "2.1.3"
|
||||
|
|
|
|||
619
libips/src/depend/mod.rs
Normal file
619
libips/src/depend/mod.rs
Normal file
|
|
@ -0,0 +1,619 @@
|
|||
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
|
||||
// If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
||||
|
||||
use crate::actions::{Dependency as ManifestDependency, Manifest};
|
||||
use crate::fmri::Fmri;
|
||||
use crate::repository::ReadableRepository;
|
||||
use miette::Diagnostic;
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::error::Error as StdError;
|
||||
use thiserror::Error;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
/// Result alias used throughout the depend module.
pub type Result<T> = std::result::Result<T, DependError>;

/// Error type for dependency-generation failures.
#[derive(Error, Debug, Diagnostic)]
#[error("Dependency generation error: {message}")]
#[diagnostic(code(ips::depend_error), help("Review inputs and file types"))]
pub struct DependError {
    /// Human-readable description of the failure.
    pub message: String,
    /// Optional underlying cause, surfaced via `std::error::Error::source`.
    #[source]
    pub source: Option<Box<dyn StdError + Send + Sync>>, // keep library crate simple
}

impl DependError {
    /// Create an error carrying only a message (no underlying cause).
    fn new(message: impl Into<String>) -> Self {
        Self { message: message.into(), source: None }
    }
    /// Create an error wrapping an underlying error as its source.
    fn with_source(message: impl Into<String>, source: Box<dyn StdError + Send + Sync>) -> Self {
        Self { message: message.into(), source: Some(source) }
    }
}
||||
|
||||
/// Options controlling dependency generation.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct GenerateOptions {
    /// Colon-separated runpath override to be applied to all actions (manifest-level).
    /// If it contains the PD_DEFAULT_RUNPATH token, default runpaths will be inserted at that position.
    pub runpath: Option<String>,
    /// Regex patterns to bypass dependency generation (skip matching actions entirely).
    pub bypass_patterns: Vec<String>,
    /// Proto directory base; used to locate local files when only manifest relative paths are known.
    pub proto_dir: Option<PathBuf>,
}

/// Token name used to splice in the analyzer default runpaths.
/// Mirrors pkgdepend's PD_DEFAULT_RUNPATH insertion rule.
pub const PD_DEFAULT_RUNPATH: &str = "PD_DEFAULT_RUNPATH";
|
||||
|
||||
/// Intermediate file-level dependency representation.
///
/// One variant per analyzer; each records the directories to search plus the
/// installed path of the file that declared the dependency.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileDepKind {
    /// Dependency of an ELF object on a shared library.
    Elf {
        /// The base filename needed (from DT_NEEDED)
        base_name: String,
        /// Directories searched to find the base_name
        run_paths: Vec<String>,
        /// Installed path of the object declaring the dependency
        installed_path: String,
    },
    /// Dependency of a script (or SMF exec method) on its interpreter/executable.
    Script {
        /// The base filename of the interpreter (e.g., python3, sh)
        base_name: String,
        /// Directories searched to find the interpreter
        run_paths: Vec<String>,
        /// Installed path of the script declaring the dependency
        installed_path: String,
    },
    /// Dependency of a Python script/module on its imported modules.
    Python {
        /// Candidate module file basenames (e.g., foo.py, foo.so)
        base_names: Vec<String>,
        /// Directories searched for Python modules for the selected version
        run_paths: Vec<String>,
        /// Installed path of the script/module declaring the dependency
        installed_path: String,
    },
}

/// A single file-level dependency produced by analysis.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileDep {
    pub kind: FileDepKind,
}
|
||||
|
||||
/// Generate file-level dependencies for every file action in a manifest.
///
/// Files are classified by content: ELF objects (magic \x7FELF) go to
/// process_elf(); executable shebang scripts yield an interpreter
/// dependency (plus Python module analysis when the interpreter path
/// contains "python"); non-shebang files under a Python version tree are
/// analyzed as modules; SMF manifests contribute dependencies on their
/// extracted exec paths. Actions matching a bypass pattern are skipped.
///
/// Returns the collected FileDep list, or an error if a bypass pattern is
/// not a valid regex.
pub fn generate_file_dependencies_from_manifest(manifest: &Manifest, opts: &GenerateOptions) -> Result<Vec<FileDep>> {
    let mut out = Vec::new();
    let bypass = compile_bypass(&opts.bypass_patterns)?;

    for f in &manifest.files {
        // Determine installed path (manifests typically do not start with '/').
        let installed_path = if f.path.starts_with('/') { f.path.clone() } else { format!("/{}", f.path) };

        if should_bypass(&installed_path, &bypass) {
            debug!("bypassing dependency generation for {} per patterns", installed_path);
            continue;
        }

        // Try to find the local file to analyze: prefer explicit original-path property; if it's relative, resolve against proto_dir.
        let local_path = match f.get_original_path() {
            Some(op) => {
                let p = PathBuf::from(&op);
                if p.is_absolute() {
                    p
                } else if let Some(base) = &opts.proto_dir {
                    let cand = base.join(op.trim_start_matches('/'));
                    if cand.exists() {
                        cand
                    } else {
                        // Fallback to proto_dir + installed_path
                        base.join(installed_path.trim_start_matches('/'))
                    }
                } else {
                    // Relative without proto_dir: try as-is (may be relative to CWD)
                    PathBuf::from(op)
                }
            }
            None => match &opts.proto_dir {
                Some(base) => base.join(installed_path.trim_start_matches('/')),
                None => continue, // no local file to analyze; skip
            },
        };

        // Read local bytes once; unreadable files are silently skipped.
        if let Ok(bytes) = fs::read(&local_path) {
            // ELF check (magic 0x7F 'E' 'L' 'F')
            if bytes.len() >= 4 && &bytes[0..4] == b"\x7FELF" {
                let mut deps = process_elf(&bytes, &installed_path, opts);
                out.append(&mut deps);
                continue;
            }

            // Script shebang check
            if let Some(interp) = parse_shebang(&bytes) {
                // Optional: ensure executable; if mode missing, assume executable
                let exec_ok = is_executable_mode(&f.mode);
                if !exec_ok {
                    // Not executable; skip script dependency.
                    // NOTE(review): this `continue` also skips the SMF check
                    // below for non-executable shebang files — confirm intended.
                    continue;
                }
                // Normalize /bin -> /usr/bin
                let interp_path = normalize_bin_path(&interp);
                if !interp_path.starts_with('/') {
                    warn!("Script shebang for {} specifies non-absolute interpreter: {}", installed_path, interp_path);
                } else {
                    // Derive dir and base name of the interpreter path
                    let (dir, base) = split_dir_base(&interp_path);
                    if let Some(dir) = dir {
                        out.push(FileDep { kind: FileDepKind::Script { base_name: base.to_string(), run_paths: vec![dir.to_string()], installed_path: installed_path.clone() } });
                        // If Python interpreter, perform Python analysis
                        if interp_path.contains("python") {
                            if let Some((maj, min)) = infer_python_version_from_paths(&installed_path, Some(&interp_path)) {
                                let mut pydeps = process_python(&bytes, &installed_path, (maj, min), opts);
                                out.append(&mut pydeps);
                            }
                        }
                    }
                }
            } else {
                // If no shebang or non-exec, but file is under usr/lib/pythonX.Y/, analyze as module
                if let Some((maj, min)) = infer_python_version_from_paths(&installed_path, None) {
                    let mut pydeps = process_python(&bytes, &installed_path, (maj, min), opts);
                    out.append(&mut pydeps);
                }
            }

            // SMF manifest detection: extract exec paths and depend on each
            // absolute executable path found in the manifest.
            if looks_like_smf_manifest(&bytes) {
                for exec_path in extract_smf_execs(&bytes) {
                    if exec_path.starts_with('/') {
                        let (dir, base) = split_dir_base(&exec_path);
                        if let Some(dir) = dir {
                            out.push(FileDep { kind: FileDepKind::Script { base_name: base.to_string(), run_paths: vec![dir.to_string()], installed_path: installed_path.clone() } });
                        }
                    }
                }
            }
        }
    }

    Ok(out)
}
|
||||
|
||||
/// Insert default runpaths into provided runpaths based on PD_DEFAULT_RUNPATH token
|
||||
fn insert_default_runpath(defaults: &[String], provided: &[String]) -> std::result::Result<Vec<String>, DependError> {
|
||||
let mut out = Vec::new();
|
||||
let mut token_count = 0;
|
||||
for p in provided {
|
||||
if p == PD_DEFAULT_RUNPATH {
|
||||
token_count += 1;
|
||||
if token_count > 1 {
|
||||
return Err(DependError::new("Multiple PD_DEFAULT_RUNPATH tokens in runpath override"));
|
||||
}
|
||||
out.extend_from_slice(defaults);
|
||||
} else {
|
||||
out.push(p.clone());
|
||||
}
|
||||
}
|
||||
if token_count == 0 {
|
||||
// Override replaces defaults
|
||||
Ok(provided.to_vec())
|
||||
} else {
|
||||
Ok(out)
|
||||
}
|
||||
}
|
||||
|
||||
fn compile_bypass(patterns: &[String]) -> Result<Vec<Regex>> {
|
||||
let mut out = Vec::new();
|
||||
for p in patterns {
|
||||
out.push(Regex::new(p).map_err(|e| DependError::with_source(format!("invalid bypass pattern: {}", p), Box::new(e)))?);
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
fn should_bypass(path: &str, patterns: &[Regex]) -> bool {
|
||||
patterns.iter().any(|re| re.is_match(path))
|
||||
}
|
||||
|
||||
fn process_elf(bytes: &[u8], installed_path: &str, opts: &GenerateOptions) -> Vec<FileDep> {
|
||||
let mut out = Vec::new();
|
||||
match goblin::elf::Elf::parse(bytes) {
|
||||
Ok(elf) => {
|
||||
// DT_NEEDED entries
|
||||
let mut needed: Vec<String> = elf.libraries.iter().map(|s| s.to_string()).collect();
|
||||
if needed.is_empty() {
|
||||
return out;
|
||||
}
|
||||
|
||||
// Default runpaths
|
||||
let mut defaults: Vec<String> = vec!["/lib".into(), "/usr/lib".into()];
|
||||
// crude bitness check: presence of 64-bit elf class
|
||||
if elf.is_64 {
|
||||
defaults.push("/lib/64".into());
|
||||
defaults.push("/usr/lib/64".into());
|
||||
}
|
||||
|
||||
// DT_RUNPATH
|
||||
let mut runpaths: Vec<String> = Vec::new();
|
||||
if !elf.runpaths.is_empty() {
|
||||
for rp in &elf.runpaths {
|
||||
for seg in rp.split(':') {
|
||||
if !seg.is_empty() {
|
||||
runpaths.push(seg.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Merge with defaults using PD_DEFAULT_RUNPATH semantics if caller provided runpath override
|
||||
let effective = if let Some(ref rp) = opts.runpath {
|
||||
let provided: Vec<String> = rp.split(':').map(|s| s.to_string()).collect();
|
||||
match insert_default_runpath(&defaults, &provided) {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
warn!("{}", e.message);
|
||||
provided
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// If no override, prefer DT_RUNPATH if present else defaults
|
||||
if runpaths.is_empty() { defaults.clone() } else { runpaths.clone() }
|
||||
};
|
||||
|
||||
// Expand $ORIGIN
|
||||
let origin = Path::new(installed_path).parent().map(|p| p.display().to_string()).unwrap_or_else(|| "/".to_string());
|
||||
let expanded: Vec<String> = effective
|
||||
.into_iter()
|
||||
.map(|p| p.replace("$ORIGIN", &origin))
|
||||
.collect();
|
||||
|
||||
// Emit FileDep for each DT_NEEDED base name
|
||||
for bn in needed.drain(..) {
|
||||
out.push(FileDep { kind: FileDepKind::Elf { base_name: bn, run_paths: expanded.clone(), installed_path: installed_path.to_string() } });
|
||||
}
|
||||
}
|
||||
Err(err) => warn!("ELF parse error for {}: {}", installed_path, err),
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Resolve file-level dependencies into manifest Dependency actions by consulting a repository.
|
||||
pub fn resolve_dependencies<R: ReadableRepository>(
|
||||
repo: &R,
|
||||
publisher: Option<&str>,
|
||||
file_deps: &[FileDep],
|
||||
) -> Result<Vec<ManifestDependency>> {
|
||||
// Build a mapping from path -> providers (FMRIs)
|
||||
let path_map = build_path_provider_map(repo, publisher)?;
|
||||
|
||||
let mut deps: Vec<ManifestDependency> = Vec::new();
|
||||
|
||||
for fd in file_deps {
|
||||
match &fd.kind {
|
||||
FileDepKind::Elf { base_name, run_paths, .. } => {
|
||||
let mut providers: Vec<Fmri> = Vec::new();
|
||||
for dir in run_paths {
|
||||
let full = normalize_join(dir, base_name);
|
||||
if let Some(list) = path_map.get(&full) {
|
||||
for f in list {
|
||||
if !providers.contains(f) {
|
||||
providers.push(f.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if providers.len() == 1 {
|
||||
let fmri = providers.remove(0);
|
||||
deps.push(ManifestDependency {
|
||||
fmri: Some(fmri),
|
||||
dependency_type: "require".to_string(),
|
||||
predicate: None,
|
||||
root_image: String::new(),
|
||||
optional: Vec::new(),
|
||||
facets: HashMap::new(),
|
||||
});
|
||||
} else if providers.len() > 1 {
|
||||
// Our model lacks a group for require-any; emit one per FMRI
|
||||
for fmri in providers.into_iter() {
|
||||
deps.push(ManifestDependency {
|
||||
fmri: Some(fmri),
|
||||
dependency_type: "require-any".to_string(),
|
||||
predicate: None,
|
||||
root_image: String::new(),
|
||||
optional: Vec::new(),
|
||||
facets: HashMap::new(),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// unresolved -> skip for now; future: emit analysis warnings
|
||||
}
|
||||
}
|
||||
FileDepKind::Script { base_name, run_paths, .. } => {
|
||||
let mut providers: Vec<Fmri> = Vec::new();
|
||||
for dir in run_paths {
|
||||
let full = normalize_join(dir, base_name);
|
||||
if let Some(list) = path_map.get(&full) {
|
||||
for f in list {
|
||||
if !providers.contains(f) {
|
||||
providers.push(f.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if providers.len() == 1 {
|
||||
let fmri = providers.remove(0);
|
||||
deps.push(ManifestDependency {
|
||||
fmri: Some(fmri),
|
||||
dependency_type: "require".to_string(),
|
||||
predicate: None,
|
||||
root_image: String::new(),
|
||||
optional: Vec::new(),
|
||||
facets: HashMap::new(),
|
||||
});
|
||||
} else if providers.len() > 1 {
|
||||
for fmri in providers.into_iter() {
|
||||
deps.push(ManifestDependency {
|
||||
fmri: Some(fmri),
|
||||
dependency_type: "require-any".to_string(),
|
||||
predicate: None,
|
||||
root_image: String::new(),
|
||||
optional: Vec::new(),
|
||||
facets: HashMap::new(),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
}
|
||||
}
|
||||
FileDepKind::Python { base_names, run_paths, .. } => {
|
||||
let mut providers: Vec<Fmri> = Vec::new();
|
||||
for dir in run_paths {
|
||||
for base in base_names {
|
||||
let full = normalize_join(dir, base);
|
||||
if let Some(list) = path_map.get(&full) {
|
||||
for f in list {
|
||||
if !providers.contains(f) {
|
||||
providers.push(f.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if providers.len() == 1 {
|
||||
let fmri = providers.remove(0);
|
||||
deps.push(ManifestDependency {
|
||||
fmri: Some(fmri),
|
||||
dependency_type: "require".to_string(),
|
||||
predicate: None,
|
||||
root_image: String::new(),
|
||||
optional: Vec::new(),
|
||||
facets: HashMap::new(),
|
||||
});
|
||||
} else if providers.len() > 1 {
|
||||
for fmri in providers.into_iter() {
|
||||
deps.push(ManifestDependency {
|
||||
fmri: Some(fmri),
|
||||
dependency_type: "require-any".to_string(),
|
||||
predicate: None,
|
||||
root_image: String::new(),
|
||||
optional: Vec::new(),
|
||||
facets: HashMap::new(),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(deps)
|
||||
}
|
||||
|
||||
/// Join a directory and a base name with exactly one `/` separator,
/// regardless of whether `dir` carries a trailing slash.
///
/// The original had a convoluted nested `format!` in the trailing-slash
/// branch; trimming trailing slashes up front covers both cases identically
/// (a `dir` without a trailing slash is unaffected by the trim).
fn normalize_join(dir: &str, base: &str) -> String {
    format!("{}/{}", dir.trim_end_matches('/'), base)
}
|
||||
|
||||
fn build_path_provider_map<R: ReadableRepository>(repo: &R, publisher: Option<&str>) -> Result<HashMap<String, Vec<Fmri>>> {
|
||||
// Ask repo to show contents for all packages (files only)
|
||||
let contents = repo
|
||||
.show_contents(publisher, None, Some(&["file".to_string()]))
|
||||
.map_err(|e| DependError::with_source("Repository show_contents failed", Box::new(e)))?;
|
||||
|
||||
let mut map: HashMap<String, Vec<Fmri>> = HashMap::new();
|
||||
for pc in contents {
|
||||
let fmri = match pc.package_id.parse::<Fmri>() {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
warn!("Skipping package with invalid FMRI {}: {}", pc.package_id, e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Some(files) = pc.files {
|
||||
for p in files {
|
||||
// Ensure leading slash
|
||||
let key = if p.starts_with('/') { p } else { format!("/{}", p) };
|
||||
map.entry(key).or_default().push(fmri.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(map)
|
||||
}
|
||||
|
||||
|
||||
// --- Helpers for script processing ---
|
||||
/// Return the interpreter path from a `#!` line, if the buffer starts
/// with one.
///
/// Only the first whitespace-separated token after `#!` is returned; any
/// interpreter arguments on the shebang line are ignored.
fn parse_shebang(bytes: &[u8]) -> Option<String> {
    if !bytes.starts_with(b"#!") {
        return None;
    }
    // The shebang line ends at the first CR or LF, or at end of buffer.
    let end = bytes
        .iter()
        .skip(2)
        .position(|&b| b == b'\n' || b == b'\r')
        .map(|i| i + 2)
        .unwrap_or(bytes.len());
    let line = String::from_utf8_lossy(&bytes[2..end]);
    // First token is the interpreter path; an empty/blank line yields None.
    line.split_whitespace().next().map(str::to_string)
}
|
||||
|
||||
/// Decide whether an octal mode string (e.g. "0755" or "755") has any
/// execute bit set.
///
/// An empty or unparsable mode errs on the side of `true` so that a
/// missing or odd mode attribute never causes a dependency to be skipped.
fn is_executable_mode(mode_str: &str) -> bool {
    let ms = mode_str.trim();
    if ms.is_empty() {
        return true;
    }
    // `from_str_radix` accepts leading zeros, so "0755" parses directly.
    // The previous code stripped leading '0's first, which made all-zero
    // modes ("0", "000") unparsable and thus wrongly reported executable.
    match u32::from_str_radix(ms, 8) {
        Ok(bits) => bits & 0o111 != 0,
        Err(_) => true,
    }
}
|
||||
|
||||
/// Map legacy `/bin/...` interpreter paths onto `/usr/bin/...`; all other
/// paths pass through unchanged.
fn normalize_bin_path(path: &str) -> String {
    match path.strip_prefix("/bin/") {
        Some(rest) => format!("/usr/bin/{}", rest),
        None => path.to_string(),
    }
}
|
||||
|
||||
/// Split a path into `(directory, base name)`.
///
/// Returns `None` for the directory when the path contains no `/`; an entry
/// directly in the filesystem root keeps `"/"` as its directory component.
/// The explicit `'a` lifetime was needless (elision covers the single input
/// reference), so it has been removed.
fn split_dir_base(path: &str) -> (Option<&str>, &str) {
    match path.rfind('/') {
        // Root-level entry such as "/sh": keep "/" as the directory.
        Some(0) => (Some("/"), &path[1..]),
        Some(idx) => (Some(&path[..idx]), &path[idx + 1..]),
        None => (None, path),
    }
}
|
||||
|
||||
|
||||
/// Very lightweight SMF manifest sniff: SMF manifests are XML files whose
/// root element is `<service_bundle ...>`. A lossy UTF-8 scan for that tag
/// avoids pulling in a full XML parser.
fn looks_like_smf_manifest(bytes: &[u8]) -> bool {
    String::from_utf8_lossy(bytes).contains("<service_bundle")
}
|
||||
|
||||
// --- Python helpers ---
|
||||
fn infer_python_version_from_paths(installed_path: &str, shebang_path: Option<&str>) -> Option<(u8, u8)> {
|
||||
// Prefer version implied by installed path under /usr/lib/pythonX.Y
|
||||
if let Ok(re) = Regex::new(r"^/usr/lib/python(\d+)\.(\d+)(/|$)") {
|
||||
if let Some(c) = re.captures(installed_path) {
|
||||
if let (Some(ma), Some(mi)) = (c.get(1), c.get(2)) {
|
||||
if let (Ok(maj), Ok(min)) = (ma.as_str().parse::<u8>(), mi.as_str().parse::<u8>()) {
|
||||
return Some((maj, min));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Else, try to infer from shebang interpreter path (e.g., /usr/bin/python3.11)
|
||||
if let Some(sb) = shebang_path {
|
||||
if let Ok(re) = Regex::new(r"python(\d+)\.(\d+)") {
|
||||
if let Some(c) = re.captures(sb) {
|
||||
if let (Some(ma), Some(mi)) = (c.get(1), c.get(2)) {
|
||||
if let (Ok(maj), Ok(min)) = (ma.as_str().parse::<u8>(), mi.as_str().parse::<u8>()) {
|
||||
return Some((maj, min));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn compute_python_runpaths(version: (u8, u8), opts: &GenerateOptions) -> Vec<String> {
|
||||
let (maj, min) = version;
|
||||
let base = format!("/usr/lib/python{}.{}", maj, min);
|
||||
let mut defaults = vec![
|
||||
base.clone(),
|
||||
format!("{}/vendor-packages", base),
|
||||
format!("{}/site-packages", base),
|
||||
format!("{}/lib-dynload", base),
|
||||
];
|
||||
if let Some(ref rp) = opts.runpath {
|
||||
let provided: Vec<String> = rp.split(':').map(|s| s.to_string()).collect();
|
||||
match insert_default_runpath(&defaults, &provided) {
|
||||
Ok(v) => v,
|
||||
Err(_) => provided,
|
||||
}
|
||||
} else {
|
||||
defaults
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_python_imports(src: &str) -> Vec<String> {
|
||||
let mut mods = Vec::new();
|
||||
// Regex for 'import x[.y][, z]' - handle only first module per line for simplicity
|
||||
if let Ok(re_imp) = Regex::new(r"(?m)^\s*import\s+([A-Za-z_][A-Za-z0-9_\.]*)") {
|
||||
for cap in re_imp.captures_iter(src) {
|
||||
if let Some(m) = cap.get(1) {
|
||||
let name = m.as_str().split('.').next().unwrap_or("").to_string();
|
||||
if !name.is_empty() && !mods.contains(&name) {
|
||||
mods.push(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Regex for 'from x.y import ...'
|
||||
if let Ok(re_from) = Regex::new(r"(?m)^\s*from\s+([A-Za-z_][A-Za-z0-9_\.]*)\s+import\s+") {
|
||||
for cap in re_from.captures_iter(src) {
|
||||
if let Some(m) = cap.get(1) {
|
||||
let name = m.as_str().split('.').next().unwrap_or("").to_string();
|
||||
if !name.is_empty() && !mods.contains(&name) {
|
||||
mods.push(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
mods
|
||||
}
|
||||
|
||||
fn process_python(bytes: &[u8], installed_path: &str, version: (u8, u8), opts: &GenerateOptions) -> Vec<FileDep> {
|
||||
let text = String::from_utf8_lossy(bytes);
|
||||
let imports = collect_python_imports(&text);
|
||||
if imports.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
// Base names to search: module.py and module.so
|
||||
let mut base_names: Vec<String> = Vec::new();
|
||||
for m in imports {
|
||||
let py = format!("{}.py", m);
|
||||
let so = format!("{}.so", m);
|
||||
if !base_names.contains(&py) { base_names.push(py); }
|
||||
if !base_names.contains(&so) { base_names.push(so); }
|
||||
}
|
||||
let run_paths = compute_python_runpaths(version, opts);
|
||||
vec![FileDep { kind: FileDepKind::Python { base_names, run_paths, installed_path: installed_path.to_string() } }]
|
||||
}
|
||||
|
||||
// --- SMF helpers ---
|
||||
fn extract_smf_execs(bytes: &[u8]) -> Vec<String> {
|
||||
let text = String::from_utf8_lossy(bytes);
|
||||
let mut out = Vec::new();
|
||||
// Match exec="..." or exec='...'
|
||||
if let Ok(re) = Regex::new(r#"exec\s*=\s*\"([^\"]+)\"|exec\s*=\s*'([^']+)'"#) {
|
||||
for cap in re.captures_iter(&text) {
|
||||
let m = cap.get(1).or_else(|| cap.get(2));
|
||||
if let Some(v) = m {
|
||||
let val = v.as_str().to_string();
|
||||
if !out.contains(&val) { out.push(val); }
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
|
@ -13,6 +13,7 @@ pub mod repository;
|
|||
pub mod publisher;
|
||||
pub mod transformer;
|
||||
pub mod solver;
|
||||
pub mod depend;
|
||||
mod test_json_manifest;
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue