From 0cf93b09edfa4a0afa5124553d6b2d933a792a7a Mon Sep 17 00:00:00 2001 From: John Sirois Date: Wed, 9 Nov 2022 12:39:17 -0800 Subject: [PATCH] Implement boot bindings support. Now commands can depend on `{scie.bindings.<name>}` placeholders which are satisfied by boot bindings commands that are run exactly once. This is used in the Python example to produce a scie-pants Pants binary that prepares a fully pre-compiled Pants venv that runs at the full speed of a native Pants venv. The boot binding command that creates the venv also uses a private PEX_ROOT and cleans this up in addition to the Pants PEX used to build the venv. The result is an ~/.nce with just the CPython interpreter and a venv that symlinks to it. There is no wasted speed or space as compared to a traditional Pants Python venv (save for the size of the scie binary itself which may be addressed by #9 or #19). Closes #7 Closes #20 --- .github/workflows/ci.yml | 8 +- Cargo.lock | 17 ++ examples/python/lift.linux-x86_64.json | 37 +++- examples/python/lift.macos-x86_64.json | 30 ++- jump/Cargo.toml | 1 + jump/src/atomic.rs | 15 +- jump/src/config.rs | 4 +- jump/src/context.rs | 267 ++++++++++++++++++------- jump/src/installer.rs | 235 ++++++++++------------ jump/src/lib.rs | 21 +- jump/src/lift.rs | 22 +- jump/src/placeholders.rs | 43 ++-- jump/src/process.rs | 10 +- src/boot.rs | 5 +- 14 files changed, 455 insertions(+), 260 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c3210d3..b9550e5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,11 +60,13 @@ jobs: set -x examples/prepare.sh python dist/scie-jump* examples/python/lift.${{ matrix.arch }}.json - time RUST_LOG=trace ./pants --no-pantsd -V - time ./pants --no-pantsd -V + + touch BUILDROOT + time RUST_LOG=trace ./scie-pants --no-pantsd -V + time RUST_LOG=debug ./scie-pants --no-pantsd -V # Use the built-in BusyBox functionality via env var.
- SCIE_BOOT=inspect ./pants interpreter --verbose --indent 2 + SCIE_BOOT=repl ./scie-pants -c 'from pants.util import strutil; print(strutil.__file__)' - name: Java Example (Coursier) if: contains(matrix.examples, 'java') run: | diff --git a/Cargo.lock b/Cargo.lock index 162782c..d373542 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -249,6 +249,22 @@ dependencies = [ "wasi", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "indexmap" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" +dependencies = [ + "autocfg", + "hashbrown", +] + [[package]] name = "instant" version = "0.1.12" @@ -298,6 +314,7 @@ dependencies = [ "dirs", "fd-lock", "flate2", + "indexmap", "itertools", "log", "logging_timer", diff --git a/examples/python/lift.linux-x86_64.json b/examples/python/lift.linux-x86_64.json index deb435c..661b59a 100644 --- a/examples/python/lift.linux-x86_64.json +++ b/examples/python/lift.linux-x86_64.json @@ -1,7 +1,7 @@ { "scie": { "lift": { - "name": "pants", + "name": "scie-pants", "description": "The ergonomic build system.", "boot": { "commands": { @@ -10,20 +10,38 @@ "env": { "=PATH": "{cpython}/python/bin:{scie.env.PATH}" }, - "exe": "{cpython}/python/bin/python3.9", + "exe": "{scie.bindings.venv}/venv/bin/python3.9", "args": [ - "{pants.2.14.0.pex}" + "{scie.bindings.venv}/venv/pex" ] }, - "inspect": { - "description": "PEX tools for Pants.", + "repl": { + "description": "A Python repl with Pants (minus plugins) available for inspection.", "env": { "=PATH": "{cpython}/python/bin:{scie.env.PATH}", "PEX_TOOLS": "1" }, + "exe": "{scie.bindings.venv}/venv/bin/python3.9" + } + }, + "bindings": { + "venv": { + "description": "Installs Pants in a venv and pre-compiles .pyc.", + "env": 
{ + "=PATH": "{cpython}/python/bin:{scie.env.PATH}", + "PEX_TOOLS": "1", + "PEX_ROOT": "{scie.bindings}/pex_root" + }, "exe": "{cpython}/python/bin/python3.9", "args": [ - "{pants.2.14.0.pex}" + "{pants.2.14.0.pex}", + "venv", + "--bin-path", + "prepend", + "--compile", + "--rm", + "all", + "{scie.bindings}/venv" ] } } @@ -31,13 +49,10 @@ "files": [ { "name": "cpython-3.9.14+20221002-x86_64-unknown-linux-gnu-install_only.tar.gz", - "key": "cpython", - "hash": "e63d0c00a499e0202ba7a0f53ce69fca6d30237af39af9bc3c76bce6c7bf14d7", - "size": 26761725 + "key": "cpython" }, { - "name": "pants.2.14.0.pex", - "type": "zip" + "name": "pants.2.14.0.pex" } ] } diff --git a/examples/python/lift.macos-x86_64.json b/examples/python/lift.macos-x86_64.json index 859cc9c..615f043 100644 --- a/examples/python/lift.macos-x86_64.json +++ b/examples/python/lift.macos-x86_64.json @@ -1,7 +1,7 @@ { "scie": { "lift": { - "name": "pants", + "name": "scie-pants", "description": "The ergonomic build system.", "boot": { "commands": { @@ -10,20 +10,38 @@ "env": { "=PATH": "{cpython}/python/bin:{scie.env.PATH}" }, - "exe": "{cpython}/python/bin/python3.9", + "exe": "{scie.bindings.venv}/venv/bin/python3.9", "args": [ - "{pants.2.14.0.pex}" + "{scie.bindings.venv}/venv/pex" ] }, - "inspect": { - "description": "PEX tools for Pants.", + "repl": { + "description": "A Python repl with Pants (minus plugins) available for inspection.", "env": { "=PATH": "{cpython}/python/bin:{scie.env.PATH}", "PEX_TOOLS": "1" }, + "exe": "{scie.bindings.venv}/venv/bin/python3.9" + } + }, + "bindings": { + "venv": { + "description": "Installs Pants in a venv and pre-compiles .pyc.", + "env": { + "=PATH": "{cpython}/python/bin:{scie.env.PATH}", + "PEX_TOOLS": "1", + "PEX_ROOT": "{scie.bindings}/pex_root" + }, "exe": "{cpython}/python/bin/python3.9", "args": [ - "{pants.2.14.0.pex}" + "{pants.2.14.0.pex}", + "venv", + "--bin-path", + "prepend", + "--compile", + "--rm", + "all", + "{scie.bindings}/venv" ] } } diff --git 
a/jump/Cargo.toml b/jump/Cargo.toml index 23e1111..ca13625 100644 --- a/jump/Cargo.toml +++ b/jump/Cargo.toml @@ -15,6 +15,7 @@ bzip2 = "0.4" dirs = "4.0" fd-lock = "3.0" flate2 = "1.0" # For gz support. +indexmap = "1.9" itertools = "0.10" log = { workspace = true } logging_timer = { workspace = true } diff --git a/jump/src/atomic.rs b/jump/src/atomic.rs index a0aeeab..8d759d7 100644 --- a/jump/src/atomic.rs +++ b/jump/src/atomic.rs @@ -10,14 +10,14 @@ use serde::Serializer; #[derive(Copy, Clone, Eq, PartialEq)] pub(crate) enum Target { Directory, - _File, // TODO(John Sirois): Use for run-once boot bindings. + File, // TODO(John Sirois): Use for run-once boot bindings. } impl Display for Target { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { Target::Directory => f.serialize_str("directory"), - Target::_File => f.serialize_str("file"), + Target::File => f.serialize_str("file"), } } } @@ -32,7 +32,7 @@ impl Target { return Ok(false); } } - Target::_File => { + Target::File => { if target.is_file() { return Ok(true); } else if !target.exists() { @@ -67,6 +67,10 @@ where // First check. if target_type.check_exists(target)? { + debug!( + "The atomic {target_type} at {path} has already been established.", + path = target.display() + ); return Ok(()); } @@ -102,6 +106,11 @@ where // Second check. if target_type.check_exists(target)? 
{ + debug!( + "The atomic {target_type} at {path} has already been established \ + (lost double-check race).", + path = target.display() + ); return Ok(()); } diff --git a/jump/src/config.rs b/jump/src/config.rs index 92c89cf..d3feb25 100644 --- a/jump/src/config.rs +++ b/jump/src/config.rs @@ -216,7 +216,7 @@ pub struct Jump { pub version: String, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Clone, Debug, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct Boot { pub commands: BTreeMap, @@ -287,7 +287,7 @@ impl Default for Fmt { } } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Clone, Debug, Serialize, Deserialize)] pub struct Other { #[serde(flatten)] other: BTreeMap, diff --git a/jump/src/context.rs b/jump/src/context.rs index 5301fcb..ba4c5e4 100644 --- a/jump/src/context.rs +++ b/jump/src/context.rs @@ -3,18 +3,22 @@ use std::collections::{BTreeMap, HashSet}; use std::env; -use std::ffi::OsStr; +use std::ffi::{OsStr, OsString}; use std::path::{Component, Path, PathBuf}; use bstr::ByteSlice; +use indexmap::IndexMap; use logging_timer::time; -use crate::config::Cmd; +use crate::atomic::{atomic_path, Target}; +use crate::config::{Cmd, Fmt}; use crate::lift::{File, Lift}; use crate::placeholders::{self, Item, Placeholder}; +use crate::process::{EnvVar, Process}; +use crate::{config, EnvVars, Jump}; -fn expanduser(path: PathBuf) -> Result { - if !<[u8]>::from_path(&path) +fn expanduser(path: &Path) -> Result { + if !<[u8]>::from_path(path) .ok_or_else(|| { format!( "Failed to decode the path {} as utf-8 bytes", @@ -23,7 +27,7 @@ fn expanduser(path: PathBuf) -> Result { })? 
.contains(&b'~') { - return Ok(path); + return Ok(path.to_path_buf()); } let home_dir = dirs::home_dir() @@ -49,111 +53,195 @@ fn path_to_str(path: &Path) -> Result<&str, String> { .map_err(|e| format!("{e}")) } -pub struct Boot { - pub name: String, - pub description: Option, +pub(crate) enum FileEntry { + Skip(usize), + Install((File, PathBuf)), +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub(crate) struct Binding { + target: PathBuf, + process: Process, +} + +impl Binding { + pub(crate) fn execute(self) -> Result<(), String> { + atomic_path(&self.target.clone(), Target::File, |lock| { + trace!("Installing boot binding {binding:#?}", binding = &self); + match self.process.execute() { + Err(err) => return Err(format!("Failed to launch boot binding: {err}")), + Ok(exit_status) if !exit_status.success() => { + return Err(format!("Boot binding command failed: {exit_status}")); + } + _ => std::fs::write(lock, b"").map_err(|e| { + format!( + "Failed to touch lock file {path}: {e}", + path = lock.display() + ) + }), + } + }) + } } pub(crate) struct SelectedCmd { - pub(crate) scie: PathBuf, - pub(crate) cmd: Cmd, + pub(crate) process: Process, + pub(crate) bindings: Vec, + pub(crate) files: Vec, pub(crate) argv1_consumed: bool, } -pub(crate) struct Context { - scie: PathBuf, - commands: BTreeMap, - _bindings: BTreeMap, +pub(crate) struct Context<'a> { + scie: &'a Path, + jump: &'a Jump, + lift: &'a Lift, base: PathBuf, - files_by_name: BTreeMap, - pub(crate) files: Vec, - pub(crate) replacements: HashSet, - pub(crate) description: Option, + files_by_name: BTreeMap<&'a str, &'a File>, + replacements: HashSet<&'a File>, + bindings: IndexMap<&'a str, Binding>, } fn try_as_str(os_str: &OsStr) -> Option<&str> { <[u8]>::from_os_str(os_str).and_then(|bytes| std::str::from_utf8(bytes).ok()) } -impl Context { +impl<'a> Context<'a> { #[time("debug")] - pub(crate) fn new(scie_path: PathBuf, lift: Lift) -> Result { + fn new(scie: &'a Path, jump: &'a Jump, lift: &'a Lift) -> Result 
{ let mut files_by_name = BTreeMap::new(); for file in &lift.files { - files_by_name.insert(file.name.clone(), file.clone()); + files_by_name.insert(file.name.as_str(), file); if let Some(key) = file.key.as_ref() { - files_by_name.insert(key.clone(), file.clone()); + files_by_name.insert(key.as_str(), file); } } Ok(Context { - scie: scie_path, - description: lift.description, - commands: lift.boot.commands, - _bindings: lift.boot.bindings, - base: expanduser(lift.base)?, + scie, + jump, + lift, + base: expanduser(&lift.base)?, files_by_name, - files: lift.files, replacements: HashSet::new(), + bindings: IndexMap::new(), }) } - #[cfg(target_family = "windows")] - fn scie_basename(&self) -> Option<&str> { - self.scie.file_stem().and_then(try_as_str) + fn prepare_process(&mut self, cmd: &'a Cmd) -> Result { + let vars = cmd + .env + .iter() + .map(|(key, value)| { + self.reify_string(value) + .map(|v| (EnvVar::from(key), OsString::from(v))) + }) + .collect::, _>>()?; + let exe = self.reify_string(&cmd.exe)?.into(); + let args = cmd + .args + .iter() + .map(|string| self.reify_string(string).map(OsString::from)) + .collect::, _>>()?; + + Ok(Process { + env: EnvVars { vars }, + exe, + args, + }) } - #[cfg(not(target_family = "windows"))] - fn scie_basename(&self) -> Option<&str> { - self.scie.file_name().and_then(try_as_str) + fn prepare(&mut self, cmd: &'a Cmd) -> Result<(Process, Vec, Vec), String> { + let process = self.prepare_process(cmd)?; + + let mut file_entries = vec![]; + for file in &self.lift.files { + if self.replacements.contains(&file) { + let path = self.get_path(file); + file_entries.push(FileEntry::Install((file.clone(), path))) + } else { + file_entries.push(FileEntry::Skip(file.size)) + } + } + + Ok(( + process, + self.bindings + .values() + .map(Binding::clone) + .collect::>(), + file_entries, + )) } - fn select_cmd(&self, name: &str, argv1_consumed: bool) -> Option { - self.commands.get(name).map(|cmd| SelectedCmd { - scie: self.scie.clone(), - 
cmd: cmd.clone(), - argv1_consumed, - }) + fn select_cmd( + &mut self, + name: &str, + argv1_consumed: bool, + ) -> Result, String> { + if let Some(cmd) = self.lift.boot.commands.get(name) { + let (process, bindings, files) = self.prepare(cmd)?; + return Ok(Some(SelectedCmd { + process, + bindings, + files, + argv1_consumed, + })); + } + Ok(None) } - pub(crate) fn select_command(&self) -> Result, String> { + fn select_command(&mut self) -> Result, String> { if let Some(cmd) = env::var_os("SCIE_BOOT") { let name = cmd.into_string().map_err(|value| { format!("Failed to decode environment variable SCIE_BOOT: {value:?}") })?; - return Ok(self.select_cmd(&name, false)); + return self.select_cmd(&name, false); + } + if let Some(selected_cmd) = self.select_cmd("", false)? { + return Ok(Some(selected_cmd)); } - Ok(self - .select_cmd("", false) - .or_else(|| { - self.scie_basename() - .and_then(|basename| self.select_cmd(basename, false)) - }) - .or_else(|| { - env::args() - .nth(1) - .and_then(|argv1| self.select_cmd(&argv1, true)) - })) - } - pub(crate) fn boots(&self) -> Vec { - self.commands - .iter() - .map(|(name, cmd)| Boot { - name: name.to_string(), - description: cmd.description.clone(), - }) - .collect::>() - } + #[cfg(target_family = "windows")] + let basename = self.scie.file_stem().and_then(try_as_str); - pub(crate) fn get_file(&self, name: &str) -> Option<&File> { - self.files_by_name.get(name) + #[cfg(not(target_family = "windows"))] + let basename = self.scie.file_name().and_then(try_as_str); + + if let Some(basename) = basename { + if let Some(selected_command) = self.select_cmd(basename, false)? 
{ + return Ok(Some(selected_command)); + } + } + if let Some(argv1) = env::args().nth(1) { + return self.select_cmd(&argv1, true); + } + Ok(None) } - pub(crate) fn get_path(&self, file: &File) -> PathBuf { + fn get_path(&self, file: &File) -> PathBuf { self.base.join(&file.hash).join(&file.name) } - pub(crate) fn reify_string(&mut self, value: &str) -> Result { + fn record_lift_manifest(&self) -> Result { + let manifest = self.base.join(&self.lift.hash).join("lift.json"); + atomic_path(&manifest, Target::File, |path| { + config(self.jump.clone(), self.lift.clone()).serialize( + std::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(path) + .map_err(|e| { + format!( + "Failed top open lift manifest at {path} for writing: {e}", + path = manifest.display() + ) + })?, + Fmt::new().trailing_newline(true).pretty(true), + ) + })?; + Ok(manifest) + } + + fn reify_string(&mut self, value: &'a str) -> Result { let mut reified = String::with_capacity(value.len()); let parsed = placeholders::parse(value)?; @@ -163,11 +251,12 @@ impl Context { Item::Text(text) => reified.push_str(text), Item::Placeholder(Placeholder::FileName(name)) => { let file = self - .get_file(name) + .files_by_name + .get(name) .ok_or_else(|| format!("No file named {name} is stored in this scie."))?; let path = self.get_path(file); reified.push_str(path_to_str(&path)?); - self.replacements.insert(file.clone()); + self.replacements.insert(file); } Item::Placeholder(Placeholder::Env(name)) => { let env_var = env::var_os(name).unwrap_or_else(|| "".into()); @@ -176,16 +265,44 @@ impl Context { })?; reified.push_str(&value) } - Item::Placeholder(Placeholder::Scie) => reified.push_str(path_to_str(&self.scie)?), - // TODO(John Sirois): Handle these as part of tackling #7 - Item::Placeholder(Placeholder::ScieBoot) => { - return Err("The {scie.boot} placeholder is not supported yet".to_string()) + Item::Placeholder(Placeholder::Scie) => reified.push_str(path_to_str(self.scie)?), + 
Item::Placeholder(Placeholder::ScieBindings) => { + reified.push_str(path_to_str( + &self.base.join(&self.lift.hash).join("bindings"), + )?); + } + Item::Placeholder(Placeholder::ScieBindingCmd(name)) => { + let boot_binding = Binding { + target: self.base.join(&self.lift.hash).join("locks").join(name), + process: self.prepare_process( + self.lift + .boot + .bindings + .get(*name) + .ok_or_else(|| format!("No boot binding named {name}."))?, + )?, + }; + self.bindings.insert(name, boot_binding); + reified.push_str(path_to_str( + &self.base.join(&self.lift.hash).join("bindings"), + )?); } - Item::Placeholder(Placeholder::ScieBootCmd(_name)) => { - return Err("The {scie.boot.} placeholder is not supported yet".to_string()) + Item::Placeholder(Placeholder::ScieLift) => { + let manifest = self.record_lift_manifest()?; + reified.push_str(path_to_str(&manifest)?); } } } Ok(reified) } } + +pub(crate) fn select_command( + scie: &Path, + jump: &Jump, + lift: &Lift, +) -> Result, String> { + let mut context = Context::new(scie, jump, lift)?; + context.record_lift_manifest()?; + context.select_command() +} diff --git a/jump/src/installer.rs b/jump/src/installer.rs index 7d169bf..cbb732d 100644 --- a/jump/src/installer.rs +++ b/jump/src/installer.rs @@ -1,19 +1,41 @@ // Copyright 2022 Science project contributors. // Licensed under the Apache License, Version 2.0 (see LICENSE). 
-use std::collections::HashSet; -use std::ffi::OsString; use std::fs::OpenOptions; -use std::io::{Cursor, Read, Seek}; +use std::io::{Cursor, Read, Seek, SeekFrom}; use std::path::Path; use logging_timer::time; use crate::atomic::{atomic_path, Target}; -use crate::config::{ArchiveType, Cmd, Compression, FileType}; -use crate::context::Context; -use crate::process::{EnvVar, EnvVars, Process}; -use crate::{fingerprint, File}; +use crate::config::{ArchiveType, Compression, FileType}; +use crate::context::FileEntry; +use crate::fingerprint; + +fn check_hash( + file_type: &str, + mut bytes: R, + expected_hash: &str, + dst: &Path, +) -> Result { + let (size, actual_hash) = fingerprint::digest_reader(&mut bytes)?; + if expected_hash != actual_hash.as_str() { + return Err(format!( + "The {file_type} destination {dst} of size {size} had unexpected hash: {actual_hash}", + dst = dst.display(), + )); + } else { + // TODO(John Sirois): Hash in-line with extraction. + bytes + .seek(SeekFrom::Start(0)) + .map_err(|e| format!("Failed to re-wind {file_type} after hashing: {e}"))?; + debug!( + "The {file_type} destination {dst} of size {size} had expected hash", + dst = dst.display() + ); + Ok(bytes) + } +} #[time("debug")] fn unpack_tar(archive_type: ArchiveType, tar_stream: R, dst: &Path) -> Result<(), String> { @@ -26,48 +48,53 @@ fn unpack_tar(archive_type: ArchiveType, tar_stream: R, dst: &Path) -> fn unpack_archive( archive: ArchiveType, bytes: R, + expected_hash: &str, dst: &Path, ) -> Result<(), String> { - atomic_path(dst, Target::Directory, |work_dir| match archive { - ArchiveType::Zip => { - let mut zip = zip::ZipArchive::new(bytes) - .map_err(|e| format!("Failed to open {archive:?}: {e}"))?; - zip.extract(work_dir) - .map_err(|e| format!("Failed to extract {archive:?}: {e}")) - } - ArchiveType::Tar => unpack_tar(archive, bytes, work_dir), - ArchiveType::CompressedTar(Compression::Bzip2) => { - let bzip2_decoder = bzip2::read::BzDecoder::new(bytes); - unpack_tar(archive, 
bzip2_decoder, work_dir) - } - ArchiveType::CompressedTar(Compression::Gzip) => { - let gz_decoder = flate2::read::GzDecoder::new(bytes); - unpack_tar(archive, gz_decoder, work_dir) - } - ArchiveType::CompressedTar(Compression::Xz) => { - let xz_decoder = xz2::read::XzDecoder::new(bytes); - unpack_tar(archive, xz_decoder, work_dir) - } - ArchiveType::CompressedTar(Compression::Zlib) => { - let zlib_decoder = flate2::read::ZlibDecoder::new(bytes); - unpack_tar(archive, zlib_decoder, work_dir) - } - ArchiveType::CompressedTar(Compression::Zstd) => { - let zstd_decoder = zstd::stream::Decoder::new(bytes).map_err(|e| { - format!( - "Failed to create a zstd decoder for unpacking to {dst}: {e}", - dst = dst.display() - ) - })?; - unpack_tar(archive, zstd_decoder, work_dir) + atomic_path(dst, Target::Directory, |work_dir| { + let hashed_bytes = check_hash(archive.as_ext(), bytes, expected_hash, dst)?; + match archive { + ArchiveType::Zip => { + let mut zip = zip::ZipArchive::new(hashed_bytes) + .map_err(|e| format!("Failed to open {archive:?}: {e}"))?; + zip.extract(work_dir) + .map_err(|e| format!("Failed to extract {archive:?}: {e}")) + } + ArchiveType::Tar => unpack_tar(archive, hashed_bytes, work_dir), + ArchiveType::CompressedTar(Compression::Bzip2) => { + let bzip2_decoder = bzip2::read::BzDecoder::new(hashed_bytes); + unpack_tar(archive, bzip2_decoder, work_dir) + } + ArchiveType::CompressedTar(Compression::Gzip) => { + let gz_decoder = flate2::read::GzDecoder::new(hashed_bytes); + unpack_tar(archive, gz_decoder, work_dir) + } + ArchiveType::CompressedTar(Compression::Xz) => { + let xz_decoder = xz2::read::XzDecoder::new(hashed_bytes); + unpack_tar(archive, xz_decoder, work_dir) + } + ArchiveType::CompressedTar(Compression::Zlib) => { + let zlib_decoder = flate2::read::ZlibDecoder::new(hashed_bytes); + unpack_tar(archive, zlib_decoder, work_dir) + } + ArchiveType::CompressedTar(Compression::Zstd) => { + let zstd_decoder = 
zstd::stream::Decoder::new(hashed_bytes).map_err(|e| { + format!( + "Failed to create a zstd decoder for unpacking to {dst}: {e}", + dst = dst.display() + ) + })?; + unpack_tar(archive, zstd_decoder, work_dir) + } } }) } #[time("debug")] -fn unpack_blob(mut bytes: R, dst: &Path) -> Result<(), String> { +fn unpack_blob(bytes: R, expected_hash: &str, dst: &Path) -> Result<(), String> { let parent_dir = dst.parent().ok_or_else(|| "".to_owned())?; atomic_path(parent_dir, Target::Directory, |work_dir| { + let mut hashed_bytes = check_hash("blob", bytes, expected_hash, dst)?; let blob_dst = work_dir.join(dst.file_name().ok_or_else(|| { format!( "Blob destination {dst} has no file name.", @@ -84,116 +111,74 @@ fn unpack_blob(mut bytes: R, dst: &Path) -> Result<(), String> { blob_dst = blob_dst.display() ) })?; - std::io::copy(&mut bytes, &mut blob_out) + std::io::copy(&mut hashed_bytes, &mut blob_out) .map(|_| ()) .map_err(|e| format!("{e}")) }) } -fn unpack(file_type: FileType, bytes: R, dst: &Path) -> Result<(), String> { +fn unpack( + file_type: FileType, + bytes: R, + expected_hash: &str, + dst: &Path, +) -> Result<(), String> { match file_type { - FileType::Archive(archive_type) => unpack_archive(archive_type, bytes, dst), - FileType::Blob => unpack_blob(bytes, dst), - FileType::Directory => unpack_archive(ArchiveType::Zip, bytes, dst), - } -} - -fn file_type_to_unpack(file: &File, dst: &Path) -> Option { - match file.file_type { - archive_type @ FileType::Archive(_) if !dst.is_dir() => Some(archive_type), - FileType::Blob if !dst.is_file() => Some(FileType::Blob), - FileType::Directory if !dst.is_dir() => Some(FileType::Directory), - _ => None, + FileType::Archive(archive_type) => unpack_archive(archive_type, bytes, expected_hash, dst), + FileType::Blob => unpack_blob(bytes, expected_hash, dst), + FileType::Directory => unpack_archive(ArchiveType::Zip, bytes, expected_hash, dst), } } #[time("debug")] -pub(crate) fn prepare( - mut context: Context, - command: Cmd, - 
payload: &[u8], -) -> Result { - let mut to_extract = HashSet::new(); - let exe = context.reify_string(&command.exe)?.into(); - let args = command - .args - .iter() - .map(|string| context.reify_string(string).map(OsString::from)) - .collect::, _>>()?; - let env = command - .env - .iter() - .map(|(key, value)| { - context - .reify_string(value) - .map(|v| (EnvVar::from(key), OsString::from(v))) - }) - .collect::, _>>()?; - - for file in &context.replacements { - to_extract.insert(file.clone()); - } - +pub(crate) fn install(files: &[FileEntry], payload: &[u8]) -> Result<(), String> { let mut scie_tote = vec![]; let mut location = 0; - for file in &context.files { - if file.eager_extract || to_extract.contains(file) { - let dst = context.get_path(file); - if let Some(file_type) = file_type_to_unpack(file, &dst) { + for file_entry in files { + let advance = match file_entry { + FileEntry::Skip(size) => *size, + FileEntry::Install((file, dst)) => { if file.size == 0 { - scie_tote.push((file, file_type, dst.clone())); + scie_tote.push((file, file.file_type, dst.clone())); } else { let bytes = &payload[location..(location + file.size)]; - let actual_hash = fingerprint::digest(bytes); - if file.hash != actual_hash { - return Err(format!( - "Destination {dst} of size {size} had unexpected hash: {actual_hash}", - size = file.size, - dst = dst.display(), - )); - } else { - debug!( - "Destination {dst} of size {size} had expected hash", - size = file.size, - dst = dst.display() - ); - } - unpack(file_type, Cursor::new(bytes), &dst)?; + unpack(file.file_type, Cursor::new(bytes), file.hash.as_str(), dst)?; } - } else { - debug!("Cache hit {dst} for {file:?}", dst = dst.display()) + file.size } - } - location += file.size; + }; + location += advance; } if !scie_tote.is_empty() { - let tote_file = context.files.last().ok_or_else(|| { + if let FileEntry::Install((tote_file, _)) = files.last().ok_or_else(|| { format!( "Expected the last file to be the scie-tote holding these files: 
{scie_tote:#?}" ) - })?; - let scie_tote_tmpdir = tempfile::TempDir::new().map_err(|e| { - format!("Failed to create a temporary directory to extract the scie-tote to: {e}") - })?; - let scie_tote_path = scie_tote_tmpdir.path().join(&tote_file.name); - let bytes = &payload[(location - tote_file.size)..location]; - unpack(tote_file.file_type, Cursor::new(bytes), &scie_tote_path)?; - for (file, file_type, dst) in scie_tote { - let src_path = scie_tote_path.join(&file.name); - let src = std::fs::File::open(&src_path).map_err(|e| { - format!( - "Failed to open {file:?} at {src} from the unpacked scie-tote: {e}", - src = src_path.display() - ) + })? { + let scie_tote_tmpdir = tempfile::TempDir::new().map_err(|e| { + format!("Failed to create a temporary directory to extract the scie-tote to: {e}") })?; - unpack(file_type, &src, &dst)?; + let scie_tote_path = scie_tote_tmpdir.path().join(&tote_file.name); + let bytes = &payload[(location - tote_file.size)..location]; + unpack( + tote_file.file_type, + Cursor::new(bytes), + tote_file.hash.as_str(), + &scie_tote_path, + )?; + for (file, file_type, dst) in scie_tote { + let src_path = scie_tote_path.join(&file.name); + let src = std::fs::File::open(&src_path).map_err(|e| { + format!( + "Failed to open {file:?} at {src} from the unpacked scie-tote: {e}", + src = src_path.display() + ) + })?; + unpack(file_type, &src, file.hash.as_str(), &dst)?; + } } } - Ok(Process { - exe, - args, - env: EnvVars { vars: env }, - }) + Ok(()) } diff --git a/jump/src/lib.rs b/jump/src/lib.rs index d147be4..5e0db32 100644 --- a/jump/src/lib.rs +++ b/jump/src/lib.rs @@ -27,16 +27,14 @@ use logging_timer::time; pub use crate::archive::create_options; use crate::config::Config; pub use crate::config::Jump; -pub use crate::context::Boot; -use crate::context::Context; // Exposed for the package crate post-processing of the scie-jump binary. 
pub use crate::jump::EOF_MAGIC; -pub use crate::lift::{load_lift, File, Lift}; +pub use crate::lift::{load_lift, File, Lift, ScieBoot}; pub use crate::process::{execute, EnvVar, EnvVars, Process}; pub use crate::zip::check_is_zip; pub struct SelectBoot { - pub boots: Vec, + pub boots: Vec, pub description: Option, pub error_message: Option, } @@ -118,21 +116,24 @@ pub fn prepare_boot(current_exe: PathBuf) -> Result { } let manifest_size = lift.size; - let context = Context::new(current_exe, lift)?; - let result = context.select_command(); + let result = context::select_command(&current_exe, &jump, &lift); if let Ok(Some(selected_command)) = result { let payload = &data[jump.size..data.len() - manifest_size]; - let process = installer::prepare(context, selected_command.cmd, payload)?; + installer::install(&selected_command.files, payload)?; + let process = selected_command.process; trace!("Prepared {process:#?}"); - env::set_var("SCIE", selected_command.scie.as_os_str()); + for binding in selected_command.bindings { + binding.execute()?; + } + env::set_var("SCIE", current_exe.as_os_str()); Ok(BootAction::Execute(( process, selected_command.argv1_consumed, ))) } else { Ok(BootAction::Select(SelectBoot { - boots: context.boots(), - description: context.description, + boots: lift.boots(), + description: lift.description, error_message: result.err(), })) } diff --git a/jump/src/lift.rs b/jump/src/lift.rs index 8ce91cf..89c2e96 100644 --- a/jump/src/lift.rs +++ b/jump/src/lift.rs @@ -9,7 +9,7 @@ use logging_timer::time; use crate::config::{ArchiveType, Boot, Config, FileType, Jump, Other}; use crate::{archive, fingerprint}; -#[derive(Hash, Eq, PartialEq, Clone, Debug)] +#[derive(Clone, Debug, Hash, Eq, PartialEq)] pub struct File { pub name: String, pub key: Option, @@ -35,7 +35,7 @@ impl From for crate::config::File { } } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct Lift { pub name: String, pub description: Option, @@ -47,6 +47,24 @@ pub struct Lift { pub(crate)
other: Option, } +pub struct ScieBoot { + pub name: String, + pub description: Option, +} + +impl Lift { + pub(crate) fn boots(&self) -> Vec { + self.boot + .commands + .iter() + .map(|(name, cmd)| ScieBoot { + name: name.to_string(), + description: cmd.description.clone(), + }) + .collect::>() + } +} + impl From for crate::config::Lift { fn from(value: Lift) -> Self { crate::config::Lift { diff --git a/jump/src/placeholders.rs b/jump/src/placeholders.rs index 74026c0..de89904 100644 --- a/jump/src/placeholders.rs +++ b/jump/src/placeholders.rs @@ -6,8 +6,9 @@ pub(crate) enum Placeholder<'a> { FileName(&'a str), Scie, - ScieBoot, - ScieBootCmd(&'a str), + ScieBindings, + ScieBindingCmd(&'a str), + ScieLift, Env(&'a str), } @@ -74,11 +75,14 @@ pub(crate) fn parse(text: &str) -> Result { } match symbol.splitn(3, '.').collect::>()[..] { ["scie"] => items.push(Item::Placeholder(Placeholder::Scie)), - ["scie", "boot"] => items.push(Item::Placeholder(Placeholder::ScieBoot)), - ["scie", "boot", cmd] => { - items.push(Item::Placeholder(Placeholder::ScieBootCmd(cmd))) + ["scie", "bindings"] => { + items.push(Item::Placeholder(Placeholder::ScieBindings)) + } + ["scie", "bindings", cmd] => { + items.push(Item::Placeholder(Placeholder::ScieBindingCmd(cmd))) } ["scie", "env", name] => items.push(Item::Placeholder(Placeholder::Env(name))), + ["scie", "lift"] => items.push(Item::Placeholder(Placeholder::ScieLift)), _ => items.push(Item::Placeholder(Placeholder::FileName(symbol))), } previous_char = Some('}'); @@ -135,37 +139,42 @@ mod tests { } #[test] - fn scie_boot() { + fn scie_bindings() { assert_eq!( - vec![Item::Placeholder(Placeholder::ScieBoot)], - parse("{scie.boot}").unwrap().items + vec![Item::Placeholder(Placeholder::ScieBindings)], + parse("{scie.bindings}").unwrap().items ); assert_eq!( - vec![Item::Text("A "), Item::Placeholder(Placeholder::ScieBoot)], - parse("A {scie.boot}").unwrap().items + vec![ + Item::Text("A "), + Item::Placeholder(Placeholder::ScieBindings) 
+ ], + parse("A {scie.bindings}").unwrap().items ); assert_eq!( vec![ Item::Text("A "), - Item::Placeholder(Placeholder::ScieBoot), + Item::Placeholder(Placeholder::ScieBindings), Item::Text(" warmer") ], - parse("A {scie.boot} warmer").unwrap().items + parse("A {scie.bindings} warmer").unwrap().items ); } #[test] - fn scie_boot_cmd() { + fn scie_bindings_cmd() { assert_eq!( - vec![Item::Placeholder(Placeholder::ScieBootCmd("do"))], - parse("{scie.boot.do}").unwrap().items + vec![Item::Placeholder(Placeholder::ScieBindingCmd("do"))], + parse("{scie.bindings.do}").unwrap().items ); assert_eq!( vec![ - Item::Placeholder(Placeholder::ScieBootCmd("dotted.cmd.name")), + Item::Placeholder(Placeholder::ScieBindingCmd("dotted.cmd.name")), Item::Text("/venv/pex"), ], - parse("{scie.boot.dotted.cmd.name}/venv/pex").unwrap().items + parse("{scie.bindings.dotted.cmd.name}/venv/pex") + .unwrap() + .items ); } diff --git a/jump/src/process.rs b/jump/src/process.rs index 3f63a56..68f1d61 100644 --- a/jump/src/process.rs +++ b/jump/src/process.rs @@ -6,7 +6,7 @@ use std::process::{Command, ExitStatus}; use crate::config::EnvVar as ConfigEnvVar; -#[derive(Debug)] +#[derive(Clone, Debug, Eq, PartialEq)] pub enum EnvVar { Default(OsString), Replace(OsString), @@ -21,7 +21,7 @@ impl From<&ConfigEnvVar> for EnvVar { } } -#[derive(Debug)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct EnvVars { pub vars: Vec<(EnvVar, OsString)>, } @@ -67,7 +67,7 @@ where .map_err(|e| format!("Spawned {exe:?} {args:?} but failed to gather its exit status: {e}")) } -#[derive(Debug)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct Process { pub env: EnvVars, pub exe: OsString, @@ -75,7 +75,7 @@ pub struct Process { } impl Process { - pub fn _execute(self, process: Process) -> Result { - execute_with_env(process.exe, process.args, 1, process.env.into_env_vars()) + pub fn execute(self) -> Result { + execute_with_env(self.exe, self.args, usize::MAX, self.env.into_env_vars()) } } diff --git 
a/src/boot.rs b/src/boot.rs index f09603a..74b1887 100644 --- a/src/boot.rs +++ b/src/boot.rs @@ -53,6 +53,9 @@ pub(crate) fn select(select_boot: SelectBoot) -> ExitResult { }) .collect::>() .join("\n"), - error_message = select_boot.error_message.unwrap_or_default() + error_message = select_boot + .error_message + .map(|err| format!("\nERROR: {err}")) + .unwrap_or_default() ))) }