Skip to content

Commit

Permalink
fix(package): check dirtiness of symlinks source files (#14981)
Browse files Browse the repository at this point in the history
### What does this PR try to resolve?

This adds a special case for checking source files are symlinks
and have been modified when under a VCS control

This is required because those paths may link to a file outside the
current package root, but still under the git workdir, affecting the
final packaged `.crate` file.

### How should we test and review this PR?

Pretty similar to #14966, as a part of #14967.

This may have potential performance issue. If a package contains
thousands of symlinks, Cargo will fire `git status` for each of them.
Not sure if we want to do anything proactively now.

The introduction of the `PathEntry` struct gives us more room for
storing file metadata to satisfiy use cases in the future. For
instances,

* Knowing a source file is a symlink and resolving it when packaging on
Windows
  * #5664
  * #14965
* Knowing more about a file's metadata (e.g. permission bits from Git)
  * #4413
  * #8006
* Provide richer information for `cargo package --list`, for example
JSON output mode
  * #11666
  * #13331
  * #13953
  • Loading branch information
epage authored Dec 31, 2024
2 parents d73d2ca + 24dd205 commit d85d761
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 32 deletions.
6 changes: 3 additions & 3 deletions crates/cargo-util/src/paths.rs
Original file line number Diff line number Diff line change
Expand Up @@ -710,9 +710,9 @@ pub fn set_file_time_no_err<P: AsRef<Path>>(path: P, time: FileTime) {
/// This canonicalizes both paths before stripping. This is useful if the
/// paths are obtained in different ways, and one or the other may or may not
/// have been normalized in some way.
pub fn strip_prefix_canonical<P: AsRef<Path>>(
path: P,
base: P,
pub fn strip_prefix_canonical(
path: impl AsRef<Path>,
base: impl AsRef<Path>,
) -> Result<PathBuf, std::path::StripPrefixError> {
// Not all filesystems support canonicalize. Just ignore if it doesn't work.
let safe_canonicalize = |path: &Path| match path.canonicalize() {
Expand Down
62 changes: 35 additions & 27 deletions src/cargo/ops/cargo_package/vcs.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Helpers to gather the VCS information for `cargo package`.
use std::path::Path;
use std::collections::HashSet;
use std::path::PathBuf;

use anyhow::Context as _;
Expand Down Expand Up @@ -186,7 +186,7 @@ fn git(
.iter()
.filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path)))
.map(|p| p.as_ref())
.chain(dirty_metadata_paths(pkg, repo)?.iter())
.chain(dirty_files_outside_pkg_root(pkg, repo, src_files)?.iter())
.map(|path| {
pathdiff::diff_paths(path, cwd)
.as_ref()
Expand Down Expand Up @@ -219,39 +219,47 @@ fn git(
}
}

/// Checks whether files at paths specified in `package.readme` and
/// `package.license-file` have been modified.
/// Checks whether "included" source files outside package root have been modified.
///
/// This currently looks at
///
/// * `package.readme` and `package.license-file` pointing to paths outside package root
/// * symlinks targets reside outside package root
///
/// This is required because those paths may link to a file outside the
/// current package root, but still under the git workdir, affecting the
/// final packaged `.crate` file.
fn dirty_metadata_paths(pkg: &Package, repo: &git2::Repository) -> CargoResult<Vec<PathBuf>> {
let mut dirty_files = Vec::new();
fn dirty_files_outside_pkg_root(
pkg: &Package,
repo: &git2::Repository,
src_files: &[PathEntry],
) -> CargoResult<HashSet<PathBuf>> {
let pkg_root = pkg.root();
let workdir = repo.workdir().unwrap();
let root = pkg.root();

let meta = pkg.manifest().metadata();
for path in [&meta.license_file, &meta.readme] {
let Some(path) = path.as_deref().map(Path::new) else {
continue;
};
let abs_path = paths::normalize_path(&root.join(path));
if paths::strip_prefix_canonical(abs_path.as_path(), root).is_ok() {
// Inside package root. Don't bother checking git status.
continue;
}
if let Ok(rel_path) = paths::strip_prefix_canonical(abs_path.as_path(), workdir) {
// Outside package root but under git workdir,
if repo.status_file(&rel_path)? != git2::Status::CURRENT {
dirty_files.push(if abs_path.is_symlink() {
// For symlinks, shows paths to symlink sources
workdir.join(rel_path)
} else {
abs_path
});
}
let metadata_paths: Vec<_> = [&meta.license_file, &meta.readme]
.into_iter()
.filter_map(|p| p.as_deref())
.map(|path| paths::normalize_path(&pkg_root.join(path)))
.collect();

let mut dirty_symlinks = HashSet::new();
for rel_path in src_files
.iter()
.filter(|p| p.is_symlink_or_under_symlink())
.map(|p| p.as_ref())
.chain(metadata_paths.iter())
// If inside package root. Don't bother checking git status.
.filter(|p| paths::strip_prefix_canonical(p, pkg_root).is_err())
// Handle files outside package root but under git workdir,
.filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok())
{
if repo.status_file(&rel_path)? != git2::Status::CURRENT {
dirty_symlinks.insert(workdir.join(rel_path));
}
}
Ok(dirty_files)
Ok(dirty_symlinks)
}

/// Helper to collect dirty statuses for a single repo.
Expand Down
40 changes: 40 additions & 0 deletions src/cargo/sources/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,8 @@ impl From<gix::dir::entry::Kind> for FileType {
pub struct PathEntry {
path: PathBuf,
ty: FileType,
/// Whether this path was visited when traversing a symlink directory.
under_symlink_dir: bool,
}

impl PathEntry {
Expand All @@ -469,10 +471,21 @@ impl PathEntry {

/// Similar to [`std::path::Path::is_symlink`]
/// but doesn't follow the symbolic link nor make any system call
///
/// If the path is not a symlink but under a symlink parent directory,
/// this will return false.
/// See [`PathEntry::is_symlink_or_under_symlink`] for an alternative.
pub fn is_symlink(&self) -> bool {
matches!(self.ty, FileType::Symlink)
}

/// Whether a path is a symlink or a path under a symlink directory.
///
/// Use [`PathEntry::is_symlink`] to get the exact file type of the path only.
pub fn is_symlink_or_under_symlink(&self) -> bool {
self.is_symlink() || self.under_symlink_dir
}

/// Whether this path might be a plain text symlink.
///
/// Git may check out symlinks as plain text files that contain the link texts,
Expand Down Expand Up @@ -826,6 +839,9 @@ fn list_files_gix(
files.push(PathEntry {
path: file_path,
ty,
// Git index doesn't include files from symlink diretory,
// symlink dirs are handled in `list_files_walk`.
under_symlink_dir: false,
});
}
}
Expand All @@ -847,6 +863,10 @@ fn list_files_walk(
) -> CargoResult<()> {
let walkdir = WalkDir::new(path)
.follow_links(true)
// While this is the default, set it explicitly.
// We need walkdir to visit the directory tree in depth-first order,
// so we can ensure a path visited later be under a certain directory.
.contents_first(false)
.into_iter()
.filter_entry(|entry| {
let path = entry.path();
Expand Down Expand Up @@ -876,10 +896,27 @@ fn list_files_walk(

true
});

let mut current_symlink_dir = None;
for entry in walkdir {
match entry {
Ok(entry) => {
let file_type = entry.file_type();

match current_symlink_dir.as_ref() {
Some(dir) if entry.path().starts_with(dir) => {
// Still walk under the same parent symlink dir, so keep it
}
Some(_) | None => {
// Not under any parent symlink dir, update the current one.
current_symlink_dir = if file_type.is_dir() && entry.path_is_symlink() {
Some(entry.path().to_path_buf())
} else {
None
};
}
}

if file_type.is_file() || file_type.is_symlink() {
// We follow_links(true) here so check if entry was created from a symlink
let ty = if entry.path_is_symlink() {
Expand All @@ -890,6 +927,8 @@ fn list_files_walk(
ret.push(PathEntry {
path: entry.into_path(),
ty,
// This rely on contents_first(false), which walks in depth-first order
under_symlink_dir: current_symlink_dir.is_some(),
});
}
}
Expand All @@ -907,6 +946,7 @@ fn list_files_walk(
Some(path) => ret.push(PathEntry {
path: path.to_path_buf(),
ty: FileType::Other,
under_symlink_dir: false,
}),
None => return Err(err.into()),
},
Expand Down
11 changes: 9 additions & 2 deletions tests/testsuite/package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1339,10 +1339,12 @@ fn dirty_file_outside_pkg_root_considered_dirty() {
license-file = "../LICENSE"
"#,
)
.file("original-dir/file", "before")
.symlink("lib.rs", "isengard/src/lib.rs")
.symlink("README.md", "isengard/README.md")
.file(&main_outside_pkg_root, "fn main() {}")
.symlink(&main_outside_pkg_root, "isengard/src/main.rs")
.symlink_dir("original-dir", "isengard/symlink-dir")
});
git::commit(&repo);

Expand All @@ -1352,6 +1354,7 @@ fn dirty_file_outside_pkg_root_considered_dirty() {
// * Changes in files outside package root that source files symlink to
p.change_file("README.md", "after");
p.change_file("lib.rs", "pub fn after() {}");
p.change_file("original-dir/file", "after");
// * Changes in files outside pkg root that `license-file`/`readme` point to
p.change_file("LICENSE", "after");
// * When workspace inheritance is involved and changed
Expand All @@ -1375,10 +1378,12 @@ fn dirty_file_outside_pkg_root_considered_dirty() {
p.cargo("package --workspace --no-verify")
.with_status(101)
.with_stderr_data(str![[r#"
[ERROR] 2 files in the working directory contain changes that were not yet committed into git:
[ERROR] 4 files in the working directory contain changes that were not yet committed into git:
LICENSE
README.md
lib.rs
original-dir/file
to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag
Expand All @@ -1388,7 +1393,7 @@ to proceed despite this and include the uncommitted changes, pass the `--allow-d
p.cargo("package --workspace --no-verify --allow-dirty")
.with_stderr_data(str![[r#"
[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard)
[PACKAGED] 8 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
[PACKAGED] 9 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
"#]])
.run();
Expand All @@ -1411,13 +1416,15 @@ edition = "2021"
"Cargo.toml.orig",
"src/lib.rs",
"src/main.rs",
"symlink-dir/file",
"Cargo.lock",
"LICENSE",
"README.md",
],
[
("src/lib.rs", str!["pub fn after() {}"]),
("src/main.rs", str![r#"fn main() { eprintln!("after"); }"#]),
("symlink-dir/file", str!["after"]),
("README.md", str!["after"]),
("LICENSE", str!["after"]),
("Cargo.toml", cargo_toml),
Expand Down

0 comments on commit d85d761

Please sign in to comment.