diff --git a/crates/cargo-util/src/paths.rs b/crates/cargo-util/src/paths.rs index 5d7e3c5a681..cc73fbad892 100644 --- a/crates/cargo-util/src/paths.rs +++ b/crates/cargo-util/src/paths.rs @@ -710,9 +710,9 @@ pub fn set_file_time_no_err>(path: P, time: FileTime) { /// This canonicalizes both paths before stripping. This is useful if the /// paths are obtained in different ways, and one or the other may or may not /// have been normalized in some way. -pub fn strip_prefix_canonical>( - path: P, - base: P, +pub fn strip_prefix_canonical( + path: impl AsRef, + base: impl AsRef, ) -> Result { // Not all filesystems support canonicalize. Just ignore if it doesn't work. let safe_canonicalize = |path: &Path| match path.canonicalize() { diff --git a/src/cargo/ops/cargo_package/vcs.rs b/src/cargo/ops/cargo_package/vcs.rs index 3c447f32b72..9f2f57284c4 100644 --- a/src/cargo/ops/cargo_package/vcs.rs +++ b/src/cargo/ops/cargo_package/vcs.rs @@ -1,6 +1,6 @@ //! Helpers to gather the VCS information for `cargo package`. -use std::path::Path; +use std::collections::HashSet; use std::path::PathBuf; use anyhow::Context as _; @@ -186,7 +186,7 @@ fn git( .iter() .filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path))) .map(|p| p.as_ref()) - .chain(dirty_metadata_paths(pkg, repo)?.iter()) + .chain(dirty_files_outside_pkg_root(pkg, repo, src_files)?.iter()) .map(|path| { pathdiff::diff_paths(path, cwd) .as_ref() @@ -219,39 +219,47 @@ fn git( } } -/// Checks whether files at paths specified in `package.readme` and -/// `package.license-file` have been modified. +/// Checks whether "included" source files outside package root have been modified. +/// +/// This currently looks at +/// +/// * `package.readme` and `package.license-file` pointing to paths outside package root +/// * symlinks targets reside outside package root /// /// This is required because those paths may link to a file outside the /// current package root, but still under the git workdir, affecting the /// final packaged `.crate` file. -fn dirty_metadata_paths(pkg: &Package, repo: &git2::Repository) -> CargoResult> { - let mut dirty_files = Vec::new(); +fn dirty_files_outside_pkg_root( + pkg: &Package, + repo: &git2::Repository, + src_files: &[PathEntry], +) -> CargoResult> { + let pkg_root = pkg.root(); let workdir = repo.workdir().unwrap(); - let root = pkg.root(); + let meta = pkg.manifest().metadata(); - for path in [&meta.license_file, &meta.readme] { - let Some(path) = path.as_deref().map(Path::new) else { - continue; - }; - let abs_path = paths::normalize_path(&root.join(path)); - if paths::strip_prefix_canonical(abs_path.as_path(), root).is_ok() { - // Inside package root. Don't bother checking git status. - continue; - } - if let Ok(rel_path) = paths::strip_prefix_canonical(abs_path.as_path(), workdir) { - // Outside package root but under git workdir, - if repo.status_file(&rel_path)? != git2::Status::CURRENT { - dirty_files.push(if abs_path.is_symlink() { - // For symlinks, shows paths to symlink sources - workdir.join(rel_path) - } else { - abs_path - }); - } + let metadata_paths: Vec<_> = [&meta.license_file, &meta.readme] + .into_iter() + .filter_map(|p| p.as_deref()) + .map(|path| paths::normalize_path(&pkg_root.join(path))) + .collect(); + + let mut dirty_symlinks = HashSet::new(); + for rel_path in src_files + .iter() + .filter(|p| p.is_symlink_or_under_symlink()) + .map(|p| p.as_ref()) + .chain(metadata_paths.iter()) + // If inside package root. Don't bother checking git status. + .filter(|p| paths::strip_prefix_canonical(p, pkg_root).is_err()) + // Handle files outside package root but under git workdir, + .filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok()) + { + if repo.status_file(&rel_path)? != git2::Status::CURRENT { + dirty_symlinks.insert(workdir.join(rel_path)); } } - Ok(dirty_files) + Ok(dirty_symlinks) } /// Helper to collect dirty statuses for a single repo. diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs index 8bafcf18099..c1e81528fbd 100644 --- a/src/cargo/sources/path.rs +++ b/src/cargo/sources/path.rs @@ -448,6 +448,8 @@ impl From for FileType { pub struct PathEntry { path: PathBuf, ty: FileType, + /// Whether this path was visited when traversing a symlink directory. + under_symlink_dir: bool, } impl PathEntry { @@ -469,10 +471,21 @@ impl PathEntry { /// Similar to [`std::path::Path::is_symlink`] /// but doesn't follow the symbolic link nor make any system call + /// + /// If the path is not a symlink but under a symlink parent directory, + /// this will return false. + /// See [`PathEntry::is_symlink_or_under_symlink`] for an alternative. pub fn is_symlink(&self) -> bool { matches!(self.ty, FileType::Symlink) } + /// Whether a path is a symlink or a path under a symlink directory. + /// + /// Use [`PathEntry::is_symlink`] to get the exact file type of the path only. + pub fn is_symlink_or_under_symlink(&self) -> bool { + self.is_symlink() || self.under_symlink_dir + } + /// Whether this path might be a plain text symlink. /// /// Git may check out symlinks as plain text files that contain the link texts, @@ -826,6 +839,9 @@ fn list_files_gix( files.push(PathEntry { path: file_path, ty, + // Git index doesn't include files from symlink diretory, + // symlink dirs are handled in `list_files_walk`. + under_symlink_dir: false, }); } } @@ -847,6 +863,10 @@ fn list_files_walk( ) -> CargoResult<()> { let walkdir = WalkDir::new(path) .follow_links(true) + // While this is the default, set it explicitly. + // We need walkdir to visit the directory tree in depth-first order, + // so we can ensure a path visited later be under a certain directory. + .contents_first(false) .into_iter() .filter_entry(|entry| { let path = entry.path(); @@ -876,10 +896,27 @@ fn list_files_walk( true }); + + let mut current_symlink_dir = None; for entry in walkdir { match entry { Ok(entry) => { let file_type = entry.file_type(); + + match current_symlink_dir.as_ref() { + Some(dir) if entry.path().starts_with(dir) => { + // Still walk under the same parent symlink dir, so keep it + } + Some(_) | None => { + // Not under any parent symlink dir, update the current one. + current_symlink_dir = if file_type.is_dir() && entry.path_is_symlink() { + Some(entry.path().to_path_buf()) + } else { + None + }; + } + } + if file_type.is_file() || file_type.is_symlink() { // We follow_links(true) here so check if entry was created from a symlink let ty = if entry.path_is_symlink() { @@ -890,6 +927,8 @@ fn list_files_walk( ret.push(PathEntry { path: entry.into_path(), ty, + // This rely on contents_first(false), which walks in depth-first order + under_symlink_dir: current_symlink_dir.is_some(), }); } } @@ -907,6 +946,7 @@ fn list_files_walk( Some(path) => ret.push(PathEntry { path: path.to_path_buf(), ty: FileType::Other, + under_symlink_dir: false, }), None => return Err(err.into()), }, diff --git a/tests/testsuite/package.rs b/tests/testsuite/package.rs index 9c6d0493f83..ae1c5d71024 100644 --- a/tests/testsuite/package.rs +++ b/tests/testsuite/package.rs @@ -1339,10 +1339,12 @@ fn dirty_file_outside_pkg_root_considered_dirty() { license-file = "../LICENSE" "#, ) + .file("original-dir/file", "before") .symlink("lib.rs", "isengard/src/lib.rs") .symlink("README.md", "isengard/README.md") .file(&main_outside_pkg_root, "fn main() {}") .symlink(&main_outside_pkg_root, "isengard/src/main.rs") + .symlink_dir("original-dir", "isengard/symlink-dir") }); git::commit(&repo); @@ -1352,6 +1354,7 @@ fn dirty_file_outside_pkg_root_considered_dirty() { // * Changes in files outside package root that source files symlink to p.change_file("README.md", "after"); p.change_file("lib.rs", "pub fn after() {}"); + p.change_file("original-dir/file", "after"); // * Changes in files outside pkg root that `license-file`/`readme` point to p.change_file("LICENSE", "after"); // * When workspace inheritance is involved and changed @@ -1375,10 +1378,12 @@ fn dirty_file_outside_pkg_root_considered_dirty() { p.cargo("package --workspace --no-verify") .with_status(101) .with_stderr_data(str![[r#" -[ERROR] 2 files in the working directory contain changes that were not yet committed into git: +[ERROR] 4 files in the working directory contain changes that were not yet committed into git: LICENSE README.md +lib.rs +original-dir/file to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag @@ -1388,7 +1393,7 @@ to proceed despite this and include the uncommitted changes, pass the `--allow-d p.cargo("package --workspace --no-verify --allow-dirty") .with_stderr_data(str![[r#" [PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard) -[PACKAGED] 8 files, [FILE_SIZE]B ([FILE_SIZE]B compressed) +[PACKAGED] 9 files, [FILE_SIZE]B ([FILE_SIZE]B compressed) "#]]) .run(); @@ -1411,6 +1416,7 @@ edition = "2021" "Cargo.toml.orig", "src/lib.rs", "src/main.rs", + "symlink-dir/file", "Cargo.lock", "LICENSE", "README.md", @@ -1418,6 +1424,7 @@ edition = "2021" [ ("src/lib.rs", str!["pub fn after() {}"]), ("src/main.rs", str![r#"fn main() { eprintln!("after"); }"#]), + ("symlink-dir/file", str!["after"]), ("README.md", str!["after"]), ("LICENSE", str!["after"]), ("Cargo.toml", cargo_toml),