diff --git a/src/cargo/ops/cargo_package/mod.rs b/src/cargo/ops/cargo_package/mod.rs index 07ba919c98a..6c920eec4e3 100644 --- a/src/cargo/ops/cargo_package/mod.rs +++ b/src/cargo/ops/cargo_package/mod.rs @@ -16,6 +16,7 @@ use crate::core::Workspace; use crate::core::{Package, PackageId, PackageSet, Resolve, SourceId}; use crate::ops::lockfile::LOCKFILE_NAME; use crate::ops::registry::{infer_registry, RegistryOrIndex}; +use crate::sources::path::PathEntry; use crate::sources::registry::index::{IndexPackage, RegistryDependency}; use crate::sources::{PathSource, CRATES_IO_REGISTRY}; use crate::util::cache_lock::CacheLockMode; @@ -396,7 +397,7 @@ fn prepare_archive( fn build_ar_list( ws: &Workspace<'_>, pkg: &Package, - src_files: Vec, + src_files: Vec, vcs_info: Option, ) -> CargoResult> { let mut result = HashMap::new(); @@ -420,7 +421,7 @@ fn build_ar_list( .push(ArchiveFile { rel_path: rel_path.to_owned(), rel_str: rel_str.to_owned(), - contents: FileContents::OnDisk(src_file.clone()), + contents: FileContents::OnDisk(src_file.to_path_buf()), }); } } diff --git a/src/cargo/ops/cargo_package/vcs.rs b/src/cargo/ops/cargo_package/vcs.rs index 44adfd85a1a..3c447f32b72 100644 --- a/src/cargo/ops/cargo_package/vcs.rs +++ b/src/cargo/ops/cargo_package/vcs.rs @@ -9,6 +9,7 @@ use serde::Serialize; use tracing::debug; use crate::core::Package; +use crate::sources::PathEntry; use crate::CargoResult; use crate::GlobalContext; @@ -41,7 +42,7 @@ pub struct GitVcsInfo { #[tracing::instrument(skip_all)] pub fn check_repo_state( p: &Package, - src_files: &[PathBuf], + src_files: &[PathEntry], gctx: &GlobalContext, opts: &PackageOpts<'_>, ) -> CargoResult> { @@ -91,6 +92,8 @@ pub fn check_repo_state( return Ok(None); } + warn_symlink_checked_out_as_plain_text_file(gctx, src_files, &repo)?; + debug!( "found (git) Cargo.toml at `{}` in workdir `{}`", path.display(), @@ -110,11 +113,57 @@ pub fn check_repo_state( return Ok(Some(VcsInfo { git, path_in_vcs })); } +/// Warns if any symlinks were checked out as plain text files. +/// +/// Git config [`core.symlinks`] defaults to true when unset. +/// In git-for-windows (and git as well), +/// the config should be set to false explicitly when the repo was created, +/// if symlink support wasn't detected [^1]. +/// +/// We assume the config was always set at creation time and never changed. +/// So, if it is true, we don't bother users with any warning. +/// +/// [^1]: +/// +/// [`core.symlinks`]: https://git-scm.com/docs/git-config#Documentation/git-config.txt-coresymlinks +fn warn_symlink_checked_out_as_plain_text_file( + gctx: &GlobalContext, + src_files: &[PathEntry], + repo: &git2::Repository, +) -> CargoResult<()> { + if repo + .config() + .and_then(|c| c.get_bool("core.symlinks")) + .unwrap_or(true) + { + return Ok(()); + } + + if src_files.iter().any(|f| f.maybe_plain_text_symlink()) { + let mut shell = gctx.shell(); + shell.warn(format_args!( + "found symbolic links that may be checked out as regular files for git repo at `{}`\n\ + This might cause the `.crate` file to include incorrect or incomplete files", + repo.workdir().unwrap().display(), + ))?; + let extra_note = if cfg!(windows) { + "\nAnd on Windows, enable the Developer Mode to support symlinks" + } else { + "" + }; + shell.note(format_args!( + "to avoid this, set the Git config `core.symlinks` to `true`{extra_note}", + ))?; + } + + Ok(()) +} + /// The real git status check starts from here. fn git( pkg: &Package, gctx: &GlobalContext, - src_files: &[PathBuf], + src_files: &[PathEntry], repo: &git2::Repository, opts: &PackageOpts<'_>, ) -> CargoResult> { @@ -136,6 +185,7 @@ fn git( let mut dirty_src_files: Vec<_> = src_files .iter() .filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path))) + .map(|p| p.as_ref()) .chain(dirty_metadata_paths(pkg, repo)?.iter()) .map(|path| { pathdiff::diff_paths(path, cwd) diff --git a/src/cargo/ops/vendor.rs b/src/cargo/ops/vendor.rs index 35d0e0c9417..c6c76f624da 100644 --- a/src/cargo/ops/vendor.rs +++ b/src/cargo/ops/vendor.rs @@ -2,6 +2,7 @@ use crate::core::shell::Verbosity; use crate::core::{GitReference, Package, Workspace}; use crate::ops; use crate::sources::path::PathSource; +use crate::sources::PathEntry; use crate::sources::CRATES_IO_REGISTRY; use crate::util::cache_lock::CacheLockMode; use crate::util::{try_canonicalize, CargoResult, GlobalContext}; @@ -315,13 +316,14 @@ fn sync( fn cp_sources( pkg: &Package, src: &Path, - paths: &[PathBuf], + paths: &[PathEntry], dst: &Path, cksums: &mut BTreeMap, tmp_buf: &mut [u8], gctx: &GlobalContext, ) -> CargoResult<()> { for p in paths { + let p = p.as_ref(); let relative = p.strip_prefix(&src).unwrap(); match relative.to_str() { diff --git a/src/cargo/sources/mod.rs b/src/cargo/sources/mod.rs index 9c98cc49eaa..dfbf79c76bc 100644 --- a/src/cargo/sources/mod.rs +++ b/src/cargo/sources/mod.rs @@ -29,6 +29,7 @@ pub use self::config::SourceConfigMap; pub use self::directory::DirectorySource; pub use self::git::GitSource; +pub use self::path::PathEntry; pub use self::path::PathSource; pub use self::path::RecursivePathSource; pub use self::registry::{ diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs index ee2e6fea47f..8bafcf18099 100644 --- a/src/cargo/sources/path.rs +++ b/src/cargo/sources/path.rs @@ -94,7 +94,7 @@ impl<'gctx> PathSource<'gctx> { /// use other methods like `.gitignore`, `package.include`, or /// `package.exclude` to filter the list of files. #[tracing::instrument(skip_all)] - pub fn list_files(&self, pkg: &Package) -> CargoResult> { + pub fn list_files(&self, pkg: &Package) -> CargoResult> { list_files(pkg, self.gctx) } @@ -278,7 +278,7 @@ impl<'gctx> RecursivePathSource<'gctx> { /// are relevant for building this package, but it also contains logic to /// use other methods like `.gitignore`, `package.include`, or /// `package.exclude` to filter the list of files. - pub fn list_files(&self, pkg: &Package) -> CargoResult> { + pub fn list_files(&self, pkg: &Package) -> CargoResult> { list_files(pkg, self.gctx) } @@ -404,6 +404,103 @@ impl<'gctx> Source for RecursivePathSource<'gctx> { } } +/// Type that abstracts over [`gix::dir::entry::Kind`] and [`fs::FileType`]. +#[derive(Debug, Clone, Copy)] +enum FileType { + File { maybe_symlink: bool }, + Dir, + Symlink, + Other, +} + +impl From for FileType { + fn from(value: fs::FileType) -> Self { + if value.is_file() { + FileType::File { + maybe_symlink: false, + } + } else if value.is_dir() { + FileType::Dir + } else if value.is_symlink() { + FileType::Symlink + } else { + FileType::Other + } + } +} + +impl From for FileType { + fn from(value: gix::dir::entry::Kind) -> Self { + use gix::dir::entry::Kind; + match value { + Kind::Untrackable => FileType::Other, + Kind::File => FileType::File { + maybe_symlink: false, + }, + Kind::Symlink => FileType::Symlink, + Kind::Directory | Kind::Repository => FileType::Dir, + } + } +} + +/// [`PathBuf`] with extra metadata. +#[derive(Clone, Debug)] +pub struct PathEntry { + path: PathBuf, + ty: FileType, +} + +impl PathEntry { + pub fn into_path_buf(self) -> PathBuf { + self.path + } + + /// Similar to [`std::path::Path::is_file`] + /// but doesn't follow the symbolic link nor make any system call + pub fn is_file(&self) -> bool { + matches!(self.ty, FileType::File { .. }) + } + + /// Similar to [`std::path::Path::is_dir`] + /// but doesn't follow the symbolic link nor make any system call + pub fn is_dir(&self) -> bool { + matches!(self.ty, FileType::Dir) + } + + /// Similar to [`std::path::Path::is_symlink`] + /// but doesn't follow the symbolic link nor make any system call + pub fn is_symlink(&self) -> bool { + matches!(self.ty, FileType::Symlink) + } + + /// Whether this path might be a plain text symlink. + /// + /// Git may check out symlinks as plain text files that contain the link texts, + /// when either `core.symlinks` is `false`, or on Windows. + pub fn maybe_plain_text_symlink(&self) -> bool { + matches!( + self.ty, + FileType::File { + maybe_symlink: true + } + ) + } +} + +impl std::ops::Deref for PathEntry { + type Target = Path; + + fn deref(&self) -> &Self::Target { + self.path.as_path() + } +} + +impl AsRef for PathEntry { + fn as_ref(&self) -> &PathBuf { + &self.path + } +} + fn first_package<'p>( pkg_id: PackageId, pkgs: &'p Vec, @@ -446,7 +543,7 @@ fn first_package<'p>( /// are relevant for building this package, but it also contains logic to /// use other methods like `.gitignore`, `package.include`, or /// `package.exclude` to filter the list of files. -pub fn list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult> { +pub fn list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult> { _list_files(pkg, gctx).with_context(|| { format!( "failed to determine list of files in {}", @@ -456,7 +553,7 @@ pub fn list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult CargoResult> { +fn _list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult> { let root = pkg.root(); let no_include_option = pkg.manifest().include().is_empty(); let git_repo = if no_include_option { @@ -580,7 +677,7 @@ fn list_files_gix( repo: &gix::Repository, filter: &dyn Fn(&Path, bool) -> bool, gctx: &GlobalContext, -) -> CargoResult> { +) -> CargoResult> { debug!("list_files_gix {}", pkg.package_id()); let options = repo .dirwalk_options()? @@ -619,7 +716,7 @@ fn list_files_gix( vec![include, exclude] }; - let mut files = Vec::::new(); + let mut files = Vec::::new(); let mut subpackages_found = Vec::new(); for item in repo .dirwalk_iter(index.clone(), pathspec, Default::default(), options)? @@ -633,7 +730,24 @@ fn list_files_gix( && item.entry.rela_path == "Cargo.lock") }) }) - .map(|res| res.map(|item| (item.entry.rela_path, item.entry.disk_kind))) + .map(|res| { + res.map(|item| { + // Assumption: if a file tracked as a symlink in Git index, and + // the actual file type on disk is file, then it might be a + // plain text file symlink. + // There are exceptions like the file has changed from a symlink + // to a real text file, but hasn't been committed to Git index. + // Exceptions may be rare so we're okay with this now. + let maybe_plain_text_symlink = item.entry.index_kind + == Some(gix::dir::entry::Kind::Symlink) + && item.entry.disk_kind == Some(gix::dir::entry::Kind::File); + ( + item.entry.rela_path, + item.entry.disk_kind, + maybe_plain_text_symlink, + ) + }) + }) .chain( // Append entries that might be tracked in `/target/`. index @@ -651,12 +765,13 @@ fn list_files_gix( // This traversal is not part of a `status()`, and tracking things in `target/` // is rare. None, + false, ) }) .map(Ok), ) { - let (rela_path, kind) = item?; + let (rela_path, kind, maybe_plain_text_symlink) = item?; let file_path = root.join(gix::path::from_bstr(rela_path)); if file_path.file_name().and_then(|name| name.to_str()) == Some("Cargo.toml") { // Keep track of all sub-packages found and also strip out all @@ -701,7 +816,17 @@ fn list_files_gix( } else if (filter)(&file_path, is_dir) { assert!(!is_dir); trace!(" found {}", file_path.display()); - files.push(file_path); + let ty = match kind.map(Into::into) { + Some(FileType::File { .. }) => FileType::File { + maybe_symlink: maybe_plain_text_symlink, + }, + Some(ty) => ty, + None => FileType::Other, + }; + files.push(PathEntry { + path: file_path, + ty, + }); } } @@ -715,7 +840,7 @@ fn list_files_gix( /// is not tracked under a Git repository. fn list_files_walk( path: &Path, - ret: &mut Vec, + ret: &mut Vec, is_root: bool, filter: &dyn Fn(&Path, bool) -> bool, gctx: &GlobalContext, @@ -756,7 +881,16 @@ fn list_files_walk( Ok(entry) => { let file_type = entry.file_type(); if file_type.is_file() || file_type.is_symlink() { - ret.push(entry.into_path()); + // We follow_links(true) here so check if entry was created from a symlink + let ty = if entry.path_is_symlink() { + FileType::Symlink + } else { + file_type.into() + }; + ret.push(PathEntry { + path: entry.into_path(), + ty, + }); } } Err(err) if err.loop_ancestor().is_some() => { @@ -770,7 +904,10 @@ fn list_files_walk( // Otherwise, simply recover from it. // Don't worry about error skipping here, the callers would // still hit the IO error if they do access it thereafter. - Some(path) => ret.push(path.to_path_buf()), + Some(path) => ret.push(PathEntry { + path: path.to_path_buf(), + ty: FileType::Other, + }), None => return Err(err.into()), }, } @@ -801,7 +938,7 @@ fn last_modified_file( let mtime = paths::mtime(&file).unwrap_or_else(|_| FileTime::zero()); if mtime > max { max = mtime; - max_path = file; + max_path = file.into_path_buf(); } } trace!("last modified file {}: {}", path.display(), max); diff --git a/tests/testsuite/package.rs b/tests/testsuite/package.rs index b418513eace..9c6d0493f83 100644 --- a/tests/testsuite/package.rs +++ b/tests/testsuite/package.rs @@ -7025,3 +7025,89 @@ src/main.rs "#]]) .run(); } + +#[cargo_test] +fn git_core_symlinks_false() { + if !symlink_supported() { + return; + } + + let git_project = git::new("bar", |p| { + p.file( + "Cargo.toml", + r#" + [package] + name = "bar" + description = "bar" + license = "MIT" + edition = "2021" + documentation = "foo" + "#, + ) + .file("src/lib.rs", "//! This is a module") + .symlink("src/lib.rs", "symlink-lib.rs") + .symlink_dir("src", "symlink-dir") + }); + + let url = git_project.root().to_url().to_string(); + + let p = project().build(); + let root = p.root(); + // Remove the default project layout, + // so we can git-fetch from git_project under the same directory + fs::remove_dir_all(&root).unwrap(); + fs::create_dir_all(&root).unwrap(); + let repo = git::init(&root); + + let mut cfg = repo.config().unwrap(); + cfg.set_bool("core.symlinks", false).unwrap(); + + // let's fetch from git_project so it respects our core.symlinks=false config. + repo.remote_anonymous(&url) + .unwrap() + .fetch(&["HEAD"], None, None) + .unwrap(); + let rev = repo + .find_reference("FETCH_HEAD") + .unwrap() + .peel_to_commit() + .unwrap(); + repo.reset(rev.as_object(), git2::ResetType::Hard, None) + .unwrap(); + + p.cargo("package --allow-dirty") + .with_stderr_data(str![[r#" +[WARNING] found symbolic links that may be checked out as regular files for git repo at `[ROOT]/foo/` +This might cause the `.crate` file to include incorrect or incomplete files +[NOTE] to avoid this, set the Git config `core.symlinks` to `true` +... +[PACKAGING] bar v0.0.0 ([ROOT]/foo) +[PACKAGED] 7 files, [FILE_SIZE]B ([FILE_SIZE]B compressed) +[VERIFYING] bar v0.0.0 ([ROOT]/foo) +[COMPILING] bar v0.0.0 ([ROOT]/foo/target/package/bar-0.0.0) +[FINISHED] `dev` profile [unoptimized + debuginfo] target(s) in [ELAPSED]s + +"#]]) + .run(); + + let f = File::open(&p.root().join("target/package/bar-0.0.0.crate")).unwrap(); + validate_crate_contents( + f, + "bar-0.0.0.crate", + &[ + "Cargo.lock", + "Cargo.toml", + "Cargo.toml.orig", + "src/lib.rs", + // We're missing symlink-dir/lib.rs in the `.crate` file. + "symlink-dir", + "symlink-lib.rs", + ".cargo_vcs_info.json", + ], + [ + // And their contents are incorrect. + ("symlink-dir", str!["[ROOT]/bar/src"]), + ("symlink-lib.rs", str!["[ROOT]/bar/src/lib.rs"]), + ], + ); +}