Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix directory targets with empty subdirs #11226

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 87 additions & 83 deletions src/dune_cache/local.ml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ module Target = struct
Path.Build.chmod path ~mode:(Path.Permissions.remove Path.Permissions.write st_perm);
let executable = Path.Permissions.test Path.Permissions.execute st_perm in
Some { executable }
| { Unix.st_kind = Unix.S_DIR; st_perm; _ } ->
    (* Adding "executable" permissions to directories means we can traverse them. *)
Path.Build.chmod path ~mode:(Path.Permissions.add Path.Permissions.execute st_perm);
    (* The value of [executable] here is ignored, but returning [Some] is meaningful. *)
Some { executable = true }
| (exception Unix.Unix_error _) | _ -> None
;;
end
Expand Down Expand Up @@ -79,10 +84,8 @@ module Artifacts = struct
(artifacts : Digest.t Targets.Produced.t)
=
let entries =
Targets.Produced.foldi artifacts ~init:[] ~f:(fun target file_digest entries ->
let entry : Metadata_entry.t =
{ file_path = Path.Local.to_string target; file_digest }
in
Targets.Produced.foldi artifacts ~init:[] ~f:(fun target digest entries ->
let entry : Metadata_entry.t = { path = Path.Local.to_string target; digest } in
entry :: entries)
|> List.rev
in
Expand All @@ -103,12 +106,16 @@ module Artifacts = struct
Result.try_with (fun () ->
(* CR-someday rleshchinskiy: We recreate the directory structure here but it might be
simpler to just use file digests instead of file names and no subdirectories. *)
Path.Local.Map.iteri targets.dirs ~f:(fun path _ ->
Path.mkdir_p (Path.append_local temp_dir path));
Targets.Produced.iteri targets ~f:(fun path _ ->
let path_in_build_dir = Path.build (Path.Build.append_local targets.root path) in
let path_in_temp_dir = Path.append_local temp_dir path in
portable_hardlink_or_copy ~src:path_in_build_dir ~dst:path_in_temp_dir))
(* The comment above seems outdated wrt. 'no subdirectories'... *)
Targets.Produced.iteri
targets
~d:(fun dir -> Path.mkdir_p (Path.append_local temp_dir dir))
~f:(fun file _ ->
let path_in_build_dir =
Path.build (Path.Build.append_local targets.root file)
in
let path_in_temp_dir = Path.append_local temp_dir file in
portable_hardlink_or_copy ~src:path_in_build_dir ~dst:path_in_temp_dir))
;;

(* Step II of [store_skipping_metadata].
Expand All @@ -120,8 +127,7 @@ module Artifacts = struct
let open Fiber.O in
Fiber.collect_errors (fun () ->
Targets.Produced.parallel_map targets ~f:(fun path { Target.executable } ->
let file = Path.append_local temp_dir path in
compute_digest ~executable file))
compute_digest ~executable (Path.append_local temp_dir path)))
>>| Result.map_error ~f:(function
| exn :: _ -> exn.Exn_with_backtrace.exn
| [] -> assert false)
Expand All @@ -133,70 +139,73 @@ module Artifacts = struct
artifacts
~init:Store_result.empty
~f:(fun target digest results ->
let path_in_temp_dir = Path.append_local temp_dir target in
let path_in_cache = file_path ~file_digest:digest in
let store_using_hardlinks () =
match
Dune_cache_storage.Util.Optimistically.link
~src:path_in_temp_dir
~dst:path_in_cache
with
| exception Unix.Unix_error (Unix.EEXIST, _, _) ->
(* We end up here if the cache already contains an entry for this
artifact. We deduplicate by keeping only one copy, in the
cache. *)
let path_in_build_dir =
Path.build (Path.Build.append_local artifacts.root target)
in
(match
Path.unlink_no_err path_in_temp_dir;
(* At first, we deduplicate the temporary file. Doing this
intermediate step allows us to keep the original target in case
the below link step fails. This might happen if the trimmer has
just deleted [path_in_cache]. In this rare case, this function
fails with an [Error], and so we might end up with some
duplicates in the workspace. *)
link_even_if_there_are_too_many_links_already
~src:path_in_cache
~dst:path_in_temp_dir;
(* Now we can simply rename the temporary file into the target,
knowing that the original target remains in place if the
renaming fails.
match digest with
| None -> results
| Some file_digest ->
let path_in_temp_dir = Path.append_local temp_dir target in
let path_in_cache = file_path ~file_digest in
let store_using_hardlinks () =
match
Dune_cache_storage.Util.Optimistically.link
~src:path_in_temp_dir
~dst:path_in_cache
with
| exception Unix.Unix_error (Unix.EEXIST, _, _) ->
(* We end up here if the cache already contains an entry for this
artifact. We deduplicate by keeping only one copy, in the
cache. *)
let path_in_build_dir =
Path.build (Path.Build.append_local artifacts.root target)
in
(match
Path.unlink_no_err path_in_temp_dir;
(* At first, we deduplicate the temporary file. Doing this
intermediate step allows us to keep the original target in case
the below link step fails. This might happen if the trimmer has
just deleted [path_in_cache]. In this rare case, this function
fails with an [Error], and so we might end up with some
duplicates in the workspace. *)
link_even_if_there_are_too_many_links_already
~src:path_in_cache
~dst:path_in_temp_dir;
(* Now we can simply rename the temporary file into the target,
knowing that the original target remains in place if the
renaming fails.

One curious case to think about is if the file in the cache
happens to have the same inode as the file in the workspace. In
that case this deduplication should be a no-op, but the
[rename] operation has a quirk where [path_in_temp_dir] can
remain on disk. This is not a problem because we clean the
temporary directory later. *)
Path.rename path_in_temp_dir path_in_build_dir
with
| exception e -> Store_result.Error e
| () -> Already_present)
| exception e -> Error e
| () -> Stored
in
let store_using_test_and_rename () =
(* CR-someday amokhov: There is a race here. If [path_in_cache] is
created after [Path.exists] but before [Path.rename], it will be
silently overwritten. Find a good way to avoid this race. *)
match Path.exists path_in_cache with
| true -> Store_result.Already_present
| false ->
(match
Dune_cache_storage.Util.Optimistically.rename
~src:path_in_temp_dir
~dst:path_in_cache
with
| exception e -> Error e
| () -> Stored)
in
let result =
match (mode : Dune_cache_storage.Mode.t) with
| Hardlink -> store_using_hardlinks ()
| Copy -> store_using_test_and_rename ()
in
Store_result.combine results result)
One curious case to think about is if the file in the cache
happens to have the same inode as the file in the workspace. In
that case this deduplication should be a no-op, but the
[rename] operation has a quirk where [path_in_temp_dir] can
remain on disk. This is not a problem because we clean the
temporary directory later. *)
Path.rename path_in_temp_dir path_in_build_dir
with
| exception e -> Store_result.Error e
| () -> Already_present)
| exception e -> Error e
| () -> Stored
in
let store_using_test_and_rename () =
(* CR-someday amokhov: There is a race here. If [path_in_cache] is
created after [Path.exists] but before [Path.rename], it will be
silently overwritten. Find a good way to avoid this race. *)
match Path.exists path_in_cache with
| true -> Store_result.Already_present
| false ->
(match
Dune_cache_storage.Util.Optimistically.rename
~src:path_in_temp_dir
~dst:path_in_cache
with
| exception e -> Error e
| () -> Stored)
in
let result =
match (mode : Dune_cache_storage.Mode.t) with
| Hardlink -> store_using_hardlinks ()
| Copy -> store_using_test_and_rename ()
in
Store_result.combine results result)
;;

let store_skipping_metadata ~mode ~targets ~compute_digest
Expand Down Expand Up @@ -281,10 +290,7 @@ module Artifacts = struct
| Copy -> copy ~src ~dst);
Unwind.push unwind (fun () -> Path.Build.unlink_no_err target)
in
try
Path.Local.Map.iteri artifacts.dirs ~f:(fun dir _ -> mk_dir dir);
Targets.Produced.iteri artifacts ~f:mk_file
with
try Targets.Produced.iteri artifacts ~f:mk_file ~d:mk_dir with
| exn ->
Unwind.unwind unwind;
reraise exn
Expand All @@ -294,10 +300,8 @@ module Artifacts = struct
let restore ~mode ~rule_digest ~target_dir =
Restore_result.bind (list ~rule_digest) ~f:(fun (entries : Metadata_entry.t list) ->
let artifacts =
Path.Local.Map.of_list_map_exn
entries
~f:(fun { Metadata_entry.file_path; file_digest } ->
Path.Local.of_string file_path, file_digest)
Path.Local.Map.of_list_map_exn entries ~f:(fun { Metadata_entry.path; digest } ->
Path.Local.of_string path, digest)
|> Targets.Produced.of_files target_dir
in
try
Expand Down
18 changes: 10 additions & 8 deletions src/dune_cache/shared.ml
Original file line number Diff line number Diff line change
Expand Up @@ -121,17 +121,22 @@ struct
]
in
let update_cached_digests ~targets_and_digests =
Targets.Produced.iteri targets_and_digests ~f:(fun path digest ->
Targets.Produced.iter_files targets_and_digests ~f:(fun path digest ->
Cached_digest.set (Path.Build.append_local targets_and_digests.root path) digest)
in
match
Targets.Produced.map_with_errors
produced_targets
~all_errors:false
~f:(fun target () ->
~f:(fun target ->
(* All of this monad boilerplate seems unnecessary since we don't care about errors... *)
match Local.Target.create target with
| Some t -> Ok t
| None -> Error ())
~d:(fun target ->
match Local.Target.create target with
| Some _ -> Ok ()
| None -> Error ())
~all_errors:false
produced_targets
with
| Error _ -> Fiber.return None
| Ok targets ->
Expand Down Expand Up @@ -190,10 +195,7 @@ struct
~remove_write_permissions:should_remove_write_permissions_on_generated_files
in
match
Targets.Produced.map_with_errors
produced_targets
~all_errors:true
~f:(fun target () -> compute_digest target)
Targets.Produced.map_with_errors ~f:compute_digest ~all_errors:true produced_targets
with
| Ok result -> result
| Error errors ->
Expand Down
8 changes: 4 additions & 4 deletions src/dune_cache/trimmer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ let trim_broken_metadata_entries ~trimmed_so_far =
keep them untrimmed for now. *)
false
| Metadata.Artifacts { entries; _ } ->
List.exists
entries
~f:(fun { Artifacts.Metadata_entry.file_digest; _ } ->
List.exists entries ~f:(function
| { Artifacts.Metadata_entry.digest = Some file_digest; _ } ->
let reference = file_path ~file_digest in
not (Path.exists reference)))
not (Path.exists reference)
| { digest = None; _ } -> false))
in
match should_be_removed with
| true ->
Expand Down
34 changes: 22 additions & 12 deletions src/dune_cache_storage/dune_cache_storage.ml
Original file line number Diff line number Diff line change
Expand Up @@ -215,28 +215,38 @@ end
module Artifacts = struct
module Metadata_entry = struct
type t =
{ file_path : string
; file_digest : Digest.t
{ path : string (** Can have more than one component for directory targets *)
; digest : Digest.t option
(** This digest is always present when [path] points to a file, and absent when it's a directory. *)
}

let equal x y =
Digest.equal x.file_digest y.file_digest && String.equal x.file_path y.file_path
String.equal x.path y.path && Option.equal Digest.equal x.digest y.digest
;;

let to_sexp { file_path; file_digest } =
Sexp.List [ Atom file_path; Atom (Digest.to_string file_digest) ]
let digest_to_sexp = function
| None -> Sexp.Atom "<dir>"
| Some digest -> Sexp.Atom (Digest.to_string digest)
;;

let of_sexp = function
| Sexp.List [ Atom file_path; Atom file_digest ] ->
(match Digest.from_hex file_digest with
| Some file_digest -> Ok { file_path; file_digest }
let to_sexp { path; digest } = Sexp.List [ Atom path; digest_to_sexp digest ]

let digest_of_sexp = function
| "<dir>" -> Ok None
| digest ->
(match Digest.from_hex digest with
| Some digest -> Ok (Some digest)
| None ->
Error
(Failure
(sprintf
"Cannot parse file digest %s in cache metadata entry"
file_digest)))
(sprintf "Cannot parse file digest %S in cache metadata entry" digest)))
;;

let of_sexp = function
| Sexp.List [ Atom path; Atom digest ] ->
(match digest_of_sexp digest with
| Ok digest -> Ok { path; digest }
| Error e -> Error e)
| _ -> Error (Failure "Cannot parse cache metadata entry")
;;
end
Expand Down
5 changes: 3 additions & 2 deletions src/dune_cache_storage/dune_cache_storage.mli
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,9 @@ end
module Artifacts : sig
module Metadata_entry : sig
type t =
{ file_path : string (** Can have more than one component for directory targets *)
; file_digest : Digest.t
{ path : string (** Can have more than one component for directory targets *)
; digest : Digest.t option
(** This digest is always present when [path] points to a file, and absent when it's a directory. *)
}
end

Expand Down
14 changes: 12 additions & 2 deletions src/dune_engine/build_system.ml
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ end = struct
(* Fact: alias [a] expands to the set of file-digest pairs [digests] *)
Dep.Fact.alias a digests
| File f ->
(* Not necessarily a file. Can also be a directory... *)
let+ digest = build_file f in
(* Fact: file [f] has digest [digest] *)
Dep.Fact.file f digest
Expand Down Expand Up @@ -856,9 +857,12 @@ end = struct
rleshchinskiy: Is this digest ever used? [build_dir] discards it and do we
(or should we) ever use [build_file] to build directories? Perhaps this could
be split in two memo tables, one for files and one for directories. *)
(* ElectreAAS: Tentative answer to above comments: a lot of functions are called
[build_file] or [create_file] even though they also handle directories.
Also yes this digest is used by [Exported.build_dep] defined above. *)
(match Cached_digest.build_file ~allow_dirs:true path with
| Ok digest -> digest, Dir_target { targets }
(* Must be a directory target *)
(* Must be a directory target. *)
| Error _ ->
(* CR-someday amokhov: The most important reason we end up here is
[No_such_file]. I think some of the outcomes above are impossible
Expand Down Expand Up @@ -1060,7 +1064,8 @@ let file_exists fn =
(Path.Build.Map.mem rules_here.by_file_targets (Path.as_in_build_dir_exn fn))
| Build_under_directory_target { directory_target_ancestor } ->
let+ path_map = build_dir (Path.build directory_target_ancestor) in
Targets.Produced.mem path_map (Path.as_in_build_dir_exn fn)
(* Note that in the case of directory targets, we also check if directories exist. *)
Targets.Produced.mem_any path_map (Path.as_in_build_dir_exn fn)
;;

let files_of ~dir =
Expand Down Expand Up @@ -1161,6 +1166,11 @@ let build_file p =
()
;;

let build_dir p =
let+ (_ : Digest.t Targets.Produced.t) = build_dir p in
()
;;

let with_file p ~f =
let+ () = build_file p in
f p
Expand Down
Loading
Loading