Skip to content

Commit

Permalink
WIP: directory targets with empty subdirs
Browse files Browse the repository at this point in the history
Signed-off-by: Ambre Austen Suhamy <[email protected]>
  • Loading branch information
ElectreAAS committed Jan 10, 2025
1 parent cd876b2 commit f720346
Show file tree
Hide file tree
Showing 18 changed files with 876 additions and 325 deletions.
4 changes: 2 additions & 2 deletions bin/exec.ml
Original file line number Diff line number Diff line change
Expand Up @@ -223,15 +223,15 @@ let get_path_and_build_if_necessary sctx ~no_rebuild ~dir ~prog =
| Ok p -> build_prog ~no_rebuild ~prog p)
| Relative_to_current_dir ->
let path = Path.relative_to_source_in_build_or_external ~dir prog in
Build_system.file_exists path
Build_system.path_exists path
>>= (function
| true -> Memo.return (Some path)
| false ->
if not (Filename.check_suffix prog ".exe")
then Memo.return None
else (
let path = Path.extend_basename path ~suffix:".exe" in
Build_system.file_exists path
Build_system.path_exists path
>>| function
| true -> Some path
| false -> None))
Expand Down
2 changes: 1 addition & 1 deletion bin/ocaml/utop.ml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ let term =
let utop_target = Filename.concat dir Utop.utop_exe in
Path.build (Path.Build.relative (Context.build_dir context) utop_target)
in
Build_system.file_exists utop_target
Build_system.path_exists utop_target
>>= function
| false ->
User_error.raise
Expand Down
2 changes: 1 addition & 1 deletion boot/libs.ml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
let external_libraries = [ "unix"; "threads" ]
let external_libraries = [ "threads.posix" ]

let local_libraries =
[ ("otherlibs/ordering", Some "Ordering", false, None)
Expand Down
262 changes: 175 additions & 87 deletions src/dune_cache/local.ml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ module Target = struct
Path.Build.chmod path ~mode:(Path.Permissions.remove Path.Permissions.write st_perm);
let executable = Path.Permissions.test Path.Permissions.execute st_perm in
Some { executable }
(* FIXME: is it wise to also "create" dirs here? *)
| { Unix.st_kind = Unix.S_DIR; st_perm; _ } ->
let executable = Path.Permissions.test Path.Permissions.execute st_perm in
assert executable;
Some { executable }
| (exception Unix.Unix_error _) | _ -> None
;;
end
Expand Down Expand Up @@ -79,11 +84,27 @@ module Artifacts = struct
(artifacts : Digest.t Targets.Produced.t)
=
let entries =
Targets.Produced.foldi artifacts ~init:[] ~f:(fun target file_digest entries ->
let entry : Metadata_entry.t =
{ file_path = Path.Local.to_string target; file_digest }
in
entry :: entries)
Targets.Produced.foldi
artifacts
~init:[]
~f:(fun target ~is_file file_digest entries ->
(if Targets.Produced.debug_out
then
let open Pp.O in
Pp.to_fmt
Format.std_formatter
(Pp.paragraphf "[StoreMeta %S]" (Path.Local.to_string target) ++ Pp.space));
(* let kind = match file_digest FIXME: sort on enum *)
let entry : Metadata_entry.t =
match file_digest, is_file with
| Some file_digest, _ ->
{ file_path = Path.Local.to_string target; file_digest; is_file }
| None, true -> failwith "PACONTEN"
| None, false ->
let file_digest = Dune_digest.generic "salut" in
{ file_path = Path.Local.to_string target; is_file; file_digest }
in
entry :: entries)
|> List.rev
in
Metadata_file.store ~mode { metadata; entries } ~rule_digest
Expand All @@ -103,12 +124,33 @@ module Artifacts = struct
Result.try_with (fun () ->
(* CR-someday rleshchinskiy: We recreate the directory structure here but it might be
simpler to just use file digests instead of file names and no subdirectories. *)
Path.Local.Map.iteri targets.dirs ~f:(fun path _ ->
Path.mkdir_p (Path.append_local temp_dir path));
Targets.Produced.iteri targets ~f:(fun path _ ->
let path_in_build_dir = Path.build (Path.Build.append_local targets.root path) in
let path_in_temp_dir = Path.append_local temp_dir path in
portable_hardlink_or_copy ~src:path_in_build_dir ~dst:path_in_temp_dir))
(* The comment above seems outdated wrt. 'no subdirectories'... *)
Targets.Produced.iteri
targets
~d:(fun dir _ ->
(if Targets.Produced.debug_out
then
let open Pp.O in
Pp.to_fmt
Format.std_formatter
(Pp.paragraphf "[Store_dir %S]" (Path.Local.to_string dir) ++ Pp.space));
Path.mkdir_p (Path.append_local temp_dir dir))
~f:(fun file _ ->
let path_in_build_dir =
Path.build (Path.Build.append_local targets.root file)
in
let path_in_temp_dir = Path.append_local temp_dir file in
(if Targets.Produced.debug_out
then
let open Pp.O in
Pp.to_fmt
Format.std_formatter
(Pp.paragraphf
"[Store_file: %S -> %S]"
(Path.to_string path_in_build_dir)
(Path.to_string path_in_temp_dir)
++ Pp.space));
portable_hardlink_or_copy ~src:path_in_build_dir ~dst:path_in_temp_dir))
;;

(* Step II of [store_skipping_metadata].
Expand All @@ -118,10 +160,33 @@ module Artifacts = struct
: Digest.t Targets.Produced.t Or_exn.t Fiber.t
=
let open Fiber.O in
let fff path { Target.executable } =
let file = Path.append_local temp_dir path in
(if Targets.Produced.debug_out
then
let open Pp.O in
Pp.to_fmt
Format.std_formatter
(Pp.paragraphf "[CompDigests %S]" (Path.Local.to_string path) ++ Pp.space));
compute_digest ~executable file
in
(* FIXME: nothing special here? *)
Fiber.collect_errors (fun () ->
Targets.Produced.parallel_map targets ~f:(fun path { Target.executable } ->
let file = Path.append_local temp_dir path in
compute_digest ~executable file))
Targets.Produced.parallel_map targets ~f:fff ~d:(fun path ->
function
| None ->
(if Targets.Produced.debug_out
then
let open Pp.O in
Pp.to_fmt
Format.std_formatter
(Pp.paragraphf
"[CompDigests %S without meta]"
(Path.Local.to_string path)
++ Pp.space));
Fiber.return None
| Some _exe -> Fiber.return None
(* FIXME: ??? Fiber.map ~f:Option.some (fff path exe) *)))
>>| Result.map_error ~f:(function
| exn :: _ -> exn.Exn_with_backtrace.exn
| [] -> assert false)
Expand All @@ -132,86 +197,106 @@ module Artifacts = struct
Targets.Produced.foldi
artifacts
~init:Store_result.empty
~f:(fun target digest results ->
let path_in_temp_dir = Path.append_local temp_dir target in
let path_in_cache = file_path ~file_digest:digest in
let store_using_hardlinks () =
match
Dune_cache_storage.Util.Optimistically.link
~src:path_in_temp_dir
~dst:path_in_cache
with
| exception Unix.Unix_error (Unix.EEXIST, _, _) ->
(* We end up here if the cache already contains an entry for this
artifact. We deduplicate by keeping only one copy, in the
cache. *)
let path_in_build_dir =
Path.build (Path.Build.append_local artifacts.root target)
in
(match
Path.unlink_no_err path_in_temp_dir;
(* At first, we deduplicate the temporary file. Doing this
intermediate step allows us to keep the original target in case
the below link step fails. This might happen if the trimmer has
just deleted [path_in_cache]. In this rare case, this function
fails with an [Error], and so we might end up with some
duplicates in the workspace. *)
link_even_if_there_are_too_many_links_already
~src:path_in_cache
~dst:path_in_temp_dir;
(* Now we can simply rename the temporary file into the target,
knowing that the original target remains in place if the
renaming fails.
~f:(fun target ~is_file digest results ->
(if Targets.Produced.debug_out
then
let open Pp.O in
Pp.to_fmt
Format.std_formatter
(Pp.paragraphf "[Store_to_cache %S]" (Path.Local.to_string target)
++ Pp.space));
match digest, is_file with
| None, true -> failwith "paconten"
| _, false ->
(*FIXME: sure about that? *)
results
| Some file_digest, true ->
let path_in_temp_dir = Path.append_local temp_dir target in
let path_in_cache = file_path ~file_digest in
let store_using_hardlinks () =
match
Dune_cache_storage.Util.Optimistically.link
~src:path_in_temp_dir
~dst:path_in_cache
with
| exception Unix.Unix_error (Unix.EEXIST, _, _) ->
(* We end up here if the cache already contains an entry for this
artifact. We deduplicate by keeping only one copy, in the
cache. *)
let path_in_build_dir =
Path.build (Path.Build.append_local artifacts.root target)
in
(match
Path.unlink_no_err path_in_temp_dir;
(* At first, we deduplicate the temporary file. Doing this
intermediate step allows us to keep the original target in case
the below link step fails. This might happen if the trimmer has
just deleted [path_in_cache]. In this rare case, this function
fails with an [Error], and so we might end up with some
duplicates in the workspace. *)
link_even_if_there_are_too_many_links_already
~src:path_in_cache
~dst:path_in_temp_dir;
(* Now we can simply rename the temporary file into the target,
knowing that the original target remains in place if the
renaming fails.
One curious case to think about is if the file in the cache
happens to have the same inode as the file in the workspace. In
that case this deduplication should be a no-op, but the
[rename] operation has a quirk where [path_in_temp_dir] can
remain on disk. This is not a problem because we clean the
temporary directory later. *)
Path.rename path_in_temp_dir path_in_build_dir
with
| exception e -> Store_result.Error e
| () -> Already_present)
| exception e -> Error e
| () -> Stored
in
let store_using_test_and_rename () =
(* CR-someday amokhov: There is a race here. If [path_in_cache] is
created after [Path.exists] but before [Path.rename], it will be
silently overwritten. Find a good way to avoid this race. *)
match Path.exists path_in_cache with
| true -> Store_result.Already_present
| false ->
(match
Dune_cache_storage.Util.Optimistically.rename
~src:path_in_temp_dir
~dst:path_in_cache
with
| exception e -> Error e
| () -> Stored)
in
let result =
match (mode : Dune_cache_storage.Mode.t) with
| Hardlink -> store_using_hardlinks ()
| Copy -> store_using_test_and_rename ()
in
Store_result.combine results result)
One curious case to think about is if the file in the cache
happens to have the same inode as the file in the workspace. In
that case this deduplication should be a no-op, but the
[rename] operation has a quirk where [path_in_temp_dir] can
remain on disk. This is not a problem because we clean the
temporary directory later. *)
Path.rename path_in_temp_dir path_in_build_dir
with
| exception e -> Store_result.Error e
| () -> Already_present)
| exception e -> Error e
| () -> Stored
in
let store_using_test_and_rename () =
(* CR-someday amokhov: There is a race here. If [path_in_cache] is
created after [Path.exists] but before [Path.rename], it will be
silently overwritten. Find a good way to avoid this race. *)
match Path.exists path_in_cache with
| true -> Store_result.Already_present
| false ->
(match
Dune_cache_storage.Util.Optimistically.rename
~src:path_in_temp_dir
~dst:path_in_cache
with
| exception e -> Error e
| () -> Stored)
in
let result =
match (mode : Dune_cache_storage.Mode.t) with
| Hardlink -> store_using_hardlinks ()
| Copy -> store_using_test_and_rename ()
in
Store_result.combine results result)
;;

let store_skipping_metadata ~mode ~targets ~compute_digest
: Store_artifacts_result.t Fiber.t
=
Dune_cache_storage.with_temp_dir ~suffix:"artifacts" (function
| Error exn -> Fiber.return (Store_artifacts_result.Error exn)
| Error exn ->
(* Format.printf "In %s err1, %s@." __FUNCTION__ __LOC__; *)
Fiber.return (Store_artifacts_result.Error exn)
| Ok temp_dir ->
(match store_targets_to ~temp_dir ~targets ~mode with
| Error exn -> Fiber.return (Store_artifacts_result.Error exn)
| Error exn ->
(* Format.printf "In %s err2, %s@." __FUNCTION__ __LOC__; *)
Fiber.return (Store_artifacts_result.Error exn)
| Ok () ->
compute_digests_in ~temp_dir ~targets ~compute_digest
>>| (function
| Error exn -> Store_artifacts_result.Error exn
| Error exn ->
(* Format.printf "In %s err3, %s@." __FUNCTION__ __LOC__; *)
Store_artifacts_result.Error exn
| Ok artifacts ->
(* Format.printf "In %s ok!, %s@." __FUNCTION__ __LOC__; *)
let result = store_to_cache_from ~temp_dir ~mode artifacts in
Store_artifacts_result.of_store_result ~artifacts result)))
;;
Expand Down Expand Up @@ -281,10 +366,7 @@ module Artifacts = struct
| Copy -> copy ~src ~dst);
Unwind.push unwind (fun () -> Path.Build.unlink_no_err target)
in
try
Path.Local.Map.iteri artifacts.dirs ~f:(fun dir _ -> mk_dir dir);
Targets.Produced.iteri artifacts ~f:mk_file
with
try Targets.Produced.iteri artifacts ~f:mk_file ~d:(fun dir _ -> mk_dir dir) with
| exn ->
Unwind.unwind unwind;
reraise exn
Expand All @@ -296,8 +378,14 @@ module Artifacts = struct
let artifacts =
Path.Local.Map.of_list_map_exn
entries
~f:(fun { Metadata_entry.file_path; file_digest } ->
Path.Local.of_string file_path, file_digest)
~f:(fun { Metadata_entry.file_path; file_digest; is_file } ->
(if Targets.Produced.debug_out
then
let open Pp.O in
Pp.to_fmt
Format.std_formatter
(Pp.paragraphf "[Restore: %S]" file_path ++ Pp.space));
Path.Local.of_string file_path, (file_digest, is_file))
|> Targets.Produced.of_files target_dir
in
try
Expand Down
Loading

0 comments on commit f720346

Please sign in to comment.