diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml index e38a824e9..0eb0ffb71 100644 --- a/.github/workflows/examples.yml +++ b/.github/workflows/examples.yml @@ -23,4 +23,4 @@ jobs: touch Project.toml julia -O3 --project -e 'import Pkg; Pkg.develop(path="moment_kinetics/"); Pkg.add("NCDatasets"); Pkg.precompile()' # Reduce nstep for each example to 10 to avoid the CI job taking too long - julia -O3 --project -e 'using moment_kinetics; for (root, dirs, files) in walkdir("examples") for file in files if endswith(file, ".toml") filename = joinpath(root, file); println(filename); input = moment_kinetics.moment_kinetics_input.read_input_file(filename); t_input = get(input, "timestepping", Dict{String,Any}()); t_input["nstep"] = 10; t_input["dt"] = 1.0e-12; input["timestepping"] = t_input; pop!(get(input, "z", Dict{String,Any}()), "nelement_local", ""); pop!(get(input, "r", Dict{String,Any}()), "nelement_local", ""); electron_t_input = get(input, "electron_timestepping", Dict{String,Any}()); electron_t_input["initialization_residual_value"] = 1.0e8; electron_t_input["converged_residual_value"] = 1.0e8; input["electron_timestepping"] = electron_t_input; nl_solver_input = get(input, "nonlinear_solver", Dict{String,Any}()); nl_solver_input["rtol"] = 1.0e6; nl_solver_input["atol"] = 1.0e6; input["nonlinear_solver"] = nl_solver_input; run_moment_kinetics(input) end end end' + julia -O3 --project -e 'using moment_kinetics; using moment_kinetics.type_definitions: OptionsDict; for (root, dirs, files) in walkdir("examples") for file in files if endswith(file, ".toml") filename = joinpath(root, file); println(filename); input = moment_kinetics.moment_kinetics_input.read_input_file(filename); t_input = get(input, "timestepping", OptionsDict()); t_input["nstep"] = 10; t_input["dt"] = 1.0e-12; input["timestepping"] = t_input; pop!(get(input, "z", OptionsDict()), "nelement_local", ""); pop!(get(input, "r", OptionsDict()), "nelement_local", ""); electron_t_input = get(input, "electron_timestepping", OptionsDict()); electron_t_input["initialization_residual_value"] = 1.0e8; electron_t_input["converged_residual_value"] = 1.0e8; input["electron_timestepping"] = electron_t_input; nl_solver_input = get(input, "nonlinear_solver", OptionsDict()); nl_solver_input["rtol"] = 1.0e6; nl_solver_input["atol"] = 1.0e6; input["nonlinear_solver"] = nl_solver_input; run_moment_kinetics(input) end end end' diff --git a/.github/workflows/parallel_test.yml b/.github/workflows/parallel_test.yml index 5441dcc82..129063d91 100644 --- a/.github/workflows/parallel_test.yml +++ b/.github/workflows/parallel_test.yml @@ -21,14 +21,15 @@ jobs: - uses: julia-actions/cache@v2 - run: | touch Project.toml - julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["MPI", "MPIPreferences"]); using MPIPreferences; MPIPreferences.use_jll_binary("OpenMPI_jll")' + julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["MPI", "MPIPreferences", "PackageCompiler"]); using MPIPreferences; MPIPreferences.use_jll_binary("OpenMPI_jll")' julia --project -O3 --check-bounds=no -e 'using MPI; MPI.install_mpiexecjl(; destdir=".")' julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["NCDatasets", "Random", "SpecialFunctions", "StatsBase", "Test"]); Pkg.develop(path="moment_kinetics/")' julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.precompile()' + julia --project -O3 --check-bounds=no precompile.jl # Need to use openmpi so that we can use `--oversubscribe` to allow using more MPI ranks than physical 
cores - ./mpiexecjl -np 3 --oversubscribe julia --project -O3 --check-bounds=no moment_kinetics/test/runtests.jl --ci --debug 1 - ./mpiexecjl -np 4 --oversubscribe julia --project -O3 --check-bounds=no moment_kinetics/test/runtests.jl --ci --debug 1 - ./mpiexecjl -np 2 --oversubscribe julia --project -O3 --check-bounds=no moment_kinetics/test/runtests.jl --ci --debug 1 --long + ./mpiexecjl -np 3 --oversubscribe julia -J moment_kinetics.so --project -O3 --check-bounds=no moment_kinetics/test/runtests.jl --ci --debug 1 + ./mpiexecjl -np 4 --oversubscribe julia -J moment_kinetics.so --project -O3 --check-bounds=no moment_kinetics/test/runtests.jl --ci --debug 1 + ./mpiexecjl -np 2 --oversubscribe julia -J moment_kinetics.so --project -O3 --check-bounds=no moment_kinetics/test/runtests.jl --ci --debug 1 --long # Note: MPI.jl's default implementation is mpich, which has a similar option # `--with-device=ch3:sock`, but that needs to be set when compiling mpich. shell: bash @@ -46,12 +47,13 @@ jobs: - uses: julia-actions/cache@v2 - run: | touch Project.toml - julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["MPI", "MPIPreferences"]); using MPIPreferences; MPIPreferences.use_jll_binary("OpenMPI_jll")' + julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["MPI", "MPIPreferences", "PackageCompiler"]); using MPIPreferences; MPIPreferences.use_jll_binary("OpenMPI_jll")' julia --project -O3 --check-bounds=no -e 'using MPI; MPI.install_mpiexecjl(; destdir=".")' julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["NCDatasets", "Random", "SpecialFunctions", "StatsBase", "Test"]); Pkg.develop(path="moment_kinetics/")' julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.precompile()' + julia --project -O3 --check-bounds=no precompile.jl # Need to use openmpi so that we can use `--oversubscribe` to allow using more MPI ranks than physical cores - ./mpiexecjl -np 4 --oversubscribe julia --project -O3 --check-bounds=no moment_kinetics/test/runtests.jl --ci --debug 1 + ./mpiexecjl -np 4 --oversubscribe julia -J moment_kinetics.so --project -O3 --check-bounds=no moment_kinetics/test/runtests.jl --ci --debug 1 # Note: MPI.jl's default implementation is mpich, which has a similar option # `--with-device=ch3:sock`, but that needs to be set when compiling mpich. shell: bash diff --git a/docs/src/developing.md b/docs/src/developing.md index 4b57f1e15..241f4aceb 100644 --- a/docs/src/developing.md +++ b/docs/src/developing.md @@ -41,6 +41,18 @@ String and as a tree of HDF5 variables. use `nothing` as a default for some setting, that is fine, but must be done after the input is read, and not stored in the `input_dict`. +!!! warning "Parallel I/O consistency" + To ensure consistency between all MPI ranks in the order of reads and/or + writes when using Parallel I/O, all dictionary types used to store options + must be either `OrderedDict` or `SortedDict`, so that their order of + entries is deterministic (which is not the case for `Dict`, which instead + optimises for look-up speed). This should mostly be taken care of by using + `moment_kinetics`'s `OptionsDict` type (which is an alias for + `OrderedDict{String,Any}`). We also need to sort the input after it is read + by `TOML`, which is taken care of by + [`moment_kinetics.input_structs.convert_to_sorted_nested_OptionsDict`](@ref). + See also [Parallel I/O](@ref parallel_io_section). + ## Array types @@ -219,6 +231,48 @@ communicator is `comm_block[]`). 
See also notes on debugging the 'anyv' parallelisation: [Collision operator and 'anyv' region](@ref). +## [Parallel I/O](@id parallel_io_section) + +The code provides an option to use parallel I/O, which allows all output to be +written to a single output file even when using distributed-MPI parallelism - +this is the default option when the linked HDF5 library is compiled with +parallel-I/O support. + +There are a few things to be aware of to ensure parallel I/O works correctly: +* Some operations have to be called simultaneously on all the MPI ranks that + have the output file open. Roughly, these are any operations that change the + 'metadata' of the file, for example opening/closing files, creating + variables, extending dimensions of variables, changing attributes of + variables. Reading or writing the data from a variable does not have to be + done collectively - actually when we write data we ensure that every rank + that is writing writes a non-overlapping slice of the array to avoid + contention that could slow down the I/O (because one rank has to wait for + another) and to avoid slight inconsistencies because it is uncertain which + rank writes the data last. For more details see the [HDF5.jl + documentation](https://juliaio.github.io/HDF5.jl/stable/mpi/#Reading-and-writing-data-in-parallel) + and the [HDF5 + documentation](https://support.hdfgroup.org/archive/support/HDF5/doc/RM/CollectiveCalls.html). +* One important subtlety is that the `Dict` type does not guarantee a + deterministic order of entries. When you iterate over a `Dict`, you can get + the results in a different order at different times or on different MPI + ranks. If we iterated over a `Dict` to create variables to write to an output + file, or to read from a file, then different MPI ranks might (sometimes) get + the variables in a different order, causing errors. We therefore use either + `OrderedDict` or `SortedDict` types for anything that might be written to or + read from an HDF5 file. + +If the collective operations are not done perfectly consistently, the errors +can be extremely non-obvious. The inconsistent operations may appear to execute +correctly, for example because the same number of variables are created, and +the metadata may only actually be written from the rank-0 process, but the +inconsistency may cause errors later. [JTO, 3/11/2024: my best guess as to the +reason for this is that it puts HDF5's 'metadata cache' in inconsistent states +on different ranks, and this means that at some later time the ranks will cycle +some metadata out of the cache in different orders, and then some ranks will be +able to get the metadata from the cache, while others have to read it from the +file. The reading from the file requires some collective MPI call, which is +only called from some ranks and not others, causing the code to hang.] 
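+
+As a minimal illustration of the ordering issue described in the second bullet
+above (this sketch is not code from `moment_kinetics` itself; it only assumes
+the `OptionsDict` alias for `OrderedDict{String,Any}` mentioned earlier):
+
+```julia
+using OrderedCollections: OrderedDict
+
+# Illustrative alias matching the `OptionsDict` described above.
+const OptionsDict = OrderedDict{String,Any}
+
+# An OrderedDict iterates in insertion order, so every MPI rank that builds the
+# same options in the same order also iterates over them in the same order.
+options = OptionsDict("timestepping" => OptionsDict("nstep" => 10),
+                      "z" => OptionsDict("nelement" => 4))
+println(collect(keys(options)))   # always ["timestepping", "z"]
+
+# A plain Dict makes no ordering guarantee, so two ranks holding equal Dicts
+# could still iterate over their entries in different orders.
+plain = Dict("timestepping" => Dict("nstep" => 10), "z" => Dict("nelement" => 4))
+println(collect(keys(plain)))     # order is an implementation detail
+```
+
+If variables were created in the `options` order on one rank and in the `plain`
+order on another, the collective HDF5 calls would no longer match up, which is
+exactly the failure mode described above.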
+ ## Package structure The structure of the packages in the `moment_kinetics` repo is set up so that diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index b881ddfa7..0652d06c6 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -8429,7 +8429,7 @@ end """ timing_data(run_info; plot_prefix=nothing, threshold=nothing, include_patterns=nothing, exclude_patterns=nothing, ranks=nothing, - figsize=nothing, interactive_figs=nothing, include_legend=true) + figsize=nothing, include_legend=true) Plot timings from different parts of the `moment_kinetics` code. Only timings from function calls during the time evolution loop are included, not from the setup, because we @@ -8463,11 +8463,11 @@ set in `this_input_dict`. When this function is called as part of the settings are read from the post processing input file (by default `post_processing_input.toml`). The function arguments take precedence, if they are given. -If you load GLMakie (by doing `using GLMakie`) before running this function, -`interactive_figs` can be passed one of, or a Vector of, `:times`, `:ncalls`, or `:allocs` -to display an interactive window showing that figure. Multiple values can be passed, but -the DataInspector (which displays information when you hover over the figure) will only -work for the first one in the list (this may be a GLMakie bug?). +If you load GLMakie by doing `using GLMakie` before running this function, but after +calling `using makie_post_processing` (because `CairoMakie` is loaded when the module is +loaded and would take over if you load `GLMakie` before `makie_post_processing`), the +figures will be displayed in interactive windows. When you hover over a line some useful +information will be displayed. Pass `include_legend=false` to remove legends from the figures. This is mostly useful for interactive figures where hovering over the lines can show what they are, so that the @@ -8475,8 +8475,7 @@ legend is not needed. 
""" function timing_data(run_info::Tuple; plot_prefix=nothing, threshold=nothing, include_patterns=nothing, exclude_patterns=nothing, ranks=nothing, - this_input_dict=nothing, figsize=nothing, interactive_figs=nothing, - include_legend=true) + this_input_dict=nothing, figsize=nothing, include_legend=true) if this_input_dict !== nothing input = Dict_to_NamedTuple(this_input_dict["timing_data"]) @@ -8515,40 +8514,31 @@ function timing_data(run_info::Tuple; plot_prefix=nothing, threshold=nothing, ncalls_ax=ncalls_ax, allocs_ax=allocs_ax, irun=irun, figsize=figsize) end - if interactive_figs === nothing && string(Makie.current_backend()) == "GLMakie" + if string(Makie.current_backend()) == "GLMakie" # Can make interactive plots - if isa(interactive_figs, Symbol) - interactive_figs = [interactive_figs] - end backend = Makie.current_backend() - for figtype ∈ interactive_figs - if figtype == :times - if include_legend - Legend(times_fig[2,1], times_ax; tellheight=true, tellwidth=false, - merge=true) - end - DataInspector(times_fig) - display(backend.Screen(), times_fig) - elseif figtype == :ncalls - if include_legend - Legend(ncalls_fig[2,1], times_ax; tellheight=true, tellwidth=false, - merge=true) - end - DataInspector(ncalls_fig) - display(backend.Screen(), ncalls_fig) - elseif figtype == :allocs - if include_legend - Legend(allocs_fig[2,1], times_ax; tellheight=true, tellwidth=false, - merge=true) - end - DataInspector(allocs_fig) - display(backend.Screen(), allocs_fig) - else - error("Got unrecognized entry $figtype in `interactive_figs`") - end + if include_legend + Legend(times_fig[2,1], times_ax; tellheight=true, tellwidth=false, + merge=true) + end + DataInspector(times_fig) + display(backend.Screen(), times_fig) + + if include_legend + Legend(ncalls_fig[2,1], times_ax; tellheight=true, tellwidth=false, + merge=true) end + DataInspector(ncalls_fig) + display(backend.Screen(), ncalls_fig) + + if include_legend + Legend(allocs_fig[2,1], times_ax; tellheight=true, tellwidth=false, + merge=true) + end + DataInspector(allocs_fig) + display(backend.Screen(), allocs_fig) elseif plot_prefix !== nothing if include_legend Legend(times_fig[2,1], times_ax; tellheight=true, tellwidth=true, merge=true) @@ -8587,7 +8577,7 @@ end function timing_data(run_info; plot_prefix=nothing, threshold=nothing, include_patterns=nothing, exclude_patterns=nothing, ranks=nothing, this_input_dict=nothing, times_ax=nothing, ncalls_ax=nothing, - allocs_ax=nothing, irun=1, figsize=nothing, interactive_figs=nothing, + allocs_ax=nothing, irun=1, figsize=nothing, include_legend=true) if this_input_dict !== nothing @@ -8658,143 +8648,155 @@ function timing_data(run_info; plot_prefix=nothing, threshold=nothing, end linestyles = linestyle=[:solid, :dash, :dot, :dashdot, :dashdotdot] - for irank ∈ 0:run_info.nrank-1 - all_global_timer_variables = run_info.timing_variable_names[irank] - time_advance_timer_variables = [v for v ∈ all_global_timer_variables if occursin("time_advance! 
step", v)] - time_variables = [v for v ∈ time_advance_timer_variables if startswith(v, "time:")] - ncalls_variables = [v for v ∈ time_advance_timer_variables if startswith(v, "ncalls:")] - allocs_variables = [v for v ∈ time_advance_timer_variables if startswith(v, "allocs:")] - - this_rank_group = "timing_data/rank$irank" - - inspector_label_func = (self,i,p) -> "$(self.label[]) $irank\nx: $(p[1])\ny: $(p[2])" - - function label_irank(ax, variable, color, unit_conversion=1) - if run_info.nrank > 1 - # Label curves with irank so we can tell which is which - index = ((irank + 1) % (length(variable) - 1)) + 1 - with_theme - text!(ax, run_info.time[index], - variable[index] * unit_conversion; - text="$irank", color=color) - end - end - - function check_include_exclude(variable_name) - explicitly_included = (include_patterns !== nothing && - any(occursin(p, variable_name) for p ∈ include_patterns)) - if exclude_patterns === nothing && include_patterns !== nothing - excluded = !explicitly_included - elseif exclude_patterns !== nothing - if !explicitly_included && - any(occursin(p, variable_name) for p ∈ exclude_patterns) - excluded = true - else - excluded = false - end + time_advance_timer_variables = [v for v ∈ run_info.timing_variable_names if occursin("time_advance! step", v)] + time_variables = [v for v ∈ time_advance_timer_variables if startswith(v, "time:")] + ncalls_variables = [v for v ∈ time_advance_timer_variables if startswith(v, "ncalls:")] + allocs_variables = [v for v ∈ time_advance_timer_variables if startswith(v, "allocs:")] + + timing_group = "timing_data" + + function label_irank(ax, variable, irank, color, unit_conversion=1) + if run_info.nrank > 1 + # Label curves with irank so we can tell which is which + index = ((irank + 1) % (length(variable) - 1)) + 1 + with_theme + text!(ax, run_info.time[index], + variable[index] * unit_conversion; + text="$irank", color=color) + end + end + + function check_include_exclude(variable_name) + explicitly_included = (include_patterns !== nothing && + any(occursin(p, variable_name) for p ∈ include_patterns)) + if exclude_patterns === nothing && include_patterns !== nothing + excluded = !explicitly_included + elseif exclude_patterns !== nothing + if !explicitly_included && + any(occursin(p, variable_name) for p ∈ exclude_patterns) + excluded = true else excluded = false end - return excluded, explicitly_included + else + excluded = false end + return excluded, explicitly_included + end - # Plot the total time - time_unit_conversion = 1.0e-9 # ns to s - total_time_variable_name = "time:moment_kinetics;time_advance! step" - total_time = get_variable(run_info, total_time_variable_name * "_per_step", - group=this_rank_group) + # Plot the total time + time_unit_conversion = 1.0e-9 # ns to s + total_time_variable_name = "time:moment_kinetics;time_advance! step" + total_time = get_variable(run_info, total_time_variable_name * "_per_step", + group=timing_group) + for irank ∈ 0:run_info.nrank-1 label = "time_advance! 
step" - lines!(times_ax, run_info.time, total_time .* time_unit_conversion; color=:black, - linestyle=linestyles[irun], label=label, - inspector_label=inspector_label_func) - label_irank(times_ax, total_time, :black, time_unit_conversion) - mean_total_time = mean(total_time) - for (variable_counter, variable_name) ∈ enumerate(time_variables) - if variable_name == total_time_variable_name - # Plotted this already - continue - end - excluded, explicitly_included = check_include_exclude(variable_name) - if excluded - continue - end - variable = get_variable(run_info, variable_name * "_per_step", - group=this_rank_group) - if !explicitly_included && mean(variable) < threshold * mean_total_time - # This variable takes a very small amount of time, so skip. - continue - end + irank_slice = total_time[irank+1,:] + lines!(times_ax, run_info.time, irank_slice .* time_unit_conversion; + color=:black, linestyle=linestyles[irun], label=label, + inspector_label=(self,i,p) -> "$(self.label[]) $irank\nx: $(p[1])\ny: $(p[2])") + label_irank(times_ax, irank_slice, irank, :black, time_unit_conversion) + end + mean_total_time = mean(total_time) + for (variable_counter, variable_name) ∈ enumerate(time_variables) + if variable_name == total_time_variable_name + # Plotted this already + continue + end + excluded, explicitly_included = check_include_exclude(variable_name) + if excluded + continue + end + variable = get_variable(run_info, variable_name * "_per_step", + group=timing_group) + if !explicitly_included && mean(variable) < threshold * mean_total_time + # This variable takes a very small amount of time, so skip. + continue + end + for irank ∈ 0:run_info.nrank-1 label = split(variable_name, "time_advance! step;")[2] - l = lines!(times_ax, run_info.time, variable .* time_unit_conversion; + irank_slice = variable[irank+1,:] + l = lines!(times_ax, run_info.time, irank_slice .* time_unit_conversion; color=Cycled(variable_counter), linestyle=linestyles[irun], - label=label, inspector_label=inspector_label_func) - label_irank(times_ax, variable, l.color, time_unit_conversion) + label=label, inspector_label=(self,i,p) -> "$(self.label[]) $irank\nx: $(p[1])\ny: $(p[2])") + label_irank(times_ax, irank_slice, irank, l.color, time_unit_conversion) end + end - # Plot the number of calls - total_ncalls_variable_name = "ncalls:moment_kinetics;time_advance! step" - total_ncalls = get_variable(run_info, total_ncalls_variable_name * "_per_step", - group=this_rank_group) + # Plot the number of calls + total_ncalls_variable_name = "ncalls:moment_kinetics;time_advance! step" + total_ncalls = get_variable(run_info, total_ncalls_variable_name * "_per_step", + group=timing_group) + for irank ∈ 0:run_info.nrank-1 label = "time_advance! 
step" - lines!(ncalls_ax, run_info.time, total_ncalls; color=:black, + irank_slice = total_ncalls[irank+1,:] + lines!(ncalls_ax, run_info.time, irank_slice; color=:black, linestyle=linestyles[irun], label=label, - inspector_label=inspector_label_func) - label_irank(ncalls_ax, total_ncalls, :black) - mean_total_ncalls = mean(total_ncalls) - for (variable_counter, variable_name) ∈ enumerate(ncalls_variables) - if variable_name == total_ncalls_variable_name - # Plotted this already - continue - end - excluded, explicitly_included = check_include_exclude(variable_name) - if excluded - continue - end - variable = get_variable(run_info, variable_name * "_per_step", - group=this_rank_group) - if !explicitly_included && mean(variable) < threshold * mean_total_ncalls - # This variable takes a very small number of calls, so skip. - continue - end + inspector_label=(self,i,p) -> "$(self.label[]) $irank\nx: $(p[1])\ny: $(p[2])") + label_irank(ncalls_ax, irank_slice, irank, :black) + end + mean_total_ncalls = mean(total_ncalls) + for (variable_counter, variable_name) ∈ enumerate(ncalls_variables) + if variable_name == total_ncalls_variable_name + # Plotted this already + continue + end + excluded, explicitly_included = check_include_exclude(variable_name) + if excluded + continue + end + variable = get_variable(run_info, variable_name * "_per_step", + group=timing_group) + if !explicitly_included && mean(variable) < threshold * mean_total_ncalls + # This variable takes a very small number of calls, so skip. + continue + end + for irank ∈ 0:run_info.nrank-1 label = split(variable_name, "time_advance! step;")[2] - l = lines!(ncalls_ax, run_info.time, variable; color=Cycled(variable_counter), - linestyle=linestyles[irun], label=label, - inspector_label=inspector_label_func) - label_irank(ncalls_ax, variable, l.color) + irank_slice = variable[irank+1,:] + l = lines!(ncalls_ax, run_info.time, irank_slice; + color=Cycled(variable_counter), linestyle=linestyles[irun], + label=label, inspector_label=(self,i,p) -> "$(self.label[]) $irank\nx: $(p[1])\ny: $(p[2])") + label_irank(ncalls_ax, irank_slice, irank, l.color) end + end - # Plot the total allocs - allocs_unit_conversion = 2^(-20) # bytes to MB - total_allocs_variable_name = "allocs:moment_kinetics;time_advance! step" - total_allocs = get_variable(run_info, total_allocs_variable_name * "_per_step", - group=this_rank_group) + # Plot the total allocs + allocs_unit_conversion = 2^(-20) # bytes to MB + total_allocs_variable_name = "allocs:moment_kinetics;time_advance! step" + total_allocs = get_variable(run_info, total_allocs_variable_name * "_per_step", + group=timing_group) + for irank ∈ 0:run_info.nrank-1 label = "time_advance! 
step" - lines!(allocs_ax, run_info.time, total_allocs .* allocs_unit_conversion; + irank_slice = total_allocs[irank+1,:] + lines!(allocs_ax, run_info.time, irank_slice .* allocs_unit_conversion; color=:black, linestyle=linestyles[irun], label=label, - inspector_label=inspector_label_func) - label_irank(allocs_ax, total_allocs, :black, allocs_unit_conversion) - mean_total_allocs = mean(total_allocs) - for (variable_counter, variable_name) ∈ enumerate(allocs_variables) - if variable_name == total_allocs_variable_name - # Plotted this already - continue - end - excluded, explicitly_included = check_include_exclude(variable_name) - if excluded - continue - end - variable = get_variable(run_info, variable_name * "_per_step", - group=this_rank_group) - if !explicitly_included && mean(variable) < threshold * mean_total_allocs - # This variable represents a very small amount of allocs, so skip. - continue - end + inspector_label=(self,i,p) -> "$(self.label[]) $irank\nx: $(p[1])\ny: $(p[2])") + label_irank(allocs_ax, irank_slice, irank, :black, allocs_unit_conversion) + end + mean_total_allocs = mean(total_allocs) + for (variable_counter, variable_name) ∈ enumerate(allocs_variables) + if variable_name == total_allocs_variable_name + # Plotted this already + continue + end + excluded, explicitly_included = check_include_exclude(variable_name) + if excluded + continue + end + variable = get_variable(run_info, variable_name * "_per_step", + group=timing_group) + if !explicitly_included && mean(variable) < threshold * mean_total_allocs + # This variable represents a very small amount of allocs, so skip. + continue + end + for irank ∈ 0:run_info.nrank-1 label = split(variable_name, "time_advance! step;")[2] - l = lines!(allocs_ax, run_info.time, variable .* allocs_unit_conversion; + irank_slice = variable[irank+1,:] + l = lines!(allocs_ax, run_info.time, irank_slice .* allocs_unit_conversion; color=Cycled(variable_counter), linestyle=linestyles[irun], - label=label, inspector_label=inspector_label_func) - label_irank(allocs_ax, variable, l.color, - allocs_unit_conversion) + label=label, inspector_label=(self,i,p) -> "$(self.label[]) $irank\nx: $(p[1])\ny: $(p[2])") + label_irank(allocs_ax, irank_slice, irank, l.color, allocs_unit_conversion) end end @@ -8803,37 +8805,28 @@ function timing_data(run_info; plot_prefix=nothing, threshold=nothing, # Can make interactive plots - if isa(interactive_figs, Symbol) - interactive_figs = [interactive_figs] - end backend = Makie.current_backend() - for figtype ∈ interactive_figs - if figtype == :times - if include_legend - Legend(times_fig[2,1], times_ax; tellheight=true, tellwidth=false, - merge=true) - end - DataInspector(times_fig) - display(backend.Screen(), times_fig) - elseif figtype == :ncalls - if include_legend - Legend(ncalls_fig[2,1], times_ax; tellheight=true, tellwidth=false, - merge=true) - end - DataInspector(ncalls_fig) - display(backend.Screen(), ncalls_fig) - elseif figtype == :allocs - if include_legend - Legend(allocs_fig[2,1], times_ax; tellheight=true, tellwidth=false, - merge=true) - end - DataInspector(allocs_fig) - display(backend.Screen(), allocs_fig) - else - error("Got unrecognized entry $figtype in `interactive_figs`") - end + if include_legend + Legend(times_fig[2,1], times_ax; tellheight=true, tellwidth=false, + merge=true) + end + DataInspector(times_fig) + display(backend.Screen(), times_fig) + + if include_legend + Legend(ncalls_fig[2,1], times_ax; tellheight=true, tellwidth=false, + merge=true) + end + DataInspector(ncalls_fig) + 
display(backend.Screen(), ncalls_fig) + + if include_legend + Legend(allocs_fig[2,1], times_ax; tellheight=true, tellwidth=false, + merge=true) end + DataInspector(allocs_fig) + display(backend.Screen(), allocs_fig) else if times_fig !== nothing if include_legend diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl index 0f80b4992..bf1c45416 100644 --- a/moment_kinetics/src/coordinates.jl +++ b/moment_kinetics/src/coordinates.jl @@ -20,6 +20,7 @@ using ..input_structs using ..moment_kinetics_structs: null_spatial_dimension_info, null_velocity_dimension_info using MPI +using OrderedCollections: OrderedDict """ structure containing basic information related to coordinates @@ -204,7 +205,7 @@ function get_coordinate_input(input_dict, name; ignore_MPI=false) if input_dict === nothing boundary_parameters = nothing else - boundary_parameters_defaults = Dict{Symbol,Any}() + boundary_parameters_defaults = OrderedDict{Symbol,Any}() if name == "z" # parameter controlling the cutoff of the ion distribution function in the vpa # domain at the wall in z diff --git a/moment_kinetics/src/external_sources.jl b/moment_kinetics/src/external_sources.jl index 6fbd35a96..2eeeb3ac1 100644 --- a/moment_kinetics/src/external_sources.jl +++ b/moment_kinetics/src/external_sources.jl @@ -31,6 +31,7 @@ using ..timer_utils using ..velocity_moments: get_density using MPI +using OrderedCollections: OrderedDict """ setup_external_sources!(input_dict, r, z) @@ -156,10 +157,14 @@ function setup_external_sources!(input_dict, r, z, electron_physics) * "density_midpoint_control, energy, alphas, alphas-with-losses, " * "beam, beam-with-losses") end - return ion_source_data(; Dict(Symbol(k)=>v for (k,v) ∈ input)..., r_amplitude, - z_amplitude=z_amplitude, PI_density_target=PI_density_target, - PI_controller_amplitude, controller_source_profile, - PI_density_target_ir, PI_density_target_iz, PI_density_target_rank) + return ion_source_data(; OrderedDict(Symbol(k)=>v for (k,v) ∈ input)..., + r_amplitude=r_amplitude, z_amplitude=z_amplitude, + PI_density_target=PI_density_target, + PI_controller_amplitude=PI_controller_amplitude, + controller_source_profile=controller_source_profile, + PI_density_target_ir=PI_density_target_ir, + PI_density_target_iz=PI_density_target_iz, + PI_density_target_rank=PI_density_target_rank) end function get_settings_neutrals(source_index, active_flag) @@ -313,10 +318,14 @@ function setup_external_sources!(input_dict, r, z, electron_physics) * "beam, beam-with-losses, recycling (for neutrals only)") end - return neutral_source_data(; Dict(Symbol(k)=>v for (k,v) ∈ input)..., r_amplitude, - z_amplitude=z_amplitude, PI_density_target=PI_density_target, - PI_controller_amplitude, controller_source_profile, - PI_density_target_ir, PI_density_target_iz, PI_density_target_rank) + return neutral_source_data(; OrderedDict(Symbol(k)=>v for (k,v) ∈ input)..., + r_amplitude=r_amplitude, z_amplitude=z_amplitude, + PI_density_target=PI_density_target, + PI_controller_amplitude=PI_controller_amplitude, + controller_source_profile=controller_source_profile, + PI_density_target_ir=PI_density_target_ir, + PI_density_target_iz=PI_density_target_iz, + PI_density_target_rank=PI_density_target_rank) end function get_settings_electrons(i, ion_settings) # Note most settings for the electron source are copied from the ion source, @@ -339,9 +348,12 @@ function setup_external_sources!(input_dict, r, z, electron_physics) end input["source_strength"] = ion_settings.source_strength end - return 
electron_source_data(input["source_strength"], input["source_T"], - ion_settings.active, ion_settings.r_amplitude, - ion_settings.z_amplitude, ion_settings.source_type) + return electron_source_data(source_strength=input["source_strength"], + source_T=input["source_T"], + active=ion_settings.active, + r_amplitude=ion_settings.r_amplitude, + z_amplitude=ion_settings.z_amplitude, + source_type=ion_settings.source_type) end # put all ion sources into ion_source_data struct vector diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl index 227d6eb66..5630aaf6d 100644 --- a/moment_kinetics/src/file_io.jl +++ b/moment_kinetics/src/file_io.jl @@ -15,7 +15,7 @@ using ..debugging using ..input_structs using ..looping using ..timer_utils -using ..timer_utils: timer_names_per_rank_moments, timer_names_per_rank_dfns, +using ..timer_utils: timer_names_all_ranks_moments, timer_names_all_ranks_dfns, TimerNamesDict, SortedDict using ..moment_kinetics_structs: scratch_pdf, em_fields_struct using ..type_definitions: mk_float, mk_int @@ -434,29 +434,6 @@ function setup_file_io(io_input, boundary_distributions, vz, vr, vzeta, vpa, vpe external_source_settings, input_dict, restart_time_index, previous_runs_info, time_for_setup, t_params, nl_solver_params) - # A Dict to store the names of timers that have been created on each MPI rank - if block_rank[] == 0 - if io_input.parallel_io - # Need timer name for every rank, as variable creation and extension are - # collective operations. - for irank ∈ 0:global_size[]-1 - timer_names_per_rank_moments[irank] = (TimerNamesDict(), Ref(0)) - timer_names_per_rank_dfns[irank] = (TimerNamesDict(), Ref(0)) - end - else - # Timers only written from their own block, so only names from this block are - # needed. - for irank_in_block ∈ 0:block_size[]-1 - this_global_rank = global_rank[] + irank_in_block - timer_names_per_rank_moments[this_global_rank] = (TimerNamesDict(), Ref(0)) - timer_names_per_rank_dfns[this_global_rank] = (TimerNamesDict(), Ref(0)) - end - end - else - timer_names_per_rank_moments[global_rank[]] = (TimerNamesDict(), Ref(0)) - timer_names_per_rank_dfns[global_rank[]] = (TimerNamesDict(), Ref(0)) - end - begin_serial_region() @serial_region begin # Only read/write from first process in each 'block' @@ -494,8 +471,8 @@ function setup_file_io(io_input, boundary_distributions, vz, vr, vzeta, vpa, vpe return ascii, io_moments, io_dfns end - # For other processes in the block, return (nothing, nothing, nothing) - return nothing, nothing, nothing + # For other processes in the block, return objects with just the input. + return nothing, (io_input=io_input,), (io_input=io_input,) end """ @@ -605,8 +582,8 @@ function setup_electron_io(io_input, vpa, vperp, z, r, composition, collisions, return file_info end - # For other processes in the block, return nothing - return nothing + # For other processes in the block, return an object with just the input. + return (io_input=io_input,) end """ @@ -711,6 +688,8 @@ Write a single variable to a file or group. If a description or units are passed attributes of the variable. If `overwrite=true` is passed, overwrite the variable if it already exists in the file. +Note that when overwriting a `String` variable, the new `String` must have exactly the +same length as the original `String`. """ function write_single_value! 
end @@ -1114,9 +1093,6 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species, dynamic = create_io_group(fid, "dynamic_data", description="time evolving variables") timing = create_io_group(fid, "timing_data", description="timing data to check run-time performance") - for i ∈ 0:global_size[]-1 - create_io_group(timing, "rank$i", description="timing data for MPI rank $i") - end io_time = create_dynamic_variable!(dynamic, "time", mk_float; parallel_io=parallel_io, description="simulation time") @@ -2150,8 +2126,9 @@ function setup_moments_io(prefix, io_input, vz, vr, vzeta, vpa, vperp, r, z, return file_info end - # For processes other than the root process of each shared-memory group... - return nothing + # Should not be called by processes other than the root process of each shared-memory + # group... + error("setup_moments_io() called by non-block-root block_rank[]=$(block_rank[])") end """ @@ -2253,8 +2230,9 @@ function reopen_moments_io(file_info) getvar("nl_solver_diagnostics"), io_input) end - # For processes other than the root process of each shared-memory group... - return nothing + # Should not be called by processes other than the root process of each shared-memory + # group... + error("reopen_moments_io() called by non-block-root block_rank[]=$(block_rank[])") end """ @@ -2309,8 +2287,9 @@ function setup_dfns_io(prefix, io_input, boundary_distributions, r, z, vperp, vp return file_info end - # For processes other than the root process of each shared-memory group... - return nothing + # Should not be called by processes other than the root process of each shared-memory + # group... + error("setup_dfns_io() called by non-block-root block_rank[]=$(block_rank[])") end """ @@ -2425,8 +2404,9 @@ function reopen_dfns_io(file_info) getvar("f_neutral_start_last_timestep"), io_input, io_moments) end - # For processes other than the root process of each shared-memory group... - return nothing + # Should not be called by processes other than the root process of each shared-memory + # group... + error("reopen_dfns_io() called by non-block-root block_rank[]=$(block_rank[])") end """ @@ -2464,7 -2444,7 @@ file io_or_file_info_moments, t_idx, time_for_run, t_params, nl_solver_params, r, z, dfns=false) = begin - io_moments = nothing + io_moments = io_or_file_info_moments @serial_region begin # Only read/write from first process in each 'block' @@ -2535,56 +2515,60 @@ function write_timing_data(io_moments, t_idx, dfns=false) # hard-coded list of every timer (which would make it inconvenient to add new timers # or debug timers). # - # As well, when using parallel I/O all processes (in the communicator that opens the - # output file) must create and extend variables, although it's OK for only one process - # to write a variable if the variable is a scalar. - # - # These two facts mean that every process in `comm_inter_block[]` needs to know the - # complete list of timers on every rank, so we have to gather this list. The list is - # stored in `timer_names_per_rank_moments` and `timer_names_per_rank_dfns`. We store - # two separate lists because moments and dfns might not be written at the same times, - # so some timers may be newly added in one but not the other, and so need to be - # tracked separately. + # It is most convenient for every process in `comm_world` to know the complete list + # of timers that exist on any rank, so we have to gather this list. The list is + # stored in `timer_names_all_ranks_moments` and `timer_names_all_ranks_dfns`.
We + # store two separate lists because moments and dfns might not be written at the same + # times, so some timers may be newly added in one but not the other, and so need to + # be tracked separately. # - # In order to make sure that every process in `comm_inter_block[]` deals with the same - # variable at the same time, we store the variable names for each rank in a nested - # SortedDict (with the same nesting structure as the `global_timer` on each rank). The - # sort applied to the keys of a SortedDict means that we can be sure to iterate - # through the names in the same order on every process. The names for every rank are - # collected onto each process in `comm_inter_block[]`, but on the rest of the - # processes, only the names from that individual process are collected (as 'the rest' - # do not write data, so do not need information from other processes). + # In order to make sure that every process deals with the same variable at the same + # time, we store the variable names for each rank in a nested SortedDict (with the + # same nesting structure as the `global_timer` on each rank). The sort applied to + # the keys of a SortedDict means that we can be sure to iterate through the names in + # the same order on every process. # - # Only the process in the same shared-memory block as `irank` collects the timing data - # from `irank` and writes it to file. + # The timing data for every process in a shared-memory block is collected to the + # root process of the block to be written to file. # # In order to avoid communicating many strings at every output step, we first check - # how many timers there are on each rank, and compare that to the number in the - # collected list. The number of new variables is gathered, and if there are no new - # variables, no communication of variable names needs to be done. If communication is - # needed, only the names of the new variables need to be gathered. + # how new timers there are on each rank, that are not in the global list of timers. + # The number of new variables is gathered, and if there are no new variables, no + # communication of variable names needs to be done. If communication is needed, only + # the names of the new variables need to be gathered. # # Once the lists of variable names have been updated, the timing data is gathered onto # the root process of each shared-memory block. The data is communicated as vectors of # integers, with the order of the entries being determined by the order of the nested # SortedDict. # - # Each entry in `timer_names_per_rank_moments` and `timer_names_per_rank_dfns` is a - # SortedDict, even if it does not yet contain any other entries, because sub-timers - # might be added at any point. + # Each entry in `timer_names_all_ranks_moments` and `timer_names_all_ranks_dfns` is + # a SortedDict, even if it does not yet contain any other entries, because + # sub-timers might be added at any point. if dfns - timer_names_per_rank = timer_names_per_rank_dfns + timer_names_all_ranks = timer_names_all_ranks_dfns else - timer_names_per_rank = timer_names_per_rank_moments + timer_names_all_ranks = timer_names_all_ranks_moments + end + + # Collect the names that have not been used on any rank before this call. + unique_new_names = String[] + + if block_rank[] == 0 + io_group = get_group(io_moments.fid, "timing_data") end # Find any new timer names on this process. 
- function get_new_timer_names(timer_names_dict) + function get_new_timer_names() new_timer_names = String[] function get_names_inner(this_timer, timer_names_subdict, prefix) names_subdict_keys = keys(timer_names_subdict) - for k ∈ keys(this_timer.inner_timers) + inner_timers_keys = collect(keys(this_timer.inner_timers)) + # Sort keys to ensure that the list created by this function has a + # deterministic order. + sort!(inner_timers_keys) + for k ∈ inner_timers_keys this_name = prefix == "" ? k : prefix * ";" * k if k ∉ names_subdict_keys push!(new_timer_names, this_name) @@ -2596,49 +2580,29 @@ function write_timing_data(io_moments, t_idx, dfns=false) end return nothing end - get_names_inner(global_timer, timer_names_dict, "") + get_names_inner(global_timer, timer_names_all_ranks, "") return new_timer_names end - # Calculate the total number of entries in a nested Dict. This is used to store the - # total size of each entry of `timer_names_per_rank_moments` and - # `timer_names_per_rank_dfns` so that we know the size of the arrays of data that need - # to be communicated. - function get_size_of_timer_dict(this_dict) - # One count for the timer represented by this_dict. - counter = 1 - - # Recursively iterate through all contained Dicts, and add them to the counter. - for sub_dict ∈ values(this_dict) - counter += get_size_of_timer_dict(sub_dict) - end - return counter - end - - # Add the new timer names in to `timer_names_per_rank_moments` or - # `timer_names_per_rank_dfns`. - function add_new_timer_names!(timer_names, new_timer_names, irank=nothing) + # Add the new timer names in to `timer_names_all_ranks_moments` or + # `timer_names_all_ranks_dfns`. Note that there may be duplicate names in + # new_timer_names, and these will be ignored. + function add_new_timer_names!(new_timer_names) for n ∈ new_timer_names - if irank === nothing - this_rank, this_name = split(n, ":") - this_rank = parse(mk_int, this_rank) - this_dict, _ = timer_names_per_rank[this_rank] - else - this_name = n - this_dict, _ = timer_names_per_rank[irank] - end - split_name = split(this_name, ";") - for level ∈ split_name - if level ∉ keys(this_dict) - this_dict[level] = TimerNamesDict() + this_dict_all_ranks = timer_names_all_ranks + split_name = split(n, ";") + n_levels = length(split_name) + for (i, level) ∈ enumerate(split_name) + if level ∉ keys(this_dict_all_ranks) + this_dict_all_ranks[level] = TimerNamesDict() + if i == n_levels + # New variable that has not been used on any rank before. + push!(unique_new_names, n) + end end - this_dict = this_dict[level] + this_dict_all_ranks = this_dict_all_ranks[level] end end - for (timer_names_dict, timer_names_dict_size) ∈ values(timer_names_per_rank) - # Subtract 1 because we do not want to count the 'top level' as an entry. - timer_names_dict_size[] = get_size_of_timer_dict(timer_names_dict) - 1 - end return nothing end @@ -2646,13 +2610,14 @@ function write_timing_data(io_moments, t_idx, dfns=false) # simultaneously by all processes in `comm_inter_block[]`. 
function create_new_timer_io_variables!(new_timer_names, timer_group, parallel_io) for n ∈ new_timer_names - this_rank, this_name = split(n, ":") - rank_group = get_group(timer_group, "rank$this_rank") - create_dynamic_variable!(rank_group, "time:" * this_name, mk_int; + create_dynamic_variable!(io_group, "time:" * n, mk_int, + (name="rank", n=global_size[]); parallel_io=parallel_io) - create_dynamic_variable!(rank_group, "ncalls:" * this_name, mk_int; + create_dynamic_variable!(io_group, "ncalls:" * n, mk_int, + (name="rank", n=global_size[]); parallel_io=parallel_io) - create_dynamic_variable!(rank_group, "allocs:" * this_name, mk_int; + create_dynamic_variable!(io_group, "allocs:" * n, mk_int, + (name="rank", n=global_size[]); parallel_io=parallel_io) end return nothing @@ -2661,305 +2626,167 @@ function write_timing_data(io_moments, t_idx, dfns=false) # Once all the names are known, use this function to collect all the data from timers # on this process into arrays to be communicated. function get_data_from_timers() - sorted_timer_names = timer_names_per_rank[global_rank[]][1] times = mk_int[] ncalls = mk_int[] allocs = mk_int[] function walk_through_timers(names_dict, timer) - push!(times, timer.accumulated_data.time) - push!(ncalls, timer.accumulated_data.ncalls) - push!(allocs, timer.accumulated_data.allocs) + if timer === nothing + # Timer not found on this rank, so set to 0 + push!(times, 0.0) + push!(ncalls, 0.0) + push!(allocs, 0.0) + else + push!(times, timer.accumulated_data.time) + push!(ncalls, timer.accumulated_data.ncalls) + push!(allocs, timer.accumulated_data.allocs) + end # Note that here we have to get the order of entries from names_dict (which is # a SortedDict) to ensure that the order of each list is consistent between # all different processes. for (sub_name, sub_dict) ∈ pairs(names_dict) - walk_through_timers(sub_dict, timer[sub_name]) + if timer === nothing || sub_name ∉ keys(timer.inner_timers) + walk_through_timers(sub_dict, nothing) + else + walk_through_timers(sub_dict, timer[sub_name]) + end end end - for name ∈ keys(sorted_timer_names) - walk_through_timers(sorted_timer_names[name], global_timer[name]) + for name ∈ keys(timer_names_all_ranks) + walk_through_timers(timer_names_all_ranks[name], global_timer[name]) end return times, ncalls, allocs end - if block_rank[] == 0 - parallel_io = io_moments.io_input.parallel_io - end - - # First count how many new timer names there are on all the processes in this block - if block_rank[] == 0 - n_new_names = Vector{mk_int}(undef, block_size[]) - new_names_iblock = get_new_timer_names(timer_names_per_rank[global_rank[]][1]) - n_new_names[1] = length(new_names_iblock) - reqs = [MPI.Irecv!(@view(n_new_names[irank+1:irank+1]), comm_block[]; source=irank) - for irank ∈ 1:block_size[]-1] - MPI.Waitall(reqs) - block_total_new_names = sum(n_new_names) + parallel_io = io_moments.io_input.parallel_io + if parallel_io + comm = comm_world + comm_size = global_size[] + comm_rank = global_rank[] else - new_names_iblock = get_new_timer_names(timer_names_per_rank[global_rank[]][1]) - add_new_timer_names!(timer_names_per_rank, new_names_iblock, global_rank[][1]) - n_new_names_iblock = Ref(length(new_names_iblock)) - MPI.Send(n_new_names_iblock, comm_block[]; dest=0) + comm = comm_block[] + comm_size = block_size[] + comm_rank = block_rank[] end - # Gather new names, from any processes in the block that have new names. 
- if block_rank[] == 0 - if block_total_new_names > 0 - new_names = SortedDict{mk_int,Vector{String}}() - if n_new_names[1] > 0 - new_names[0] = new_names_iblock - else - new_names[0] = String[] - end - for iblock ∈ 1:block_size[]-1 - if n_new_names[iblock+1] > 0 - names_length = Ref(0) - MPI.Recv!(names_length, comm_block[]; source=iblock) - names_char_vector = Vector{Char}(undef, names_length[]) - MPI.Recv!(names_char_vector, comm_block[]; source=iblock) - new_names[iblock] = split(string(names_char_vector...), "&") - else - new_names[iblock] = String[] - end - end - end - else - if n_new_names_iblock[] > 0 - names_string = join(new_names_iblock, "&") - names_char_vector = [names_string...] - names_length = Ref(length(names_char_vector)) - MPI.Send(names_length, comm_block[]; dest=0) - MPI.Send(names_char_vector, comm_block[]; dest=0) - end - end - - # Allgather new names onto all ranks in comm_inter_block[]. - # If parallel_io=false, no need to communicate between different blocks, as each block - # writes output independently. - if block_rank[] == 0 && parallel_io - # Get total number of new names - global_total_new_names = MPI.Allreduce(block_total_new_names, +, - comm_inter_block[]) - - if global_total_new_names > 0 - # Pack all new names for a block into a single string for communication. - this_block_string = "" - for irank ∈ 0:block_size[]-1 - this_global_rank = global_rank[] + irank - this_rank_string = string(("$this_global_rank:" * s * "&" for s ∈ new_names[irank])...) - this_block_string = join([this_block_string, this_rank_string]) - end - - # Get the sizes of the per-block strings that need to be gathered - string_sizes = Vector{mk_int}(undef, n_blocks[]) - string_sizes[iblock_index[]+1] = length(this_block_string) - string_buffer = MPI.UBuffer(string_sizes, 1) - MPI.Allgather!(string_buffer, comm_inter_block[]) - - # Gather the strings - gathered_char_vector = Vector{Char}(undef, sum(string_sizes)) - local_start_index = sum(string_sizes[1:iblock_index[]]) + 1 - local_end_index = local_start_index - 1 + string_sizes[iblock_index[]+1] - gathered_char_vector[local_start_index:local_end_index] .= [this_block_string...] - gathered_buffer = MPI.VBuffer(gathered_char_vector, string_sizes) - MPI.Allgatherv!(gathered_buffer, comm_inter_block[]) + # First count how many new timer names all processes + new_names_this_rank = get_new_timer_names() + n_new_names = Ref(length(new_names_this_rank)) + MPI.Allreduce!(n_new_names, +, comm) - # The string will end with a "&", so we need to slice off the final element, which - # will be an empty string. - all_new_names = split(string(gathered_char_vector...), "&")[1:end-1] + # Allgather new names onto all ranks - if parallel_io=true this is all ranks in + # comm_world, if parallel_io=false it is all ranks in comm_block[] (as each block + # writes output independently). + if n_new_names[] > 0 + # Pack all new names into a single string for communication. + new_names_string = string((s * "&" for s ∈ new_names_this_rank)...) 
- # Add the new names to timer_names_per_rank - add_new_timer_names!(timer_names_per_rank, all_new_names) + # Get the sizes of the per-rank strings that need to be gathered + string_sizes = Vector{mk_int}(undef, comm_size) + string_sizes[comm_rank+1] = length(new_names_string) + string_buffer = MPI.UBuffer(string_sizes, 1) + MPI.Allgather!(string_buffer, comm) - create_new_timer_io_variables!(all_new_names, - get_group(io_moments.fid, "timing_data"), - parallel_io) - end - elseif block_rank[] == 0 - # This is the `parallel_io=false` case - if block_total_new_names > 0 - # Put irank prefixes onto timer names - for irank ∈ 0:block_size[] - 1 - this_global_rank = global_rank[] + irank - names_without_prefix = new_names[irank] - new_names[irank] = ["$this_global_rank:" * s for s ∈ names_without_prefix] - end + # Gather the strings + gathered_char_vector = Vector{Char}(undef, sum(string_sizes)) + local_start_index = sum(string_sizes[1:comm_rank]) + 1 + local_end_index = local_start_index - 1 + string_sizes[comm_rank+1] + gathered_char_vector[local_start_index:local_end_index] .= [new_names_string...] + gathered_buffer = MPI.VBuffer(gathered_char_vector, string_sizes) + MPI.Allgatherv!(gathered_buffer, comm) - all_new_names = vcat((new_names[irank] for irank ∈ 0:block_size[]-1)...) + # The string will end with a "&", so we need to slice off the final element, which + # will be an empty string. + all_new_names = split(string(gathered_char_vector...), "&")[1:end-1] - # Add the new names to timer_names_per_rank - add_new_timer_names!(timer_names_per_rank, all_new_names) + # Add the new names to timer_names_per_rank + add_new_timer_names!(all_new_names) - create_new_timer_io_variables!(all_new_names, - get_group(io_moments.fid, "timing_data"), - parallel_io) + if block_rank[] == 0 + create_new_timer_io_variables!(unique_new_names, io_group, parallel_io) end end # Collect the timing data onto the root process of each block - if block_rank[] == 0 - times_data = Dict{mk_int,Vector{mk_int}}() - ncalls_data = Dict{mk_int,Vector{mk_int}}() - allocs_data = Dict{mk_int,Vector{mk_int}}() - times_data[0], ncalls_data[0], allocs_data[0] = get_data_from_timers() - for irank ∈ 1:block_size[]-1 - this_global_rank = global_rank[] + irank - times_data[irank] = zeros(mk_int, timer_names_per_rank[this_global_rank][2][]) - ncalls_data[irank] = zeros(mk_int, timer_names_per_rank[this_global_rank][2][]) - allocs_data[irank] = zeros(mk_int, timer_names_per_rank[this_global_rank][2][]) - end - times_reqs = MPI.Request[MPI.Irecv!(times_data[irank], comm_block[]; source=irank) - for irank ∈ 1:block_size[]-1] - ncalls_reqs = MPI.Request[MPI.Irecv!(ncalls_data[irank], comm_block[]; source=irank) - for irank ∈ 1:block_size[]-1] - allocs_reqs = MPI.Request[MPI.Irecv!(allocs_data[irank], comm_block[]; source=irank) - for irank ∈ 1:block_size[]-1] - MPI.Waitall(MPI.Request[times_reqs..., ncalls_reqs..., allocs_reqs...]) - else - times_data, ncalls_data, allocs_data = get_data_from_timers() - times_req = MPI.Isend(times_data, comm_block[]; dest=0) - ncalls_req = MPI.Isend(ncalls_data, comm_block[]; dest=0) - allocs_req = MPI.Isend(allocs_data, comm_block[]; dest=0) - MPI.Waitall([times_req, ncalls_req, allocs_req]) - end + times_data, ncalls_data, allocs_data = get_data_from_timers() + n_timers = length(times_data) + gathered_times_data = MPI.Gather(times_data, comm_block[]; root=0) + gathered_ncalls_data = MPI.Gather(ncalls_data, comm_block[]; root=0) + gathered_allocs_data = MPI.Gather(allocs_data, comm_block[]; root=0) if 
block_rank[] == 0 + gathered_times_data = reshape(gathered_times_data, n_timers, block_size[]) + gathered_ncalls_data = reshape(gathered_ncalls_data, n_timers, block_size[]) + gathered_allocs_data = reshape(gathered_allocs_data, n_timers, block_size[]) + # Write the timer variables - for irank ∈ keys(timer_names_per_rank) - this_rank_block = irank ÷ block_size[] - this_rank_rank = irank % block_size[] - this_rank_names, this_rank_nvars = timer_names_per_rank[irank] - - # Only the process in the same shared-memory block as the data came from - # actually has the data, so this process is the one that writes it. - write_from_this_rank = (this_rank_block == iblock_index[]) - - # We iterate through the variables in the same order as they were packed into - # the array in `get_data_from_timers()`, so `counter` gets the corresponding - # data from the flattened array that was communicated. - counter = 1 - io_group = get_group(io_moments.fid, "timing_data/rank$irank") - - # For timers on the root process of each shared memory block, we do not need - # to communicate so we can write the data directly from the timers. This - # should help ensure that at least for these processes the timer data has the - # right name in the output file since no packing/unpacking of flattened arrays - # is needed. This verification (since times can then be compared to other - # processes with the expectation that most of them are similar) justifies the - # partial code duplication in having this function be separate from - # `write_for_irank()`. - function write_for_block_root(names_dict, this_name, this_timer, - this_top_level_name=this_name) - io_time = io_group["time:" * this_name] - io_ncalls = io_group["ncalls:" * this_name] - io_allocs = io_group["allocs:" * this_name] - if t_idx < 0 - # The top-level timer (usually "moment_kinetics" was probably the - # first one created. It definitely exists, and should have been - # written at each timestep. - # If we got the length of `time:$this_name`, the variable might have - # the wrong length (e.g. if it has only just been created and has - # length 1). - this_t_idx = length(io_group["time:" * this_top_level_name]) - else - this_t_idx = t_idx - end - if write_from_this_rank - append_to_dynamic_var(io_time, this_timer.accumulated_data.time, - this_t_idx, parallel_io; - write_from_this_rank=true) - append_to_dynamic_var(io_ncalls, this_timer.accumulated_data.ncalls, - this_t_idx, parallel_io; - write_from_this_rank=true) - append_to_dynamic_var(io_allocs, this_timer.accumulated_data.allocs, - this_t_idx, parallel_io; - write_from_this_rank=true) - else - append_to_dynamic_var(io_time, nothing, this_t_idx, - parallel_io; write_from_this_rank=false) - append_to_dynamic_var(io_ncalls, nothing, this_t_idx, - parallel_io; write_from_this_rank=false) - append_to_dynamic_var(io_allocs, nothing, this_t_idx, - parallel_io; write_from_this_rank=false) - end - counter += 1 - for (sub_name, sub_dict) ∈ pairs(names_dict) - write_for_block_root(sub_dict, this_name * ";" * sub_name, - this_timer === nothing ? nothing : - this_timer[sub_name], this_top_level_name) - end - end - # Write data for non-root processes in the shared memory block, which must be - # unpacked from the arrays that were communicated. 
- function write_for_irank(names_dict, this_name, this_top_level_name=this_name) - io_time = io_group["time:" * this_name] - io_ncalls = io_group["ncalls:" * this_name] - io_allocs = io_group["allocs:" * this_name] - if t_idx < 0 - # The top-level timer (usually "moment_kinetics" was probably the - # first one created. It definitely exists, and should have been - # written at each timestep. - # If we got the length of `time:$this_name`, the variable might have - # the wrong length (e.g. if it has only just been created and has - # length 1). - this_t_idx = length(io_group["time:" * this_top_level_name]) - else - this_t_idx = t_idx - end - if write_from_this_rank - irank_in_block = irank % block_size[] - append_to_dynamic_var(io_time, times_data[irank_in_block][counter], - this_t_idx, parallel_io, - write_from_this_rank=true) - append_to_dynamic_var(io_ncalls, ncalls_data[irank_in_block][counter], - this_t_idx, parallel_io, - write_from_this_rank=true) - append_to_dynamic_var(io_allocs, allocs_data[irank_in_block][counter], - this_t_idx, parallel_io, - write_from_this_rank=true) - else - append_to_dynamic_var(io_time, nothing, this_t_idx, - parallel_io, write_from_this_rank=false) - append_to_dynamic_var(io_ncalls, nothing, this_t_idx, - parallel_io, write_from_this_rank=false) - append_to_dynamic_var(io_allocs, nothing, this_t_idx, - parallel_io, write_from_this_rank=false) - end - counter += 1 - for (sub_name, sub_dict) ∈ pairs(names_dict) - write_for_irank(sub_dict, this_name * ";" * sub_name, this_top_level_name) - end - end + # We iterate through the variables in the same order as they were packed into + # the array in `get_data_from_timers()`, so `counter` gets the corresponding + # data from the flattened array that was communicated. + counter = 1 - if this_rank_rank == 0 - # This was the data from the root process of a shared-memory block. - for top_level_name ∈ keys(this_rank_names) - write_for_block_root(this_rank_names[top_level_name], top_level_name, - write_from_this_rank ? global_timer[top_level_name] : nothing) - end - else - # This was data from a process that is not the root of a shared-memory - # block, so had to be packed, communicated, and unpacked. - for top_level_name ∈ keys(this_rank_names) - write_for_irank(this_rank_names[top_level_name], top_level_name) - end - end - if counter != this_rank_nvars[] + 1 - error("Got wrong number of timers. Wrote $(counter-1) but should have " - * "been $this_rank_nvars") + # Write data for all processes in the shared memory block, which must be + # unpacked from the arrays that were communicated. + if t_idx < 0 + # The top-level timer (usually "moment_kinetics" was probably the + # first one created. It definitely exists, and should have been + # written at each timestep. + # If we got the length of `time:$this_name`, the variable might have + # the wrong length (e.g. if it has only just been created and has + # length 1). 
+ t_idx = length(io_group["time:" * first(keys(timer_names_all_ranks))]) + end + if parallel_io + timer_coord = (local_io_range=1:block_size[], + global_io_range=global_rank[]+1:global_rank[]+block_size[]) + else + timer_coord = (local_io_range=1:block_size[], + global_io_range=1:block_size[]) + end + function write_level(names_dict, this_name) + io_time = io_group["time:" * this_name] + io_ncalls = io_group["ncalls:" * this_name] + io_allocs = io_group["allocs:" * this_name] + @views append_to_dynamic_var(io_time, gathered_times_data[counter,:], t_idx, + parallel_io, timer_coord) + @views append_to_dynamic_var(io_ncalls, gathered_ncalls_data[counter,:], + t_idx, parallel_io, timer_coord) + @views append_to_dynamic_var(io_allocs, gathered_allocs_data[counter,:], + t_idx, parallel_io, timer_coord) + counter += 1 + for (sub_name, sub_dict) ∈ pairs(names_dict) + write_level(sub_dict, this_name * ";" * sub_name) end end + + for top_level_name ∈ keys(timer_names_all_ranks) + write_level(timer_names_all_ranks[top_level_name], top_level_name) + end end + # Pick a fixed size for "global_timer_string" so that we can overwrite the variable + # without needing to resize it. + global_timer_string_size = 10000 # 100 characters x 100 lines seems like a reasonable maximum size. + global_timer_description = "Formatted representation of global_timer" if global_rank[] == 0 || (block_rank[] == 0 && !parallel_io) if t_idx > 1 || t_idx == -1 if t_idx == -1 top_level = nothing else - if "moment_kinetics" ∈ keys(timer_names_per_rank[global_rank[]]) + if "moment_kinetics" ∈ keys(timer_names_all_ranks) top_level = ("moment_kinetics", "time_advance! step", "ssp_rk!") + this_dict = timer_names_all_ranks + + # Check all the expected levels are present, otherwise just set + # top_level=nothing. + for n ∈ top_level + if n ∉ keys(this_dict) + top_level = nothing + break + end + this_dict = this_dict[n] + end else # If `time_advance!()` was called in a non-standard way (i.e. not by # `run_moment_kinetics()`), the actual timers might be different. In @@ -2971,33 +2798,35 @@ function write_timing_data(io_moments, t_idx, dfns=false) # was printed to the terminal, for a quick look. string_to_write = format_global_timer(; show_output=false, top_level=top_level) - string_size = Ref(length(string_to_write)) - if parallel_io - MPI.Bcast!(string_size, comm_inter_block[]; root=0) - end + + # Ensure `string_to_write` is no longer than `global_timer_string_size`. + string_to_write = string_to_write[1:min(length(string_to_write), global_timer_string_size)] + # Ensure `string_to_write` is at least as long as `global_timer_string_size`. + # Do this way instead of using `rpad()` because `rpad()` measures the length + # using `textwidth()` rather than a raw character count, whereas we want a + # fixed number of ASCII characters to write to the output file. + string_to_write = string_to_write * ' '^(global_timer_string_size - length(string_to_write)) + write_single_value!(get_group(io_moments.fid, "timing_data"), "global_timer_string", string_to_write; parallel_io=parallel_io, - description="Formatted representation of global_timer", + description=global_timer_description, overwrite=true) end elseif block_rank[] == 0 if t_idx > 1 || t_idx == -1 # Although only global_rank[]==0 needs to write "global_timer_string" when we # are using parallel I/O, other ranks in `comm_inter_block[]` must also call - # `write_single_value!() so that the variable in the HDF5 file can be - # (re-)created. 
- # ???These other ranks do not actually write data though, so it does not - # matter what is passed to the `data` argument of `write_single_value!()` (as - # long as it is a scalar). - # ? Need to pass a string with the right length? - string_size = Ref(0) - MPI.Bcast!(string_size, comm_inter_block[]; root=0) - string_to_write = " " ^ string_size[] + # `write_single_value!() so that the variable in the HDF5 file can be created. + # These other ranks do not actually write data though, so it does not matter + # what is passed to the `data` argument of `write_single_value!()` (as + # long as it is a string with the right length). + string_to_write = " " ^ global_timer_string_size write_single_value!(get_group(io_moments.fid, "timing_data"), "global_timer_string", string_to_write; - parallel_io=parallel_io, description="Formatted representation - of global_timer", overwrite=true) + parallel_io=parallel_io, + description=global_timer_description, + overwrite=true) end end @@ -3012,8 +2841,8 @@ file. Needs to be called after exiting from the `@timeit` block so that all time finalised properly. """ function write_final_timing_data_to_binary(io_or_file_info_moments, io_or_file_info_dfns) - io_moments = nothing - io_dfns_moments = nothing + io_moments = io_or_file_info_moments + io_dfns_moments = io_or_file_info_dfns @serial_region begin # Only read/write from first process in each 'block' @@ -3411,7 +3240,7 @@ binary output file t_params, nl_solver_params, r, z, vperp, vpa, vzeta, vr, vz) = begin io_dfns = nothing - io_dfns_moments = nothing + io_dfns_moments = io_or_file_info_dfns closefile = true @serial_region begin # Only read/write from first process in each 'block' @@ -3589,8 +3418,8 @@ end close all opened output files """ function finish_file_io(ascii_io::Union{ascii_ios,Nothing}, - binary_moments::Union{io_moments_info,Tuple,Nothing}, - binary_dfns::Union{io_dfns_info,Tuple,Nothing}) + binary_moments::Union{io_moments_info,Tuple,NamedTuple,Nothing}, + binary_dfns::Union{io_dfns_info,Tuple,NamedTuple,Nothing}) @serial_region begin # Only read/write from first process in each 'block' @@ -3618,7 +3447,7 @@ end close output files for electron initialization """ function finish_electron_io( - binary_initial_electron::Union{io_initial_electron_info,Tuple,Nothing,Bool}) + binary_initial_electron::Union{io_initial_electron_info,Tuple,Nothing,Bool,NamedTuple}) @serial_region begin # Only read/write from first process in each 'block' diff --git a/moment_kinetics/src/file_io_hdf5.jl b/moment_kinetics/src/file_io_hdf5.jl index 8b9ff40e5..d647ab554 100644 --- a/moment_kinetics/src/file_io_hdf5.jl +++ b/moment_kinetics/src/file_io_hdf5.jl @@ -92,9 +92,6 @@ function write_single_value!(file_or_group::HDF5.H5DataStore, name, description=nothing, units=nothing, overwrite=false) where {T,N} if isa(data, Union{Number, AbstractString}) - if overwrite && name ∈ keys(file_or_group) - delete_object(file_or_group, name) - end # When we write a scalar, and parallel_io=true, we need to create the variable on # every process in `comm_inter_block[]` but we only want to actually write the # data from one process (we choose `global_rank[]==0`) to avoid corruption due to @@ -106,15 +103,17 @@ function write_single_value!(file_or_group::HDF5.H5DataStore, name, # length of the string, so cannot be easily created by hand. Note that a String of # the correct length must be passed from every process in `comm_inter_block[]`, # but only the contents of the string on `global_rank[]==0` are actually written. 
- io_var, var_hdf5_type = create_dataset(file_or_group, name, data) - if !parallel_io || global_rank[] == 0 - write_dataset(io_var, var_hdf5_type, data) + if !(overwrite && name ∈ keys(file_or_group)) + create_dataset(file_or_group, name, data) + if description !== nothing + add_attribute!(file_or_group[name], "description", description) + end + if units !== nothing + add_attribute!(file_or_group[name], "units", units) + end end - if description !== nothing - add_attribute!(file_or_group[name], "description", description) - end - if units !== nothing - add_attribute!(file_or_group[name], "units", units) + if !parallel_io || global_rank[] == 0 + write(file_or_group[name], data) end return nothing end @@ -262,14 +261,14 @@ end function append_to_dynamic_var(io_var::HDF5.Dataset, data::Union{Nothing,Number,AbstractArray{T,N}}, t_idx, parallel_io::Bool, - coords::Union{coordinate,Integer}...; + coords::Union{coordinate,NamedTuple,Integer}...; only_root=false, write_from_this_rank=nothing) where {T,N} # Extend time dimension for this variable dims = size(io_var) dims_mod = (dims[1:end-1]..., t_idx) HDF5.set_extent_dims(io_var, dims_mod) - local_ranges = Tuple(isa(c, coordinate) ? c.local_io_range : 1:c for c ∈ coords) - global_ranges = Tuple(isa(c, coordinate) ? c.global_io_range : 1:c for c ∈ coords) + local_ranges = Tuple(isa(c, Integer) ? (1:c) : c.local_io_range for c ∈ coords) + global_ranges = Tuple(isa(c, Integer) ? (1:c) : c.global_io_range for c ∈ coords) if only_root && parallel_io && global_rank[] != 0 # Variable should only be written from root, and this process is not root for the diff --git a/moment_kinetics/src/fokker_planck.jl b/moment_kinetics/src/fokker_planck.jl index fb5ac876b..001d90824 100644 --- a/moment_kinetics/src/fokker_planck.jl +++ b/moment_kinetics/src/fokker_planck.jl @@ -45,6 +45,7 @@ using FastGaussQuadrature using Dates using LinearAlgebra: lu, ldiv! 
using MPI +using OrderedCollections: OrderedDict using ..type_definitions: mk_float, mk_int using ..array_allocation: allocate_float, allocate_shared_float using ..communication @@ -82,7 +83,7 @@ use_fokker_planck = true nuii = 1.0 frequency_option = "manual" """ -function setup_fkpl_collisions_input(toml_input::Dict) +function setup_fkpl_collisions_input(toml_input::AbstractDict) reference_params = setup_reference_parameters(toml_input) # get reference collision frequency (note factor of 1/2 due to definition choices) nuii_fkpl_default = 0.5*get_reference_collision_frequency_ii(reference_params) @@ -117,7 +118,7 @@ function setup_fkpl_collisions_input(toml_input::Dict) if !input_section["use_fokker_planck"] input_section["nuii"] = -1.0 end - input = Dict(Symbol(k)=>v for (k,v) in input_section) + input = OrderedDict(Symbol(k)=>v for (k,v) in input_section) #println(input) if input_section["slowing_down_test"] # calculate nu_alphae and vc3 (critical speed of slowing down) diff --git a/moment_kinetics/src/geo.jl b/moment_kinetics/src/geo.jl index 41be4d0db..933598a65 100644 --- a/moment_kinetics/src/geo.jl +++ b/moment_kinetics/src/geo.jl @@ -14,6 +14,8 @@ using ..array_allocation: allocate_float using ..type_definitions: mk_float, mk_int using ..reference_parameters: setup_reference_parameters +using OrderedCollections: OrderedDict + """ struct containing the geometric data necessary for non-trivial axisymmetric geometries, to be passed @@ -81,7 +83,7 @@ DeltaB = 0.0 option = "" """ -function setup_geometry_input(toml_input::Dict) +function setup_geometry_input(toml_input::AbstractDict) reference_params = setup_reference_parameters(toml_input) reference_rhostar = get_default_rhostar(reference_params) @@ -105,7 +107,7 @@ function setup_geometry_input(toml_input::Dict) # constant for testing nonzero dBdr when nr = 1 dBdr_constant = 0.0) - input = Dict(Symbol(k)=>v for (k,v) in input_section) + input = OrderedDict(Symbol(k)=>v for (k,v) in input_section) #println(input) return geometry_input(; input...) end diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index f427c3a83..edcdb9a8a 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -17,12 +17,15 @@ export io_input export pp_input export geometry_input export set_defaults_and_check_top_level!, set_defaults_and_check_section!, - check_sections!, options_to_TOML, Dict_to_NamedTuple + check_sections!, options_to_TOML, Dict_to_NamedTuple, + convert_to_sorted_nested_OptionsDict using ..communication -using ..type_definitions: mk_float, mk_int +using ..type_definitions: mk_float, mk_int, OptionsDict +using DataStructures: SortedDict using MPI +using OrderedCollections: OrderedDict using TOML """ @@ -730,7 +733,7 @@ import Base: get Utility method for converting a string to an Enum when getting from a Dict, based on the type of the default value """ -function get(d::Dict, key, default::Enum) +function get(d::OptionsDict, key, default::Enum) val_maybe_string = get(d, key, nothing) if val_maybe_string == nothing return default @@ -750,14 +753,12 @@ end Set the defaults for options in the top level of the input, and check that there are not any unexpected options (i.e. options that have no default). -Modifies the options[section_name]::Dict by adding defaults for any values that are not -already present. +Modifies the options[section_name]::OptionsDict by adding defaults for any values that +are not already present. 
Ignores any sections, as these will be checked separately. """ -function set_defaults_and_check_top_level!(options::AbstractDict; kwargs...) - DictType = typeof(options) - +function set_defaults_and_check_top_level!(options::OptionsDict; kwargs...) # Check for any unexpected values in the options - all options that are set should be # present in the kwargs of this function call options_keys_symbols = keys(kwargs) @@ -781,13 +782,12 @@ function set_defaults_and_check_top_level!(options::AbstractDict; kwargs...) return options end -function _get_section_and_check_option_names(options, section_name, section_keys) - - DictType = typeof(options) +function _get_section_and_check_option_names(options::OptionsDict, section_name, + section_keys) if !(section_name ∈ keys(options)) # If section is not present, create it - options[section_name] = DictType() + options[section_name] = OptionsDict() end if !isa(options[section_name], AbstractDict) @@ -839,11 +839,10 @@ const _section_check_store_name = "_section_check_store" Set the defaults for options in a section, and check that there are not any unexpected options (i.e. options that have no default). -Modifies the options[section_name]::Dict by adding defaults for any values that are not -already present. +Modifies the options[section_name]::OptionsDict by adding defaults for any values that +are not already present. """ -function set_defaults_and_check_section!(options::AbstractDict, section_name; - kwargs...) +function set_defaults_and_check_section!(options::OptionsDict, section_name; kwargs...) section_keys_symbols = keys(kwargs) section_keys = (String(k) for k ∈ section_keys_symbols) @@ -865,7 +864,7 @@ function set_defaults_and_check_section!(options::AbstractDict, section_name; end """ - set_defaults_and_check_section!(options::AbstractDict, struct_type::Type, + set_defaults_and_check_section!(options::OptionsDict, struct_type::Type, name::Union{String,Nothing}=nothing) Alternative form to be used when the options should be stored in a struct of type @@ -874,15 +873,15 @@ Alternative form to be used when the options should be stored in a struct of typ The returned instance of `struct_type` is immutable, so if you need to modify the settings - e.g. to apply some logic to set defaults depending on other settings/parameters - then you should use the 'standard' version of [`set_defaults_and_check_section!`](@ref) that -returns a `Dict` that can be modified, and then use that `Dict` to initialise the -`struct_type`. +returns a `OptionsDict` that can be modified, and then use that `OptionsDict` to +initialise the `struct_type`. The name of the section in the options that will be read defaults to the name of `struct_type`, but can be set using the `section_name` argument. Returns an instance of `struct_type`. """ -function set_defaults_and_check_section!(options::AbstractDict, struct_type::Type, +function set_defaults_and_check_section!(options::OptionsDict, struct_type::Type, section_name::Union{String,Nothing}=nothing) if section_name === nothing @@ -910,12 +909,12 @@ function set_defaults_and_check_section!(options::AbstractDict, struct_type::Typ end """ - check_sections!(options::AbstractDict) + check_sections!(options::OptionsDict) Check that there are no unexpected sections in `options`. The 'expected sections' are the ones that were defined with [`set_defaults_and_check_section!`](@ref). 
""" -function check_sections!(options::AbstractDict; check_no_top_level_options=true) +function check_sections!(options::OptionsDict; check_no_top_level_options=true) expected_section_names = pop!(options, _section_check_store_name) @@ -945,18 +944,38 @@ function check_sections!(options::AbstractDict; check_no_top_level_options=true) end """ -Convert a Dict whose keys are String or Symbol to a NamedTuple + convert_to_sorted_nested_OptionsDict(d::AbstractDict) + +To ensure consistency when writing options to an output file, the entries in the +dictionary containing the options must be in a deterministic order. As TOML reads +options into a nested `Dict`, the only way to guarantee this is to sort the options +before storing them in an `OptionsDict`. `OptionsDict` is an alias for +`OrderedDict{String,Any}` so it will preserve the order of entries as long as they were +in a consistent order when it was created. +""" +function convert_to_sorted_nested_OptionsDict(d::AbstractDict) + sorted_d = SortedDict{String,Any}(d) + for (k,v) ∈ pairs(sorted_d) + if isa(v, AbstractDict) + sorted_d[k] = convert_to_sorted_nested_OptionsDict(v) + end + end + return OptionsDict(sorted_d) +end + +""" +Convert an OrderedDict whose keys are String or Symbol to a NamedTuple Useful as NamedTuple is immutable, so option values cannot be accidentally changed. """ -function Dict_to_NamedTuple(d) +function Dict_to_NamedTuple(d::OrderedDict) return NamedTuple(Symbol(k)=>v for (k,v) ∈ d) end """ options_to_toml(io::IO [=stdout], data::AbstractDict; sorted=false, by=identity) -Convert `moment_kinetics` 'options' (in the form of a `Dict`) to TOML format. +Convert `moment_kinetics` 'options' (in the form of an `AbstractDict`) to TOML format. This function is defined so that we can handle some extra types, for example `Enum`. diff --git a/moment_kinetics/src/krook_collisions.jl b/moment_kinetics/src/krook_collisions.jl index 6e37d6dfc..f0a99404d 100644 --- a/moment_kinetics/src/krook_collisions.jl +++ b/moment_kinetics/src/krook_collisions.jl @@ -15,6 +15,8 @@ using ..reference_parameters: get_reference_collision_frequency_ii, get_reference_collision_frequency_ei using ..reference_parameters: setup_reference_parameters +using OrderedCollections: OrderedDict + """ Function for reading Krook collision operator input parameters. Structure the namelist as follows. @@ -24,7 +26,7 @@ use_krook = true nuii0 = 1.0 frequency_option = "manual" """ -function setup_krook_collisions_input(toml_input::Dict) +function setup_krook_collisions_input(toml_input::AbstractDict) reference_params = setup_reference_parameters(toml_input) # get reference collision frequency nuii_krook_default = get_reference_collision_frequency_ii(reference_params) @@ -63,7 +65,7 @@ function setup_krook_collisions_input(toml_input::Dict) input_section["nuee0"] = -1.0 input_section["nuei0"] = -1.0 end - input = Dict(Symbol(k)=>v for (k,v) in input_section) + input = OrderedDict(Symbol(k)=>v for (k,v) in input_section) #println(input) return krook_collisions_input(; input...) end diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index 53ceda028..35e427319 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -39,6 +39,7 @@ using ..z_advection: update_speed_z! 
using Glob using HDF5 using MPI +using OrderedCollections: OrderedDict const timestep_diagnostic_variables = ("time_for_run", "step_counter", "dt", "failure_counter", "failure_caused_by", @@ -664,7 +665,7 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, vzeta, vr, vz, parallel_io) # Test whether any interpolation is needed - interpolation_needed = Dict( + interpolation_needed = OrderedDict( x.name => (restart_x !== nothing && (x.n != restart_x.n || !all(isapprox.(x.grid, restart_x.grid)))) @@ -1105,7 +1106,7 @@ function reload_electron_data!(pdf, moments, t_params, restart_prefix_iblock, ti vzeta, vr, vz, parallel_io) # Test whether any interpolation is needed - interpolation_needed = Dict( + interpolation_needed = OrderedDict( x.name => x.n != restart_x.n || !all(isapprox.(x.grid, restart_x.grid)) for (x, restart_x) ∈ ((z, restart_z), (r, restart_r), (vperp, restart_vperp), (vpa, restart_vpa))) @@ -3595,16 +3596,13 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin evolving_variables = Tuple(evolving_variables) end - timing_variable_names = Dict{mk_int, Vector{String}}() - for fid ∈ fids0 - timing_group = get_group(fid, "timing_data") - timing_rank_names = collect(k for k in keys(timing_group) if startswith(k, "rank")) - for group_name ∈ timing_rank_names - rank_group = get_group(timing_group, group_name) - irank = parse(mk_int, split(group_name, "rank")[2]) - timing_variable_names[irank] = collect(keys(rank_group)) - end - end + # Assume the timing variables are the same in every restart - this may not always be + # true, and might cause errors if some variables are missing for restarts after the + # first. + timing_group = get_group(fids0[1], "timing_data") + timing_variable_names = collect(k for k in keys(timing_group) + if startswith(k, "time:") || startswith(k, "ncalls:") || + startswith(k, "allocs:")) if parallel_io files = fids0 diff --git a/moment_kinetics/src/maxwell_diffusion.jl b/moment_kinetics/src/maxwell_diffusion.jl index 73a36c44a..85d2d4639 100644 --- a/moment_kinetics/src/maxwell_diffusion.jl +++ b/moment_kinetics/src/maxwell_diffusion.jl @@ -21,6 +21,8 @@ using ..calculus: second_derivative! using ..timer_utils using ..reference_parameters: get_reference_collision_frequency_ii, setup_reference_parameters +using OrderedCollections: OrderedDict + """ Function for reading Maxwell diffusion operator input parameters. Structure the namelist as follows. @@ -30,7 +32,7 @@ use_maxwell_diffusion = true D_ii = 1.0 diffusion_coefficient_option = "manual" """ -function setup_mxwl_diff_collisions_input(toml_input::Dict) +function setup_mxwl_diff_collisions_input(toml_input::AbstractDict) reference_params = setup_reference_parameters(toml_input) # get reference diffusion coefficient, made up of collision frequency and # thermal speed for now. NOTE THAT THIS CONSTANT PRODUCES ERRORS. DO NOT USE @@ -63,7 +65,7 @@ function setup_mxwl_diff_collisions_input(toml_input::Dict) input_section["D_ii"] = -1.0 input_section["D_nn"] = -1.0 end - input = Dict(Symbol(k)=>v for (k,v) in input_section) + input = OrderedDict(Symbol(k)=>v for (k,v) in input_section) return mxwl_diff_collisions_input(; input...) 
end diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl index 1f2c86593..3309ca8ed 100644 --- a/moment_kinetics/src/moment_kinetics.jl +++ b/moment_kinetics/src/moment_kinetics.jl @@ -125,9 +125,6 @@ function run_moment_kinetics(input_dict::OptionsDict; restart=false, restart_tim mk_state = nothing try - # Reset timers in case a previous run was timed - reset_mk_timers!() - @timeit global_timer "moment_kinetics" begin # set up all the structs, etc. needed for a run mk_state = setup_moment_kinetics(input_dict; restart=restart, @@ -187,7 +184,7 @@ function run_moment_kinetics() end restart_time_index = options["restart-time-index"] if inputfile === nothing - this_input = Dict() + this_input = OptionsDict() else this_input = inputfile end @@ -404,6 +401,9 @@ function cleanup_moment_kinetics!(ascii_io, io_moments, io_dfns) end end + # Reset timers + reset_mk_timers!() + # clean up MPI objects finalize_comms!() diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl index 9f49aef52..18e78aeb6 100644 --- a/moment_kinetics/src/moment_kinetics_input.jl +++ b/moment_kinetics/src/moment_kinetics_input.jl @@ -31,7 +31,7 @@ using TOML Read input from a TOML file """ function read_input_file(input_filename::String) - input = TOML.parsefile(input_filename) + input = convert_to_sorted_nested_OptionsDict(TOML.parsefile(input_filename)) # Use input_filename (without the extension) as default for "run_name" if !("output" ∈ keys(input) && "run_name" in keys(input["output"])) diff --git a/moment_kinetics/src/species_input.jl b/moment_kinetics/src/species_input.jl index 59d7043fe..dc9067891 100644 --- a/moment_kinetics/src/species_input.jl +++ b/moment_kinetics/src/species_input.jl @@ -13,6 +13,8 @@ using ..input_structs: spatial_initial_condition_input, velocity_initial_conditi using ..input_structs: boltzmann_electron_response, boltzmann_electron_response_with_simple_sheath using ..reference_parameters: setup_reference_parameters +using OrderedCollections: OrderedDict + function get_species_input(toml_input) reference_params = setup_reference_parameters(toml_input) @@ -79,7 +81,7 @@ function get_species_input(toml_input) velocity_initial_condition_input, "vpa_IC_ion_species_$is") - spec_input = Dict(Symbol(k)=>v for (k,v) in spec_section) + spec_input = OrderedDict(Symbol(k)=>v for (k,v) in spec_section) ion_spec_params_list[is] = ion_species_parameters(; type="ion", z_IC=z_IC, r_IC=r_IC, vpa_IC=vpa_IC, spec_input...) @@ -106,14 +108,14 @@ function get_species_input(toml_input) velocity_initial_condition_input, "vz_IC_neutral_species_$isn") - spec_input = Dict(Symbol(k)=>v for (k,v) in spec_section) + spec_input = OrderedDict(Symbol(k)=>v for (k,v) in spec_section) neutral_spec_params_list[isn] = neutral_species_parameters(; type="neutral", z_IC=z_IC, r_IC=r_IC, vz_IC=vz_IC, spec_input...) end # construct composition struct - composition_input = Dict(Symbol(k)=>v for (k,v) in composition_section) + composition_input = OrderedDict(Symbol(k)=>v for (k,v) in composition_section) composition = species_composition(; n_species=nspec_tot, ion=ion_spec_params_list, neutral=neutral_spec_params_list, composition_input...) 
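# Editor's note: hedged, standalone sketch of the ordering pattern relied on by the
# read_input_file() change above. TOML parsing produces plain (unordered) Dicts, so
# convert_to_sorted_nested_OptionsDict() (defined in input_structs.jl earlier in this
# diff) sorts the keys at every level and stores them in OptionsDict, an alias for
# OrderedDict{String,Any}, giving a deterministic key order for output files. The
# names below (sorted_nested_options, ExampleOptionsDict) are illustrative stand-ins,
# not the moment_kinetics API.
using OrderedCollections: OrderedDict
using DataStructures: SortedDict
import TOML

const ExampleOptionsDict = OrderedDict{String,Any}

function sorted_nested_options(d::AbstractDict)
    # Sort the keys at this level, then recurse into any nested dictionaries.
    sorted_d = SortedDict{String,Any}(d)
    for (k, v) in pairs(sorted_d)
        if isa(v, AbstractDict)
            sorted_d[k] = sorted_nested_options(v)
        end
    end
    return ExampleOptionsDict(sorted_d)
end

toml_text = """
[z]
ngrid = 9

[timestepping]
nstep = 10
"""
options = sorted_nested_options(TOML.parse(toml_text))
# Key order no longer depends on Dict hashing: sections come out lexically sorted.
@assert collect(keys(options)) == ["timestepping", "z"]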
diff --git a/moment_kinetics/src/timer_utils.jl b/moment_kinetics/src/timer_utils.jl index 1fadff14e..3ff8f02ff 100644 --- a/moment_kinetics/src/timer_utils.jl +++ b/moment_kinetics/src/timer_utils.jl @@ -37,16 +37,16 @@ const TimerNamesDict = SortedDict{String,SortedDict,Base.Order.ForwardOrdering} TimerNamesDict() = TimerNamesDict(Base.Order.ForwardOrdering()) """ -Nested Dict containting the names of all timers that have been created on each MPI rank +Nested SortedDict with the names of all timers that have been created on any MPI rank and added to the moments output file. """ -const timer_names_per_rank_moments = SortedDict{mk_int,Tuple{TimerNamesDict,Ref{mk_int}}}() +const timer_names_all_ranks_moments = TimerNamesDict() """ -Nested Dict containting the names of all timers that have been created on each MPI rank +Nested SortedDict with the names of all timers that have been created on any MPI rank and added to the dfns output file. """ -const timer_names_per_rank_dfns = SortedDict{mk_int,Tuple{TimerNamesDict,Ref{mk_int}}}() +const timer_names_all_ranks_dfns = TimerNamesDict() """ format_global_timer(; show=true, truncate_output=true) @@ -165,6 +165,7 @@ function format_global_timer(; show_output=false, threshold=1.0e-3, truncate_out # when we save the string to an HDF5 or NetCDF file. μ often appears because times # may be printed in microseconds. result = replace(result, "μ" => "u") + result = ascii(replace(result, !isascii=>' ')) end end @@ -175,8 +176,8 @@ Reset all global state of timers. """ function reset_mk_timers!() reset_timer!(global_timer) - empty!(timer_names_per_rank_moments) - empty!(timer_names_per_rank_dfns) + empty!(timer_names_all_ranks_moments) + empty!(timer_names_all_ranks_dfns) end end #timer_utils diff --git a/moment_kinetics/src/type_definitions.jl b/moment_kinetics/src/type_definitions.jl index e53fea73d..b5b14fafc 100644 --- a/moment_kinetics/src/type_definitions.jl +++ b/moment_kinetics/src/type_definitions.jl @@ -6,6 +6,8 @@ export mk_float export mk_int export OptionsDict +using OrderedCollections: OrderedDict + """ """ const mk_float = Float64 @@ -16,6 +18,6 @@ const mk_int = Int64 """ """ -const OptionsDict = Dict{String,Any} +const OptionsDict = OrderedDict{String,Any} end diff --git a/moment_kinetics/src/utils.jl b/moment_kinetics/src/utils.jl index ce92c5d98..74b8a3bbb 100644 --- a/moment_kinetics/src/utils.jl +++ b/moment_kinetics/src/utils.jl @@ -30,14 +30,14 @@ function __init__() end """ - get_unnormalized_parameters(input::Dict) + get_unnormalized_parameters(input::AbstractDict) get_unnormalized_parameters(input_filename::String) Get many parameters for the simulation setup given by `input` or in the file `input_filename`, in SI units and eV, returned as an OrderedDict. """ function get_unnormalized_parameters end -function get_unnormalized_parameters(input::Dict) +function get_unnormalized_parameters(input::AbstractDict) io_input, evolve_moments, t_params, z, z_spectral, r, r_spectral, vpa, vpa_spectral, vperp, vperp_spectral, gyrophase, gyrophase_spectral, vz, vz_spectral, vr, vr_spectral, vzeta, vzeta_spectral, composition, species, collisions, geometry, @@ -90,8 +90,8 @@ end """ print_unnormalized_parameters(input) -Print many parameters for the simulation setup given by `input` (a Dict of parameters or -a String giving a filename), in SI units and eV. +Print many parameters for the simulation setup given by `input` (an AbstractDict of +parameters or a String giving a filename), in SI units and eV. 
""" function print_unnormalized_parameters(args...; kwargs...) @@ -312,23 +312,26 @@ Merge two AbstractDicts `a` and `b`. Any elements that are AbstractDicts are als """ function recursive_merge end function recursive_merge(a::AbstractDict, b::AbstractDict) - return mergewith(recursive_merge, a, b) -end -function recursive_merge(a::AbstractDict, b) - error("Cannot merge a Dict with a non-Dict, got $a and $b") -end -function recursive_merge(a, b::AbstractDict) - error("Cannot merge a Dict with a non-Dict, got $a and $b") -end -function recursive_merge(a, b) - return b + result = deepcopy(a) + a_keys = collect(keys(a)) + for (k,v) ∈ pairs(b) + if k ∉ a_keys + result[k] = v + elseif isa(result[k], AbstractDict) && isa(v, AbstractDict) + result[k] = recursive_merge(result[k], v) + elseif isa(result[k], AbstractDict) || isa(v, AbstractDict) + error("Cannot merge a Dict with a non-Dict, got $(result[k]) and $v") + else + result[k] = v + end + end + return result end """ -Dict merge function for named keyword arguments -for case when input Dict is a mixed Dict of Dicts -and non-Dict float/int/string entries, and the -keyword arguments are also a mix of Dicts and non-Dicts +Dict merge function for named keyword arguments for case when input AbstractDict is a +mixed AbstractDict of AbstractDicts and non-AbstractDict float/int/string entries, and +the keyword arguments are also a mix of AbstractDicts and non-AbstractDicts """ function merge_dict_with_kwargs!(dict_base; args...) for (k,v) in args diff --git a/moment_kinetics/test/harrisonthompson.jl b/moment_kinetics/test/harrisonthompson.jl index d0ec7f31b..64cc24e66 100644 --- a/moment_kinetics/test/harrisonthompson.jl +++ b/moment_kinetics/test/harrisonthompson.jl @@ -62,63 +62,63 @@ function findphi(z, R_ion) end # default inputs for tests -test_input_finite_difference = Dict("composition" => OptionsDict("n_ion_species" => 1, - "n_neutral_species" => 0, - "electron_physics" => "boltzmann_electron_response", - "T_e" => 1.0, - "T_wall" => 1.0), - "ion_species_1" => OptionsDict("initial_density" => 1.0, - "initial_temperature" => 1.0), - "z_IC_ion_species_1" => OptionsDict("initialization_option" => "gaussian", - "density_amplitude" => 0.0, - "density_phase" => 0.0, - "upar_amplitude" => 0.0, - "upar_phase" => 0.0, - "temperature_amplitude" => 0.0, - "temperature_phase" => 0.0), - "vpa_IC_ion_species_1" => OptionsDict("initialization_option" => "gaussian", - "density_amplitude" => 1.0, - "density_phase" => 0.0, - "upar_amplitude" => 0.0, - "upar_phase" => 0.0, - "temperature_amplitude" => 0.0, - "temperature_phase" => 0.0), - "output" => OptionsDict("run_name" => "finite_difference", - "parallel_io" => false), - "evolve_moments" => OptionsDict("density" => false, - "parallel_flow" => false, - "parallel_pressure" => false, - "moments_conservation" => false), - "reactions" => OptionsDict("charge_exchange_frequency" => 0.0, - "ionization_frequency" => 0.0), - "timestepping" => OptionsDict("nstep" => 9000, - "dt" => 0.0005, - "nwrite" => 9000, - "split_operators" => false), - "r" => OptionsDict("ngrid" => 1, - "nelement" => 1, - "bc" => "periodic", - "discretization" => "finite_difference"), - "z" => OptionsDict("ngrid" => 100, - "nelement" => 1, - "bc" => "wall", - "discretization" => "finite_difference"), - "vpa" => OptionsDict("ngrid" => 200, - "nelement" => 1, - "L" => 8.0, - "bc" => "zero", - "discretization" => "finite_difference"), - "vz" => OptionsDict("ngrid" => 200, - "nelement" => 1, - "L" => 8.0, - "bc" => "zero", - "discretization" => 
"finite_difference"), - "ion_source_1" => OptionsDict("active" => true, - "source_strength" => ionization_frequency, - "source_T" => 0.25, - "z_profile" => "constant", - "r_profile" => "constant"), - ) +test_input_finite_difference = OptionsDict("composition" => OptionsDict("n_ion_species" => 1, + "n_neutral_species" => 0, + "electron_physics" => "boltzmann_electron_response", + "T_e" => 1.0, + "T_wall" => 1.0), + "ion_species_1" => OptionsDict("initial_density" => 1.0, + "initial_temperature" => 1.0), + "z_IC_ion_species_1" => OptionsDict("initialization_option" => "gaussian", + "density_amplitude" => 0.0, + "density_phase" => 0.0, + "upar_amplitude" => 0.0, + "upar_phase" => 0.0, + "temperature_amplitude" => 0.0, + "temperature_phase" => 0.0), + "vpa_IC_ion_species_1" => OptionsDict("initialization_option" => "gaussian", + "density_amplitude" => 1.0, + "density_phase" => 0.0, + "upar_amplitude" => 0.0, + "upar_phase" => 0.0, + "temperature_amplitude" => 0.0, + "temperature_phase" => 0.0), + "output" => OptionsDict("run_name" => "finite_difference", + "parallel_io" => false), + "evolve_moments" => OptionsDict("density" => false, + "parallel_flow" => false, + "parallel_pressure" => false, + "moments_conservation" => false), + "reactions" => OptionsDict("charge_exchange_frequency" => 0.0, + "ionization_frequency" => 0.0), + "timestepping" => OptionsDict("nstep" => 9000, + "dt" => 0.0005, + "nwrite" => 9000, + "split_operators" => false), + "r" => OptionsDict("ngrid" => 1, + "nelement" => 1, + "bc" => "periodic", + "discretization" => "finite_difference"), + "z" => OptionsDict("ngrid" => 100, + "nelement" => 1, + "bc" => "wall", + "discretization" => "finite_difference"), + "vpa" => OptionsDict("ngrid" => 200, + "nelement" => 1, + "L" => 8.0, + "bc" => "zero", + "discretization" => "finite_difference"), + "vz" => OptionsDict("ngrid" => 200, + "nelement" => 1, + "L" => 8.0, + "bc" => "zero", + "discretization" => "finite_difference"), + "ion_source_1" => OptionsDict("active" => true, + "source_strength" => ionization_frequency, + "source_T" => 0.25, + "z_profile" => "constant", + "r_profile" => "constant"), + ) test_input_chebyshev = recursive_merge(test_input_finite_difference, OptionsDict("output" => OptionsDict("run_name" => "chebyshev_pseudospectral"), diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl index 7e48cc80f..e61ae7d33 100644 --- a/moment_kinetics/test/jacobian_matrix_tests.jl +++ b/moment_kinetics/test/jacobian_matrix_tests.jl @@ -131,57 +131,57 @@ test_input = OptionsDict("output" => OptionsDict("run_name" => "jacobian_matrix" "discretization" => "gausslegendre_pseudospectral", "element_spacing_option" => "coarse_tails", ), - "timestepping" => Dict{String,Any}("type" => "KennedyCarpenterARK324", - "implicit_electron_advance" => false, - "implicit_electron_ppar" => true, - "implicit_ion_advance" => false, - "implicit_vpa_advection" => false, - "nstep" => 1, - "dt" => ion_dt, - "minimum_dt" => 1.0e-7, - "rtol" => 1.0e-4, - "max_increase_factor_near_last_fail" => 1.001, - "last_fail_proximity_factor" => 1.1, - "max_increase_factor" => 1.05, - "nwrite" => 10000, - "nwrite_dfns" => 10000, - "steady_state_residual" => true, - "converged_residual_value" => 1.0e-3, - ), - "electron_timestepping" => Dict{String,Any}("nstep" => 1, - "dt" => dt, - "maximum_dt" => 1.0, - "nwrite" => 10000, - "nwrite_dfns" => 100000, - "type" => "Fekete4(3)", - "rtol" => 1.0e-6, - "atol" => 1.0e-14, - "minimum_dt" => 1.0e-10, - 
"initialization_residual_value" => 2.5, - "converged_residual_value" => 1.0e-2, - "constraint_forcing_rate" => 2.321, - ), - "nonlinear_solver" => Dict{String,Any}("nonlinear_max_iterations" => 100, - "rtol" => 1.0e-5, - "atol" => 1.0e-15, - "preconditioner_update_interval" => 1, + "timestepping" => OptionsDict("type" => "KennedyCarpenterARK324", + "implicit_electron_advance" => false, + "implicit_electron_ppar" => true, + "implicit_ion_advance" => false, + "implicit_vpa_advection" => false, + "nstep" => 1, + "dt" => ion_dt, + "minimum_dt" => 1.0e-7, + "rtol" => 1.0e-4, + "max_increase_factor_near_last_fail" => 1.001, + "last_fail_proximity_factor" => 1.1, + "max_increase_factor" => 1.05, + "nwrite" => 10000, + "nwrite_dfns" => 10000, + "steady_state_residual" => true, + "converged_residual_value" => 1.0e-3, + ), + "electron_timestepping" => OptionsDict("nstep" => 1, + "dt" => dt, + "maximum_dt" => 1.0, + "nwrite" => 10000, + "nwrite_dfns" => 100000, + "type" => "Fekete4(3)", + "rtol" => 1.0e-6, + "atol" => 1.0e-14, + "minimum_dt" => 1.0e-10, + "initialization_residual_value" => 2.5, + "converged_residual_value" => 1.0e-2, + "constraint_forcing_rate" => 2.321, ), - "ion_numerical_dissipation" => Dict{String,Any}("vpa_dissipation_coefficient" => 1.0e0, + "nonlinear_solver" => OptionsDict("nonlinear_max_iterations" => 100, + "rtol" => 1.0e-5, + "atol" => 1.0e-15, + "preconditioner_update_interval" => 1, + ), + "ion_numerical_dissipation" => OptionsDict("vpa_dissipation_coefficient" => 1.0e0, + "force_minimum_pdf_value" => 0.0, + ), + "electron_numerical_dissipation" => OptionsDict("vpa_dissipation_coefficient" => 2.0, "force_minimum_pdf_value" => 0.0, ), - "electron_numerical_dissipation" => Dict{String,Any}("vpa_dissipation_coefficient" => 2.0, - "force_minimum_pdf_value" => 0.0, - ), - "neutral_numerical_dissipation" => Dict{String,Any}("vz_dissipation_coefficient" => 1.0e-1, - "force_minimum_pdf_value" => 0.0, - ), - "ion_source_1" => Dict{String,Any}("active" => true, - "z_profile" => "gaussian", - "z_width" => 0.125, - "source_strength" => 0.1, - "source_T" => 2.0, - ), - "krook_collisions" => Dict{String,Any}("use_krook" => true), + "neutral_numerical_dissipation" => OptionsDict("vz_dissipation_coefficient" => 1.0e-1, + "force_minimum_pdf_value" => 0.0, + ), + "ion_source_1" => OptionsDict("active" => true, + "z_profile" => "gaussian", + "z_width" => 0.125, + "source_strength" => 0.1, + "source_T" => 2.0, + ), + "krook_collisions" => OptionsDict("use_krook" => true), ) function get_mk_state(test_input)