From b32accf11f6f3b8c157dc6a4d178e0392b0bc499 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 7 May 2024 09:32:19 +0100 Subject: [PATCH 01/75] Remove update_chodura!() call from time_advance!() This was (should have been?) removed in #187, but probably got reintroduced in a bad merge. --- moment_kinetics/src/time_advance.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index e5cdc0ef4..76e5a73a7 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -22,7 +22,6 @@ using ..velocity_moments: update_neutral_density!, update_neutral_qz! using ..velocity_moments: update_neutral_uzeta!, update_neutral_uz!, update_neutral_ur! using ..velocity_moments: update_neutral_pzeta!, update_neutral_pz!, update_neutral_pr! using ..velocity_moments: calculate_ion_moment_derivatives!, calculate_neutral_moment_derivatives! -using ..velocity_moments: update_chodura! using ..velocity_grid_transforms: vzvrvzeta_to_vpavperp!, vpavperp_to_vzvrvzeta! using ..boundary_conditions: enforce_boundary_conditions! using ..boundary_conditions: enforce_neutral_boundary_conditions! @@ -1070,9 +1069,6 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr end if write_moments || write_dfns || finish_now - # update the diagnostic chodura condition - update_chodura!(moments,scratch[end].pdf,vpa,vperp,z,r,spectral_objects.r_spectral,composition,geometry,scratch_dummy,advect_objects.z_advect) - # Always synchronise here, regardless of if we changed region or not begin_serial_region(no_synchronize=true) _block_synchronize() From eaefdb93cff1fdd3828db01d2c9b6a692c495e01 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 6 May 2024 16:26:21 +0100 Subject: [PATCH 02/75] Fix timestep diagnostics after addition of gyroaveraging feature The gyroaveraged electric fields are not saved, so cannot be easily used in post-processing. 
For now, just copy `Er` into `gEr` where it is needed. --- moment_kinetics/src/load_data.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index 91992e9c6..e86ba4043 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -3283,6 +3283,11 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t speed = allocate_float(nz, nvpa, nvperp, nr, nspecies, nt) Er = get_variable(run_info, "Er") + gEr = allocate_float(nvperp, nz, nr, nspecies, nt) + for it ∈ 1:nt, is ∈ 1:nspecies, ir ∈ 1:nr, iz ∈ 1:nz + # Don't support gyroaveraging here (yet) + gEr[:,iz,ir,is,it] .= Er[iz,ir,it] + end setup_distributed_memory_MPI(1,1,1,1) setup_loop_ranges!(0, 1; s=nspecies, sn=run_info.n_neutral_species, r=nr, z=nz, @@ -3293,11 +3298,11 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t # Only need some struct with a 'speed' variable advect = (speed=@view(speed[:,:,:,:,is,it]),) # Only need Er - fields = (Er=@view(Er[:,:,it]),) + fields = (gEr=@view(gEr[:,:,:,is,it]),) @views update_speed_z!(advect, upar[:,:,is,it], vth[:,:,is,it], run_info.evolve_upar, run_info.evolve_ppar, fields, run_info.vpa, run_info.vperp, run_info.z, run_info.r, - run_info.time[it], run_info.geometry) + run_info.time[it], run_info.geometry, is) end # Horrible hack so that we can get the speed back without rearranging the From 7790cdd8f5d0660a6ba5e371d2b86d7e3a59da2c Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 8 May 2024 17:09:34 +0100 Subject: [PATCH 03/75] Use distributed-MPI parallelism in more of the tests --- ...ear_sound_wave_inputs_and_expected_data.jl | 5 ++ .../test/nonlinear_sound_wave_tests.jl | 53 ++++++++++--------- .../test/recycling_fraction_tests.jl | 25 ++++----- moment_kinetics/test/setup.jl | 4 +- 4 files changed, 47 insertions(+), 40 deletions(-) diff --git 
a/moment_kinetics/test/nonlinear_sound_wave_inputs_and_expected_data.jl b/moment_kinetics/test/nonlinear_sound_wave_inputs_and_expected_data.jl index cdee05ac9..d76a644f9 100644 --- a/moment_kinetics/test/nonlinear_sound_wave_inputs_and_expected_data.jl +++ b/moment_kinetics/test/nonlinear_sound_wave_inputs_and_expected_data.jl @@ -163,6 +163,11 @@ test_input_chebyshev = merge(test_input_finite_difference, "vz_ngrid" => 17, "vz_nelement" => 8)) +if global_size[] > 2 && global_size[] % 2 == 0 + # Test using distributed-memory + test_input_chebyshev["z_nelement_local"] = test_input_chebyshev["z_nelement"] ÷ 2 +end + test_input_chebyshev_split_1_moment = merge(test_input_chebyshev, Dict("run_name" => "chebyshev_pseudospectral_split_1_moment", diff --git a/moment_kinetics/test/nonlinear_sound_wave_tests.jl b/moment_kinetics/test/nonlinear_sound_wave_tests.jl index 0bb965494..590047000 100644 --- a/moment_kinetics/test/nonlinear_sound_wave_tests.jl +++ b/moment_kinetics/test/nonlinear_sound_wave_tests.jl @@ -6,12 +6,9 @@ using Base.Filesystem: tempname using moment_kinetics.coordinates: define_coordinate using moment_kinetics.input_structs: grid_input, advection_input -using moment_kinetics.load_data: open_readonly_output_file, load_coordinate_data, - load_species_data, load_fields_data, - load_ion_moments_data, load_pdf_data, - load_neutral_particle_moments_data, - load_neutral_pdf_data, load_time_data, load_species_data using moment_kinetics.interpolation: interpolate_to_grid_z, interpolate_to_grid_vpa +using moment_kinetics.load_data: get_run_info_no_setup, close_run_info, + postproc_load_variable using moment_kinetics.type_definitions: mk_float const analytical_rtol = 3.e-2 @@ -79,35 +76,43 @@ function run_test(test_input, rtol, atol, upar_rtol=nothing; args...) 
# Load and analyse output ######################### - path = joinpath(realpath(input["base_directory"]), name, name) + path = joinpath(realpath(input["base_directory"]), name) - # open the netcdf file containing moments data and give it the handle 'fid' - fid = open_readonly_output_file(path, "moments") + # open the output file(s) + run_info = get_run_info_no_setup(path; dfns=true) # load species, time coordinate data - n_ion_species, n_neutral_species = load_species_data(fid) - ntime, time = load_time_data(fid) - n_ion_species, n_neutral_species = load_species_data(fid) + n_ion_species = run_info.composition.n_ion_species + n_neutral_species = run_info.composition.n_neutral_species + ntime = run_info.nt + time = run_info.time # load fields data - phi_zrt, Er_zrt, Ez_zrt = load_fields_data(fid) + phi_zrt = postproc_load_variable(run_info, "phi") + Er_zrt = postproc_load_variable(run_info, "Er") + Ez_zrt = postproc_load_variable(run_info, "Ez") # load velocity moments data - n_ion_zrst, upar_ion_zrst, ppar_ion_zrst, qpar_ion_zrst, v_t_ion_zrst = load_ion_moments_data(fid) - n_neutral_zrst, upar_neutral_zrst, ppar_neutral_zrst, qpar_neutral_zrst, v_t_neutral_zrst = load_neutral_particle_moments_data(fid) - z, z_spectral = load_coordinate_data(fid, "z") + n_ion_zrst = postproc_load_variable(run_info, "density") + upar_ion_zrst = postproc_load_variable(run_info, "parallel_flow") + ppar_ion_zrst = postproc_load_variable(run_info, "parallel_pressure") + qpar_ion_zrst = postproc_load_variable(run_info, "parallel_heat_flux") + v_t_ion_zrst = postproc_load_variable(run_info, "thermal_speed") + n_neutral_zrst = postproc_load_variable(run_info, "density_neutral") + upar_neutral_zrst = postproc_load_variable(run_info, "uz_neutral") + ppar_neutral_zrst = postproc_load_variable(run_info, "pz_neutral") + qpar_neutral_zrst = postproc_load_variable(run_info, "qz_neutral") + v_t_neutral_zrst = postproc_load_variable(run_info, "thermal_speed_neutral") + z = run_info.z + z_spectral = 
run_info.z_spectral - close(fid) - - # open the netcdf file containing pdf data - fid = open_readonly_output_file(path, "dfns") - # load particle distribution function (pdf) data - f_ion_vpavperpzrst = load_pdf_data(fid) - f_neutral_vzvrvzetazrst = load_neutral_pdf_data(fid) - vpa, vpa_spectral = load_coordinate_data(fid, "vpa") + f_ion_vpavperpzrst = postproc_load_variable(run_info, "f") + f_neutral_vzvrvzetazrst = postproc_load_variable(run_info, "f_neutral") + vpa = run_info.vpa + vpa_spectral = run_info.vpa_spectral - close(fid) + close_run_info(run_info) phi = phi_zrt[:,1,:] n_ion = n_ion_zrst[:,1,:,:] diff --git a/moment_kinetics/test/recycling_fraction_tests.jl b/moment_kinetics/test/recycling_fraction_tests.jl index 9e3c1697d..93c649390 100644 --- a/moment_kinetics/test/recycling_fraction_tests.jl +++ b/moment_kinetics/test/recycling_fraction_tests.jl @@ -12,10 +12,8 @@ using MPI using moment_kinetics.coordinates: define_coordinate using moment_kinetics.input_structs: grid_input, advection_input using moment_kinetics.interpolation: interpolate_to_grid_z -using moment_kinetics.load_data: open_readonly_output_file -using moment_kinetics.load_data: load_fields_data, - load_pdf_data, load_time_data, - load_species_data +using moment_kinetics.load_data: get_run_info_no_setup, close_run_info, + postproc_load_variable # default inputs for tests test_input = Dict("n_ion_species" => 1, @@ -91,6 +89,10 @@ test_input = Dict("n_ion_species" => 1, "source_strength" => 2.0, "source_T" => 2.0)) +if global_size[] > 2 && global_size[] % 2 == 0 + # Test using distributed-memory + test_input["z_nelement_local"] = test_input["z_nelement"] ÷ 2 +end test_input_split1 = merge(test_input, Dict("run_name" => "split1", @@ -203,20 +205,15 @@ function run_test(test_input, expected_phi; rtol=4.e-14, atol=1.e-15, args...) 
# Load and analyse output ######################### - path = joinpath(realpath(input["base_directory"]), name, name) + path = joinpath(realpath(input["base_directory"]), name) - # open the netcdf file and give it the handle 'fid' - fid = open_readonly_output_file(path,"moments") + # open the output file(s) + run_info = get_run_info_no_setup(path) - # load species, time coordinate data - n_ion_species, n_neutral_species = load_species_data(fid) - ntime, time = load_time_data(fid) - n_ion_species, n_neutral_species = load_species_data(fid) - # load fields data - phi_zrt, Er_zrt, Ez_zrt = load_fields_data(fid) + phi_zrt = postproc_load_variable(run_info, "phi") - close(fid) + close_run_info(run_info) phi = phi_zrt[:,1,:] end diff --git a/moment_kinetics/test/setup.jl b/moment_kinetics/test/setup.jl index 3bb9896ef..555824d00 100644 --- a/moment_kinetics/test/setup.jl +++ b/moment_kinetics/test/setup.jl @@ -13,9 +13,9 @@ using moment_kinetics module MKTestUtilities export use_verbose, force_optional_dependencies, @long, quietoutput, get_MPI_tempdir, - global_rank, maxabs_norm, @testset_skip + global_rank, global_size, maxabs_norm, @testset_skip -using moment_kinetics.communication: comm_world, global_rank +using moment_kinetics.communication: comm_world, global_rank, global_size using moment_kinetics.command_line_options: get_options using MPI From 0366cb8f97e3be3183a89f13b92b9a4e2800d514 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 12 May 2024 11:53:02 +0100 Subject: [PATCH 04/75] Support selecting a subset of time points in timestep_diagnostics() --- .../src/makie_post_processing.jl | 66 +++++++++++-------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index 725a16e86..75f2279bd 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ 
b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -6893,9 +6893,9 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) input = Dict_to_NamedTuple(input_dict["timestep_diagnostics"]) - steps_fig = nothing - dt_fig = nothing - CFL_fig = nothing + steps_fig = nothing + dt_fig = nothing + CFL_fig = nothing if input.plot # Plot numbers of steps and numbers of failures @@ -6916,13 +6916,19 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) prefix = ri.run_name * " " end - plot_1d(ri.time, get_variable(ri, "steps_per_output"; it=it); + if it !== nothing + time = ri.time[it] + else + time = ri.time + end + + plot_1d(time, get_variable(ri, "steps_per_output"; it=it); label=prefix * "steps", ax=ax) # Fudge to create an invisible line on ax_failures that cycles the line colors # and adds a label for "steps_per_output" to the plot because we create the # legend from ax_failures. - plot_1d([ri.time[1]], [0]; label=prefix * "steps", ax=ax_failures) - plot_1d(ri.time, get_variable(ri, "failures_per_output"; it=it); + plot_1d([time[1]], [0]; label=prefix * "steps", ax=ax_failures) + plot_1d(time, get_variable(ri, "failures_per_output"; it=it); label=prefix * "failures", ax=ax_failures) failure_caused_by_per_output = get_variable(ri, @@ -6931,52 +6937,52 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) counter = 0 # Ion pdf failure counter counter += 1 - plot_1d(ri.time, @view failure_caused_by_per_output[counter,:]; + plot_1d(time, @view failure_caused_by_per_output[counter,:]; label=prefix * "failures caused by f_ion", ax=ax_failures) if ri.evolve_density # Ion density failure counter counter += 1 - plot_1d(ri.time, @view failure_caused_by_per_output[counter,:]; + plot_1d(time, @view failure_caused_by_per_output[counter,:]; linestyle=:dash, label=prefix * "failures caused by n_ion", ax=ax_failures) end if ri.evolve_upar # Ion flow failure counter counter += 1 - plot_1d(ri.time, 
@view failure_caused_by_per_output[counter,:]; + plot_1d(time, @view failure_caused_by_per_output[counter,:]; linestyle=:dash, label=prefix * "failures caused by u_ion", ax=ax_failures) end if ri.evolve_ppar # Ion flow failure counter counter += 1 - plot_1d(ri.time, @view failure_caused_by_per_output[counter,:]; + plot_1d(time, @view failure_caused_by_per_output[counter,:]; linestyle=:dash, label=prefix * "failures caused by p_ion", ax=ax_failures) end if ri.n_neutral_species > 0 # Neutral pdf failure counter counter += 1 - plot_1d(ri.time, @view failure_caused_by_per_output[counter,:]; + plot_1d(time, @view failure_caused_by_per_output[counter,:]; label=prefix * "failures caused by f_neutral", ax=ax_failures) if ri.evolve_density # Neutral density failure counter counter += 1 - plot_1d(ri.time, @view failure_caused_by_per_output[counter,:]; + plot_1d(time, @view failure_caused_by_per_output[counter,:]; linestyle=:dash, label=prefix * "failures caused by n_neutral", ax=ax_failures) end if ri.evolve_upar # Neutral flow failure counter counter += 1 - plot_1d(ri.time, @view failure_caused_by_per_output[counter,:]; + plot_1d(time, @view failure_caused_by_per_output[counter,:]; linestyle=:dash, label=prefix * "failures caused by u_neutral", ax=ax_failures) end if ri.evolve_ppar # Neutral flow failure counter counter += 1 - plot_1d(ri.time, @view failure_caused_by_per_output[counter,:]; + plot_1d(time, @view failure_caused_by_per_output[counter,:]; linestyle=:dash, label=prefix * "failures caused by p_neutral", ax=ax_failures) end @@ -7016,6 +7022,11 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) else prefix = ri.run_name * " " end + if it !== nothing + time = ri.time[it] + else + time = ri.time + end CFL_vars = ["minimum_CFL_ion_z", "minimum_CFL_ion_vpa"] if ri.n_neutral_species > 0 push!(CFL_vars, "minimum_CFL_neutral_z", "minimum_CFL_neutral_vz") @@ -7023,7 +7034,7 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) 
for varname ∈ CFL_vars var = get_variable(ri, varname) maxval = min(maxval, maximum(var)) - plot_1d(ri.time, var; ax=ax, label=prefix*varname) + plot_1d(time, var; ax=ax, label=prefix*varname) end end ylims!(ax, 0.0, 4.0 * maxval) @@ -7037,56 +7048,59 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) else prefix = ri.run_name * " " end + if it !== nothing + time = ri.time[it] + else + time = ri.time + end limit_caused_by_per_output = get_variable(ri, "limit_caused_by_per_output"; it=it) counter = 0 - # Accuracy limit counter - counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; + plot_1d(time, @view limit_caused_by_per_output[counter,:]; label=prefix * "RK accuracy", ax=ax) # Maximum timestep increase limit counter counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; + plot_1d(time, @view limit_caused_by_per_output[counter,:]; label=prefix * "max timestep increase", ax=ax) # Slower maximum timestep increase near last failure limit counter counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; + plot_1d(time, @view limit_caused_by_per_output[counter,:]; label=prefix * "max timestep increase near last fail", ax=ax) # Minimum timestep limit counter counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; + plot_1d(time, @view limit_caused_by_per_output[counter,:]; label=prefix * "min timestep", ax=ax) # Maximum timestep limit counter counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; + plot_1d(time, @view limit_caused_by_per_output[counter,:]; label=prefix * "max timestep", ax=ax) # Ion z advection counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; + plot_1d(time, @view limit_caused_by_per_output[counter,:]; label=prefix * "ion z advect", ax=ax) # Ion vpa advection counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; + plot_1d(time, @view limit_caused_by_per_output[counter,:]; 
label=prefix * "ion vpa advect", ax=ax) if ri.n_neutral_species > 0 # Ion z advection counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; + plot_1d(time, @view limit_caused_by_per_output[counter,:]; label=prefix * "neutral z advect", ax=ax) # Ion vpa advection counter += 1 - plot_1d(ri.time, @view limit_caused_by_per_output[counter,:]; + plot_1d(time, @view limit_caused_by_per_output[counter,:]; label=prefix * "neutral vz advect", ax=ax) end From bf2862a5af7c698977024946fc5cb324f793a8ef Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 9 May 2024 09:05:02 +0100 Subject: [PATCH 05/75] Record which variable had largest error norm when RK accuracy limited dt Note this update changes the indexing of which factor caused the timestep limit, so timestep limits from older output files will no longer be labelled correctly by the postprocessing tools. --- .../src/makie_post_processing.jl | 46 ++++++++++++-- moment_kinetics/src/file_io.jl | 4 +- moment_kinetics/src/input_structs.jl | 1 + moment_kinetics/src/runge_kutta.jl | 30 +++++---- moment_kinetics/src/time_advance.jl | 62 +++++++++++++++---- 5 files changed, 113 insertions(+), 30 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index 75f2279bd..dbf69ab4b 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -7040,7 +7040,8 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) ylims!(ax, 0.0, 4.0 * maxval) put_legend_right(CFL_fig, ax) - limits_fig, ax = get_1d_ax(; xlabel="time", ylabel="number of limits per factor per output") + limits_fig, ax = get_1d_ax(; xlabel="time", ylabel="number of limits per factor per output", + size=(600, 500)) for ri ∈ run_info if length(run_info) == 1 @@ -7059,9 +7060,6 @@ function 
timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) it=it) counter = 0 - plot_1d(time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "RK accuracy", ax=ax) - # Maximum timestep increase limit counter counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; @@ -7082,6 +7080,46 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) plot_1d(time, @view limit_caused_by_per_output[counter,:]; label=prefix * "max timestep", ax=ax) + # Accuracy limit counters + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "ion pdf RK accuracy", ax=ax) + if ri.evolve_density + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "ion density RK accuracy", ax=ax) + end + if ri.evolve_upar + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "ion upar RK accuracy", ax=ax) + end + if ri.evolve_ppar + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "ion ppar RK accuracy", ax=ax) + end + if ri.n_neutral_species > 0 + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "neutral pdf RK accuracy", ax=ax) + if ri.evolve_density + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "neutral density RK accuracy", ax=ax) + end + if ri.evolve_upar + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "neutral uz RK accuracy", ax=ax) + end + if ri.evolve_ppar + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "neutral pz RK accuracy", ax=ax) + end + end + # Ion z advection counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl index bae07d9bc..16c93099a 100644 --- a/moment_kinetics/src/file_io.jl +++ 
b/moment_kinetics/src/file_io.jl @@ -703,9 +703,9 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species, parallel_io=parallel_io, description="cumulative count of how many times each variable caused a " * "timestep failure for the run") - n_limit_vars = 5 + 2 + n_limit_vars = 4 + 1 + evolve_density + evolve_upar + evolve_ppar + 2 if n_neutral_species > 0 - n_limit_vars += 2 + n_limit_vars += 1 + evolve_density + evolve_upar + evolve_ppar + 2 end io_limit_caused_by = create_dynamic_variable!( dynamic, "limit_caused_by", mk_int; diagnostic_var_size=n_limit_vars, diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index fe7661e9a..89afcb830 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -39,6 +39,7 @@ an option but known at compile time when a `time_info` struct is passed as a fun argument. """ struct time_info{Terrorsum <: Real} + n_variables::mk_int nstep::mk_int end_time::mk_float dt::MPISharedArray{mk_float,1} diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index 92baf111b..7bb70c3bb 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -706,10 +706,10 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er CFL_limits = MPI.Allreduce(CFL_limits, min, comm_inter_block[]) CFL_limit_caused_by = argmin(CFL_limits) CFL_limit = CFL_limits[CFL_limit_caused_by] - # Reserve first five entries of t_params.limit_caused_by for accuracy, - # max_increase_factor, max_increase_factor_near_fail, minimum_dt and maximum_dt - # limits. - this_limit_caused_by = CFL_limit_caused_by + 5 + # Reserve first four entries of t_params.limit_caused_by for max_increase_factor, + # max_increase_factor_near_fail, minimum_dt and maximum_dt limits, then the next + # `n_variables` for RK accuracy limits. 
+ this_limit_caused_by = CFL_limit_caused_by + 4 + t_params.n_variables end if error_norm_method == "Linf" @@ -717,10 +717,12 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er error_norms = MPI.Reduce(error_norms, max, comm_block[]; root=0) error_norm = nothing + max_error_variable_index = -1 @serial_region begin # Get maximum error over all blocks error_norms = MPI.Allreduce(error_norms, max, comm_inter_block[]) - error_norm = maximum(error_norms) + max_error_variable_index = argmax(error_norms) + error_norm = error_norms[max_error_variable_index] end error_norm = MPI.bcast(error_norm, 0, comm_block[]) elseif error_norm_method == "L2" @@ -728,6 +730,7 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er error_norms = MPI.Reduce(error_norms, +, comm_block[]; root=0) error_norm = nothing + max_error_variable_index = -1 @serial_region begin # Get maximum error over all blocks error_norms = MPI.Allreduce(error_norms, +, comm_inter_block[]) @@ -740,6 +743,9 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er # larger number of points in the distribution functions does not mean that # error on the moments is ignored. 
error_norm = mean(error_norms) + + # Record which variable had the maximum error + max_error_variable_index = argmax(error_norms) end error_norm = MPI.bcast(error_norm, 0, comm_block[]) @@ -790,7 +796,6 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er # Call the 'cause' of the timestep failure the variable that has the biggest # error norm here - max_error_variable_index = argmax(error_norms) t_params.failure_caused_by[max_error_variable_index] += 1 #println("t=$t, timestep failed, error_norm=$error_norm, error_norms=$error_norms, decreasing timestep to ", t_params.dt[]) @@ -821,12 +826,15 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er if t_params.dt[] > CFL_limit t_params.dt[] = CFL_limit else - this_limit_caused_by = 1 + # Reserve first four entries of t_params.limit_caused_by for + # max_increase_factor, max_increase_factor_near_fail, minimum_dt and + # maximum_dt limits. + this_limit_caused_by = 4 + max_error_variable_index end # Limit so timestep cannot increase by a large factor, which might lead to # numerical instability in some cases. 
- max_cap_limit_caused_by = 2 + max_cap_limit_caused_by = 1 if isinf(t_params.max_increase_factor_near_last_fail) # Not using special timestep limiting near last failed dt value max_cap = t_params.max_increase_factor * t_params.previous_dt[] @@ -843,7 +851,7 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er max_cap = max(slow_increase_threshold, t_params.max_increase_factor_near_last_fail * t_params.previous_dt[]) - max_cap_limit_caused_by = 3 + max_cap_limit_caused_by = 2 end end if t_params.dt[] > max_cap @@ -854,13 +862,13 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er # Prevent timestep from going below minimum_dt if t_params.dt[] < t_params.minimum_dt t_params.dt[] = t_params.minimum_dt - this_limit_caused_by = 4 + this_limit_caused_by = 3 end # Prevent timestep from going above maximum_dt if t_params.dt[] > t_params.maximum_dt t_params.dt[] = t_params.maximum_dt - this_limit_caused_by = 5 + this_limit_caused_by = 4 end t_params.limit_caused_by[this_limit_caused_by] += 1 diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 76e5a73a7..cc113abfc 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -231,8 +231,8 @@ end Create a [`input_structs.time_info`](@ref) struct using the settings in `t_input`. 
""" -function setup_time_info(t_input, code_time, dt_reload, dt_before_last_fail_reload, - manufactured_solns_input, io_input) +function setup_time_info(t_input, n_variables, code_time, dt_reload, + dt_before_last_fail_reload, manufactured_solns_input, io_input) rk_coefs, n_rk_stages, rk_order, adaptive, low_storage, CFL_prefactor = setup_runge_kutta_coefficients!(t_input.type, t_input.CFL_prefactor, @@ -286,9 +286,10 @@ function setup_time_info(t_input, code_time, dt_reload, dt_before_last_fail_relo else error_sum_zero = 0.0 end - return time_info(t_input.nstep, end_time, dt_shared, previous_dt_shared, next_output_time, - dt_before_output, dt_before_last_fail, CFL_prefactor, step_to_output, - Ref(0), Ref(0), mk_int[], mk_int[], moments_output_times, + return time_info(n_variables, t_input.nstep, end_time, dt_shared, previous_dt_shared, + next_output_time, dt_before_output, dt_before_last_fail, + CFL_prefactor, step_to_output, Ref(0), Ref(0), mk_int[], mk_int[], + t_input.nwrite, t_input.nwrite_dfns, moments_output_times, dfns_output_times, t_input.type, rk_coefs, n_rk_stages, rk_order, adaptive, low_storage, t_input.rtol, t_input.atol, t_input.atol_upar, t_input.step_update_prefactor, t_input.max_increase_factor, @@ -318,50 +319,85 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop n_ion_species = composition.n_ion_species n_neutral_species = composition.n_neutral_species ion_mom_diss_coeff = num_diss_params.ion.moment_dissipation_coefficient - electron_mom_diss_coeff = num_diss_params.electron.moment_dissipation_coefficient neutral_mom_diss_coeff = num_diss_params.neutral.moment_dissipation_coefficient - t_params = setup_time_info(t_input, code_time, dt_reload, dt_before_last_fail_reload, - manufactured_solns_input, io_input) + n_variables = 1 # pdf + if moments.evolve_density + # ion density + n_variables += 1 + end + if moments.evolve_upar + # ion flow + n_variables += 1 + end + if moments.evolve_ppar + # ion pressure + 
n_variables += 1 + end + if composition.n_neutral_species > 0 + # neutral pdf + n_variables += 1 + if moments.evolve_density + # neutral density + n_variables += 1 + end + if moments.evolve_upar + # neutral flow + n_variables += 1 + end + if moments.evolve_ppar + # neutral pressure + n_variables += 1 + end + end + t_params = setup_time_info(t_input, n_variables, code_time, dt_reload, + dt_before_last_fail_reload, manufactured_solns_input, + io_input) # Make Vectors that count which variable caused timestep limits and timestep failures # the right length. Do this setup even when not using adaptive timestepping, because # it is easier than modifying the file I/O according to whether we are using adaptive # timestepping. # - # Entries for limit by accuracy (which is an average over all variables), - # max_increase_factor, minimum_dt and maximum_dt - push!(t_params.limit_caused_by, 0, 0, 0, 0, 0) + # Entries for limit by max_increase_factor, max_increase_factor_near_last_fail, + # minimum_dt and maximum_dt. 
+ push!(t_params.limit_caused_by, 0, 0, 0, 0) # ion pdf - push!(t_params.limit_caused_by, 0, 0) + push!(t_params.limit_caused_by, 0, 0, 0) # RK accuracy plus 2 CFL limits push!(t_params.failure_caused_by, 0) if moments.evolve_density # ion density + push!(t_params.limit_caused_by, 0) # RK accuracy push!(t_params.failure_caused_by, 0) end if moments.evolve_upar # ion flow + push!(t_params.limit_caused_by, 0) # RK accuracy push!(t_params.failure_caused_by, 0) end if moments.evolve_ppar # ion pressure + push!(t_params.limit_caused_by, 0) # RK accuracy push!(t_params.failure_caused_by, 0) end if composition.n_neutral_species > 0 # neutral pdf - push!(t_params.limit_caused_by, 0, 0) + push!(t_params.limit_caused_by, 0, 0, 0) # RK accuracy plus 2 CFL limits push!(t_params.failure_caused_by, 0) if moments.evolve_density # neutral density + push!(t_params.limit_caused_by, 0) # RK accuracy push!(t_params.failure_caused_by, 0) end if moments.evolve_upar # neutral flow + push!(t_params.limit_caused_by, 0) # RK accuracy push!(t_params.failure_caused_by, 0) end if moments.evolve_ppar # neutral pressure + push!(t_params.limit_caused_by, 0) # RK accuracy push!(t_params.failure_caused_by, 0) end end From e749e2e3b868d2699e8fd767d672e645eda103da Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 9 May 2024 09:16:12 +0100 Subject: [PATCH 06/75] Use some different line styles so limit_caused_by plot is easier to read --- .../src/makie_post_processing.jl | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index dbf69ab4b..2c971b730 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -7083,63 +7083,70 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) # Accuracy 
limit counters counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "ion pdf RK accuracy", ax=ax) + label=prefix * "ion pdf RK accuracy", ax=ax, linestyle=:dash) if ri.evolve_density counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "ion density RK accuracy", ax=ax) + label=prefix * "ion density RK accuracy", ax=ax, + linestyle=:dash) end if ri.evolve_upar counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "ion upar RK accuracy", ax=ax) + label=prefix * "ion upar RK accuracy", ax=ax, + linestyle=:dash) end if ri.evolve_ppar counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "ion ppar RK accuracy", ax=ax) + label=prefix * "ion ppar RK accuracy", ax=ax, + linestyle=:dash) end if ri.n_neutral_species > 0 counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "neutral pdf RK accuracy", ax=ax) + label=prefix * "neutral pdf RK accuracy", ax=ax, + linestyle=:dash) if ri.evolve_density counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "neutral density RK accuracy", ax=ax) + label=prefix * "neutral density RK accuracy", ax=ax, + linestyle=:dash) end if ri.evolve_upar counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "neutral uz RK accuracy", ax=ax) + label=prefix * "neutral uz RK accuracy", ax=ax, + linestyle=:dash) end if ri.evolve_ppar counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "neutral pz RK accuracy", ax=ax) + label=prefix * "neutral pz RK accuracy", ax=ax, + linestyle=:dash) end end # Ion z advection counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "ion z advect", ax=ax) + label=prefix * "ion z advect", ax=ax, linestyle=:dot) # Ion vpa advection counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; - 
label=prefix * "ion vpa advect", ax=ax) + label=prefix * "ion vpa advect", ax=ax, linestyle=:dot) if ri.n_neutral_species > 0 # Ion z advection counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "neutral z advect", ax=ax) + label=prefix * "neutral z advect", ax=ax, linestyle=:dot) # Ion vpa advection counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "neutral vz advect", ax=ax) + label=prefix * "neutral vz advect", ax=ax, linestyle=:dot) end if counter > size(limit_caused_by_per_output, 1) From 14fec1d67fac4ff7f6be7848f0dd9446cc9b6ca2 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 8 May 2024 13:23:30 +0100 Subject: [PATCH 07/75] Try to recover by decreasing timestep if error_norm is NaN This may work as long as the timestep is above `dt_minimum` so that it can actually be decreased. --- moment_kinetics/src/runge_kutta.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index 7bb70c3bb..80ee12976 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -756,7 +756,7 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er # Use current_dt instead of t_params.dt[] here because we are about to write to # the shared-memory variable t_params.dt[] below, and we do not want to add an extra # _block_synchronize() call after reading it here. 
- if error_norm > 1.0 && current_dt > t_params.minimum_dt + if (error_norm > 1.0 || isnan(error_norm)) && current_dt > t_params.minimum_dt # Timestep failed, reduce timestep and re-try # Set scratch[end] equal to scratch[1] to start the timestep over From 216870318d4449399eb07370ed8e26029e6596b1 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 15 May 2024 11:49:25 +0100 Subject: [PATCH 08/75] Fix output time for adaptive timestep runs Bug meant `t_params.dt[]` on the step up to an output was being made slightly too large. --- moment_kinetics/src/runge_kutta.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index 80ee12976..3bbb872d9 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -885,7 +885,7 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er @serial_region begin if t + t_params.dt[] >= t_params.next_output_time[] t_params.dt_before_output[] = t_params.dt[] - t_params.dt[] = t_params.next_output_time[] - t + t_params.dt[] = t_params.next_output_time[] - current_time t_params.step_to_output[] = true end end From 152f917bd858e7c7696ca5f3fd33632d910bf250 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 17 May 2024 10:11:30 +0100 Subject: [PATCH 09/75] Fix CFL condition diagnostics for non-moment-kinetic runs Need to handle external source coefficients that may not be saved in non-moment-kinetic cases. Also need to pass `gEz` instead of `Ez`. 
--- moment_kinetics/src/load_data.jl | 50 +++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index e86ba4043..49b0f9952 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -3336,9 +3336,6 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t end end elseif variable_name == "vpa_advect_speed" - # update_speed_z!() requires all dimensions to be present, so do *not* pass kwargs - # to get_variable() in this case. Instead select a slice of the result. - Ez = get_variable(run_info, "Ez") density = get_variable(run_info, "density") upar = get_variable(run_info, "parallel_flow") ppar = get_variable(run_info, "parallel_pressure") @@ -3352,9 +3349,21 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t dqpar_dz = get_z_derivative(run_info, "parallel_heat_flux") if run_info.external_source_settings.ion.active external_source_amplitude = get_variable(run_info, "external_source_amplitude") - external_source_density_amplitude = get_variable(run_info, "external_source_density_amplitude") - external_source_momentum_amplitude = get_variable(run_info, "external_source_momentum_amplitude") - external_source_pressure_amplitude = get_variable(run_info, "external_source_pressure_amplitude") + if run_info.evolve_density + external_source_density_amplitude = get_variable(run_info, "external_source_density_amplitude") + else + external_source_density_amplitude = zeros(0,0,run_info.nt) + end + if run_info.evolve_upar + external_source_momentum_amplitude = get_variable(run_info, "external_source_momentum_amplitude") + else + external_source_momentum_amplitude = zeros(0,0,run_info.nt) + end + if run_info.evolve_ppar + external_source_pressure_amplitude = get_variable(run_info, "external_source_pressure_amplitude") + else + external_source_pressure_amplitude = zeros(0,0,run_info.nt) + 
end else external_source_amplitude = zeros(0,0,run_info.nt) external_source_density_amplitude = zeros(0,0,run_info.nt) @@ -3366,6 +3375,15 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t nvperp = run_info.vperp.n nvpa = run_info.vpa.n + # update_speed_z!() requires all dimensions to be present, so do *not* pass kwargs + # to get_variable() in this case. Instead select a slice of the result. + Ez = get_variable(run_info, "Ez") + gEz = allocate_float(nvperp, nz, nr, nspecies, nt) + for it ∈ 1:nt, is ∈ 1:nspecies, ir ∈ 1:nr, iz ∈ 1:nz + # Don't support gyroaveraging here (yet) + gEz[:,iz,ir,is,it] .= Ez[iz,ir,it] + end + speed=allocate_float(nvpa, nvperp, nz, nr, nspecies, nt) setup_distributed_memory_MPI(1,1,1,1) setup_loop_ranges!(0, 1; s=nspecies, sn=run_info.n_neutral_species, r=nr, z=nz, @@ -3376,7 +3394,7 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t # Only need some struct with a 'speed' variable advect = [(speed=@view(speed[:,:,:,:,is,it]),) for is ∈ 1:nspecies] # Only need Ez - fields = (Ez=@view(Ez[:,:,it]),) + fields = (gEz=@view(gEz[:,:,:,:,it]),) @views moments = (ion=(dppar_dz=dppar_dz[:,:,:,it], dupar_dz=dupar_dz[:,:,:,it], dvth_dz=dvth_dz[:,:,:,it], @@ -3479,9 +3497,21 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t dqz_dz = get_z_derivative(run_info, "qz_neutral") if run_info.external_source_settings.neutral.active external_source_amplitude = get_variable(run_info, "external_source_neutral_amplitude") - external_source_density_amplitude = get_variable(run_info, "external_source_neutral_density_amplitude") - external_source_momentum_amplitude = get_variable(run_info, "external_source_neutral_momentum_amplitude") - external_source_pressure_amplitude = get_variable(run_info, "external_source_neutral_pressure_amplitude") + if run_info.evolve_density + external_source_density_amplitude = get_variable(run_info, 
"external_source_neutral_density_amplitude") + else + external_source_density_amplitude = zeros(0,0,run_info.nt) + end + if run_info.evolve_upar + external_source_momentum_amplitude = get_variable(run_info, "external_source_neutral_momentum_amplitude") + else + external_source_momentum_amplitude = zeros(0,0,run_info.nt) + end + if run_info.evolve_ppar + external_source_pressure_amplitude = get_variable(run_info, "external_source_neutral_pressure_amplitude") + else + external_source_pressure_amplitude = zeros(0,0,run_info.nt) + end else external_source_amplitude = zeros(0,0,run_info.nt) external_source_density_amplitude = zeros(0,0,run_info.nt) From 1f9b8cd7a1ac042673a81d7d4317c059af19e1f3 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 17 May 2024 10:14:12 +0100 Subject: [PATCH 10/75] Fix hard_force_moment_constraints!() for non-moment-kinetic case This is useful as the function may be called in implicit timestepping functions without first checking whether the run is moment-kinetic. --- moment_kinetics/src/moment_constraints.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/moment_kinetics/src/moment_constraints.jl b/moment_kinetics/src/moment_constraints.jl index ae49821dd..5b78063b2 100644 --- a/moment_kinetics/src/moment_constraints.jl +++ b/moment_kinetics/src/moment_constraints.jl @@ -75,6 +75,10 @@ function hard_force_moment_constraints!(f, moments, vpa) A = 1.0 / I0 @. f1d = A * f1d + B = NaN + C = NaN + else + A = NaN B = NaN C = NaN end @@ -125,6 +129,10 @@ function hard_force_moment_constraints_neutral!(f, moments, vz) A = 1.0 / I0 @. f1d = A * f1d + B = NaN + C = NaN + else + A = NaN B = NaN C = NaN end From 4bb6d550c6cfc7fa403a4d85cfa65ac2184c76e1 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 17 May 2024 12:17:31 +0100 Subject: [PATCH 11/75] Use dashed lines for neutral CFL limits Lets us see if ion and neutral limits overlap. 
--- .../makie_post_processing/src/makie_post_processing.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index 2c971b730..153119fab 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -7034,7 +7034,12 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) for varname ∈ CFL_vars var = get_variable(ri, varname) maxval = min(maxval, maximum(var)) - plot_1d(time, var; ax=ax, label=prefix*varname) + if occursin("neutral", varname) + linestyle = :dash + else + linestyle = nothing + end + plot_1d(time, var; ax=ax, label=prefix*varname, linestyle=linestyle) end end ylims!(ax, 0.0, 4.0 * maxval) From 805f27f4fef6329453f0a87fb7c1db88f3d8ac1b Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 17 May 2024 15:11:49 +0100 Subject: [PATCH 12/75] Use NaNMath when plotting CFL limits to avoid errors --- .../makie_post_processing/src/makie_post_processing.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index 153119fab..b9db838f6 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -7033,7 +7033,7 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) end for varname ∈ CFL_vars var = get_variable(ri, varname) - maxval = min(maxval, maximum(var)) + maxval = NaNMath.min(maxval, NaNMath.maximum(var)) if occursin("neutral", varname) linestyle = :dash else From de88da2d268222e060e34a2180d3e1c65f47b6ff Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 22 May 2024 09:27:51 
+0100 Subject: [PATCH 13/75] Make use of `yscale` slightly more robust in plot_1d() --- .../makie_post_processing/src/makie_post_processing.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index b9db838f6..e89ef213e 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -2462,9 +2462,6 @@ function plot_1d(xcoord, data; ax=nothing, xlabel=nothing, ylabel=nothing, title if title !== nothing ax.title = title end - if yscale !== nothing - ax.yscale = yscale - end if transform !== identity # Use transform to allow user to do something like data = abs.(data) @@ -2476,6 +2473,10 @@ function plot_1d(xcoord, data; ax=nothing, xlabel=nothing, ylabel=nothing, title l = lines!(ax, xcoord, data; kwargs...) + if yscale !== nothing + ax.yscale = yscale + end + if fig === nothing return l else From f6dacc1acd598ea46f3e437333236c904907a71f Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 22 May 2024 09:38:45 +0100 Subject: [PATCH 14/75] Fix y-axis limits when using log scale in animate_f_unnorm_vs_vpa --- .../src/makie_post_processing.jl | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index e89ef213e..bb46716be 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -4063,8 +4063,9 @@ end function animate_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, neutral=false, is=1, iz=nothing, fig=nothing, ax=nothing, - frame_index=nothing, outfile=nothing, transform=identity, - 
axis_args=Dict{Symbol,Any}(), kwargs...) + frame_index=nothing, outfile=nothing, yscale=nothing, + transform=identity, axis_args=Dict{Symbol,Any}(), + kwargs...) if input === nothing if neutral input = Dict_to_NamedTuple(input_dict_dfns["f_neutral"]) @@ -4142,20 +4143,27 @@ function animate_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, this_f_unnorm = get_this_f_unnorm(it) - this_fmin, this_fmax = NaNMath.extrema(transform(this_f_unnorm)) + this_fmin, this_fmax = NaNMath.extrema(transform.(this_f_unnorm)) fmin = min(fmin, this_fmin) fmax = max(fmax, this_fmax) end yheight = fmax - fmin xwidth = dzdtmax - dzdtmin - limits!(ax, dzdtmin - 0.01*xwidth, dzdtmax + 0.01*xwidth, - fmin - 0.01*yheight, fmax + 0.01*yheight) + if yscale ∈ (log, log10) + # Need to calculate y offsets differently to non-logarithmic y-axis case, to + # ensure ymin is not negative. + limits!(ax, dzdtmin - 0.01*xwidth, dzdtmax + 0.01*xwidth, + fmin * (fmin/fmax)^0.01, fmax * (fmax/fmin)^0.01) + else + limits!(ax, dzdtmin - 0.01*xwidth, dzdtmax + 0.01*xwidth, + fmin - 0.01*yheight, fmax + 0.01*yheight) + end dzdt = @lift vpagrid_to_dzdt(run_info.vpa.grid, vth[$frame_index], upar[$frame_index], run_info.evolve_ppar, run_info.evolve_upar) f_unnorm = @lift transform.(get_this_f_unnorm($frame_index)) - l = plot_1d(dzdt, f_unnorm; ax=ax, label=run_info.run_name, kwargs...) + l = plot_1d(dzdt, f_unnorm; ax=ax, label=run_info.run_name, yscale=yscale, kwargs...) 
if outfile !== nothing if fig === nothing From 1e3d52ba78da6e4a1b944249daae3cbd0c3059d5 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 22 May 2024 09:39:21 +0100 Subject: [PATCH 15/75] Plot 4 grid points near the boundary in wall plots, and add log plots --- .../src/makie_post_processing.jl | 302 ++++++++++++++++-- 1 file changed, 282 insertions(+), 20 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index bb46716be..4a0cd3a9a 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -4412,18 +4412,80 @@ function plot_ion_pdf_2D_at_wall(run_info; plot_prefix) && (ri.evolve_density || ri.evolve_upar || ri.evolve_ppar) for ri ∈ run_info) - for (z, z_range, label) ∈ ((z_lower, z_lower:z_lower+8, "wall-"), - (z_upper, z_upper-8:z_upper, "wall+")) + nt = minimum(ri.nt for ri ∈ run_info) + + for (z, z_range, label) ∈ ((z_lower, z_lower:z_lower+4, "wall-"), + (z_upper, z_upper-4:z_upper, "wall+")) f_input = copy(input_dict_dfns["f"]) f_input["iz0"] = z if input.plot - plot_vs_vpa(run_info, "f"; is=1, input=f_input, - outfile=plot_prefix * "pdf_$(label)_vs_vpa.pdf") + fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f") + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + plot_vs_vpa(ri, "f"; is=1, iz=iz, input=f_input, + label="$(run_label)iz=$iz", ax=ax) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "pdf_$(label)_vs_vpa.pdf" + save(outfile, fig) + + fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f") + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + plot_vs_vpa(ri, "f"; is=1, iz=iz, input=f_input, + label="$(run_label)iz=$iz", ax=ax, yscale=log10, + 
transform=(x)->positive_or_nan(x; epsilon=1.e-20)) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "logpdf_$(label)_vs_vpa.pdf" + save(outfile, fig) if moment_kinetic - plot_f_unnorm_vs_vpa(run_info; input=f_input, is=1, - outfile=plot_prefix * "pdf_unnorm_$(label)_vs_vpa.pdf") + fig, ax = get_1d_ax(; xlabel="vpa_unnorm", ylabel="f_unnorm") + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + plot_f_unnorm_vs_vpa(ri; input=f_input, is=1, iz=iz, + label="$(run_label)iz=$iz", ax=ax) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "pdf_unnorm_$(label)_vs_vpa.pdf" + save(outfile, fig) + + fig, ax = get_1d_ax(; xlabel="vpa_unnorm", ylabel="f_unnorm") + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + plot_f_unnorm_vs_vpa(ri; input=f_input, is=1, iz=iz, + label="$(run_label)iz=$iz", ax=ax, yscale=log10, + transform=(x)->positive_or_nan(x; epsilon=1.e-20)) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "logpdf_unnorm_$(label)_vs_vpa.pdf" + save(outfile, fig) end plot_f_unnorm_vs_vpa(run_info; f_over_vpa2=true, input=f_input, is=1, @@ -4451,12 +4513,80 @@ function plot_ion_pdf_2D_at_wall(run_info; plot_prefix) end if input.animate - animate_vs_vpa(run_info, "f"; is=1, input=f_input, - outfile=plot_prefix * "pdf_$(label)_vs_vpa." * input.animation_ext) + fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f") + frame_index = Observable(1) + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + animate_vs_vpa(ri, "f"; is=1, iz=iz, input=f_input, + label="$(run_label)iz=$iz", ax=ax, + frame_index=frame_index) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "pdf_$(label)_vs_vpa." 
* input.animation_ext + save_animation(fig, frame_index, nt, outfile) + + fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f", yscale=log10) + frame_index = Observable(1) + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + animate_vs_vpa(ri, "f"; is=1, iz=iz, input=f_input, + label="$(run_label)iz=$iz", ax=ax, + frame_index=frame_index, + transform=(x)->positive_or_nan(x; epsilon=1.e-20)) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "logpdf_$(label)_vs_vpa." * input.animation_ext + save_animation(fig, frame_index, nt, outfile) if moment_kinetic - animate_f_unnorm_vs_vpa(run_info; input=f_input, is=1, - outfile=plot_prefix * "pdf_unnorm_$(label)_vs_vpa." * input.animation_ext) + fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f") + frame_index = Observable(1) + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + animate_f_unnorm_vs_vpa(ri; is=1, iz=iz, input=f_input, + label="$(run_label)iz=$iz", ax=ax, + frame_index=frame_index) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "pdf_unnorm_$(label)_vs_vpa." * input.animation_ext + save_animation(fig, frame_index, nt, outfile) + + fig, ax = get_1d_ax(; xlabel="vpa", ylabel="f") + frame_index = Observable(1) + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + animate_f_unnorm_vs_vpa(ri; is=1, iz=iz, input=f_input, + label="$(run_label)iz=$iz", ax=ax, + frame_index=frame_index, yscale=log10, + transform=(x)->positive_or_nan(x; epsilon=1.e-20)) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "logpdf_unnorm_$(label)_vs_vpa." 
* input.animation_ext + save_animation(fig, frame_index, nt, outfile) end animate_f_unnorm_vs_vpa(run_info; f_over_vpa2=true, input=f_input, is=1, @@ -4537,19 +4667,82 @@ function plot_neutral_pdf_2D_at_wall(run_info; plot_prefix) moment_kinetic = any(ri !== nothing && (ri.evolve_density || ri.evolve_upar || ri.evolve_ppar) for ri ∈ run_info) + nt = minimum(ri.nt for ri ∈ run_info) - for (z, z_range, label) ∈ ((z_lower, z_lower:z_lower+8, "wall-"), - (z_upper, z_upper-8:z_upper, "wall+")) + for (z, z_range, label) ∈ ((z_lower, z_lower:z_lower+4, "wall-"), + (z_upper, z_upper-4:z_upper, "wall+")) f_neutral_input = copy(input_dict_dfns["f_neutral"]) f_neutral_input["iz0"] = z if input.plot - plot_vs_vz(run_info, "f_neutral"; is=1, input=f_neutral_input, - outfile=plot_prefix * "pdf_neutral_$(label)_vs_vz.pdf") + fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral") + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + plot_vs_vz(ri, "f_neutral"; is=1, iz=iz, input=f_neutral_input, + label="$(run_label)iz=$iz", ax=ax) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "pdf_neutral_$(label)_vs_vz.pdf" + save(outfile, fig) + + fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral") + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + plot_vs_vz(ri, "f_neutral"; is=1, iz=iz, input=f_neutral_input, + label="$(run_label)iz=$iz", ax=ax, yscale=log10, + transform=(x)->positive_or_nan(x; epsilon=1.e-20)) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "logpdf_neutral_$(label)_vs_vpa.pdf" + save(outfile, fig) if moment_kinetic - plot_f_unnorm_vs_vpa(run_info; input=f_neutral_input, neutral=true, is=1, - outfile=plot_prefix * "pdf_neutral_unnorm_$(label)_vs_vpa.pdf") + fig, ax = get_1d_ax(; xlabel="vz_unnorm", ylabel="f_neutral_unnorm") + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 
+ run_label = ri.run_name * " " + else + run_label = "" + end + plot_f_unnorm_vs_vpa(ri; neutral=true, input=f_neutral_input, + is=1, iz=iz, label="$(run_label)iz=$iz", + ax=ax) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "pdf_neutral_unnorm_$(label)_vs_vpa.pdf" + save(outfile, fig) + + fig, ax = get_1d_ax(; xlabel="vz_unnorm", ylabel="f_neutral_unnorm") + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + plot_f_unnorm_vs_vpa(ri; neutral=true, input=f_neutral_input, + is=1, iz=iz, label="$(run_label)iz=$iz", + ax=ax, yscale=log10, + transform=(x)->positive_or_nan(x; epsilon=1.e-20)) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "logpdf_neutral_unnorm_$(label)_vs_vpa.pdf" + save(outfile, fig) end if !is_1V @@ -4592,12 +4785,81 @@ function plot_neutral_pdf_2D_at_wall(run_info; plot_prefix) end if input.animate - animate_vs_vz(run_info, "f_neutral"; is=1, input=f_neutral_input, - outfile=plot_prefix * "pdf_neutral_$(label)_vs_vz." * input.animation_ext) + fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral") + frame_index = Observable(1) + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + animate_vs_vz(ri, "f_neutral"; is=1, iz=iz, input=f_neutral_input, + label="$(run_label)iz=$iz", ax=ax, + frame_index=frame_index) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "pdf_neutral_$(label)_vs_vz." 
* input.animation_ext + save_animation(fig, frame_index, nt, outfile) + + fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral", yscale=log10) + frame_index = Observable(1) + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + animate_vs_vz(ri, "f_neutral"; is=1, iz=iz, input=f_neutral_input, + label="$(run_label)iz=$iz", ax=ax, + frame_index=frame_index, + transform=(x)->positive_or_nan(x; epsilon=1.e-20)) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "logpdf_neutral_$(label)_vs_vz." * input.animation_ext + save_animation(fig, frame_index, nt, outfile) if moment_kinetic - animate_f_unnorm_vs_vpa(run_info; input=f_neutral_input, neutral=true, is=1, - outfile=plot_prefix * "pdf_neutral_unnorm_$(label)_vs_vz." * input.animation_ext) + fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral") + frame_index = Observable(1) + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + animate_f_unnorm_vs_vpa(ri; neutral=true, is=1, iz=iz, + input=f_neutral_input, + label="$(run_label)iz=$iz", ax=ax, + frame_index=frame_index) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "pdf_neutral_unnorm_$(label)_vs_vz." * input.animation_ext + save_animation(fig, frame_index, nt, outfile) + + fig, ax = get_1d_ax(; xlabel="vz", ylabel="f_neutral") + frame_index = Observable(1) + for iz ∈ z_range + for ri ∈ run_info + if length(run_info) > 1 + run_label = ri.run_name * " " + else + run_label = "" + end + animate_f_unnorm_vs_vpa(ri; neutral=true, is=1, iz=iz, + input=f_neutral_input, label="$(run_label)iz=$iz", + ax=ax, frame_index=frame_index, yscale=log10, + transform=(x)->positive_or_nan(x; epsilon=1.e-20)) + end + end + put_legend_right(fig, ax) + outfile=plot_prefix * "logpdf_neutral_unnorm_$(label)_vs_vz." 
* input.animation_ext + save_animation(fig, frame_index, nt, outfile) end if !is_1V From c99e9f4c2379d24390578e12c32f07da887692a7 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 22 May 2024 09:40:00 +0100 Subject: [PATCH 16/75] Comment out kinetic electron blocks in constraints_plots() These were merged accidentally. --- .../src/makie_post_processing.jl | 152 +++++++++--------- 1 file changed, 76 insertions(+), 76 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index 4a0cd3a9a..7faf260ce 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -5026,35 +5026,35 @@ function constraints_plots(run_info; plot_prefix=plot_prefix) end # Electrons - if any(ri.composition.electron_physics == kinetic_electrons for ri ∈ run_info) - - fig, ax = get_1d_ax(; xlabel="z", ylabel="constraint coefficient") - for ri ∈ run_info - if length(run_info) > 1 - prefix = ri.run_name * ", " - else - prefix = "" - end - - varname = "electron_constraints_A_coefficient" - label = prefix * "(A-1)" - data = get_variable(ri, varname; it=it0, ir=ir0) - data .-= 1.0 - plot_vs_z(ri, varname; label=label, data=data, ax=ax, input=input) - - varname = "electron_constraints_B_coefficient" - label = prefix * "B" - plot_vs_z(ri, varname; label=label, ax=ax, it=it0, ir=ir0, - input=input) - - varname = "electron_constraints_C_coefficient" - label = prefix * "C" - plot_vs_z(ri, varname; label=label, ax=ax, it=it0, ir=ir0, - input=input) - end - put_legend_right(fig, ax) - save(plot_prefix * "electron_constraints.pdf", fig) - end + #if any(ri.composition.electron_physics == kinetic_electrons for ri ∈ run_info) + + # fig, ax = get_1d_ax(; xlabel="z", ylabel="constraint coefficient") + # for ri ∈ run_info + # if length(run_info) > 1 + # prefix = ri.run_name * ", " + # 
else + # prefix = "" + # end + + # varname = "electron_constraints_A_coefficient" + # label = prefix * "(A-1)" + # data = get_variable(ri, varname; it=it0, ir=ir0) + # data .-= 1.0 + # plot_vs_z(ri, varname; label=label, data=data, ax=ax, input=input) + + # varname = "electron_constraints_B_coefficient" + # label = prefix * "B" + # plot_vs_z(ri, varname; label=label, ax=ax, it=it0, ir=ir0, + # input=input) + + # varname = "electron_constraints_C_coefficient" + # label = prefix * "C" + # plot_vs_z(ri, varname; label=label, ax=ax, it=it0, ir=ir0, + # input=input) + # end + # put_legend_right(fig, ax) + # save(plot_prefix * "electron_constraints.pdf", fig) + #end end if input.animate @@ -5188,53 +5188,53 @@ function constraints_plots(run_info; plot_prefix=plot_prefix) end # Electrons - if any(ri.composition.electron_physics == kinetic_electrons for ri ∈ run_info) - - frame_index = Observable(1) - fig, ax = get_1d_ax(; xlabel="z", ylabel="constraint coefficient") - - # Calculate plot limits manually so we can exclude the first time point, which - # often has a large value for (A-1) due to the way initialisation is done, - # which can make the subsequent values hard to see. 
- ymin = Inf - ymax = -Inf - for ri ∈ run_info - if length(run_info) > 1 - prefix = ri.run_name * ", " - else - prefix = "" - end - - varname = "electron_constraints_A_coefficient" - label = prefix * "(A-1)" - data = get_variable(ri, varname; ir=ir0) - data .-= 1.0 - ymin = min(ymin, minimum(data[:,2:end])) - ymax = max(ymax, maximum(data[:,2:end])) - animate_vs_z(ri, varname; label=label, data=data, - frame_index=frame_index, ax=ax, input=input) - - varname = "electron_constraints_B_coefficient" - label = prefix * "B" - data = get_variable(ri, varname; ir=ir0) - ymin = min(ymin, minimum(data[:,2:end])) - ymax = max(ymax, maximum(data[:,2:end])) - animate_vs_z(ri, varname; label=label, data=data, - frame_index=frame_index, ax=ax, ir=ir0, input=input) - - varname = "electron_constraints_C_coefficient" - label = prefix * "C" - data = get_variable(ri, varname; ir=ir0) - ymin = min(ymin, minimum(data[:,2:end])) - ymax = max(ymax, maximum(data[:,2:end])) - animate_vs_z(ri, varname; label=label, data=data, - frame_index=frame_index, ax=ax, ir=ir0, input=input) - end - put_legend_right(fig, ax) - ylims!(ax, ymin, ymax) - save_animation(fig, frame_index, nt, - plot_prefix * "electron_constraints." * input.animation_ext) - end + #if any(ri.composition.electron_physics == kinetic_electrons for ri ∈ run_info) + + # frame_index = Observable(1) + # fig, ax = get_1d_ax(; xlabel="z", ylabel="constraint coefficient") + + # # Calculate plot limits manually so we can exclude the first time point, which + # # often has a large value for (A-1) due to the way initialisation is done, + # # which can make the subsequent values hard to see. 
+ # ymin = Inf + # ymax = -Inf + # for ri ∈ run_info + # if length(run_info) > 1 + # prefix = ri.run_name * ", " + # else + # prefix = "" + # end + + # varname = "electron_constraints_A_coefficient" + # label = prefix * "(A-1)" + # data = get_variable(ri, varname; ir=ir0) + # data .-= 1.0 + # ymin = min(ymin, minimum(data[:,2:end])) + # ymax = max(ymax, maximum(data[:,2:end])) + # animate_vs_z(ri, varname; label=label, data=data, + # frame_index=frame_index, ax=ax, input=input) + + # varname = "electron_constraints_B_coefficient" + # label = prefix * "B" + # data = get_variable(ri, varname; ir=ir0) + # ymin = min(ymin, minimum(data[:,2:end])) + # ymax = max(ymax, maximum(data[:,2:end])) + # animate_vs_z(ri, varname; label=label, data=data, + # frame_index=frame_index, ax=ax, ir=ir0, input=input) + + # varname = "electron_constraints_C_coefficient" + # label = prefix * "C" + # data = get_variable(ri, varname; ir=ir0) + # ymin = min(ymin, minimum(data[:,2:end])) + # ymax = max(ymax, maximum(data[:,2:end])) + # animate_vs_z(ri, varname; label=label, data=data, + # frame_index=frame_index, ax=ax, ir=ir0, input=input) + # end + # put_legend_right(fig, ax) + # ylims!(ax, ymin, ymax) + # save_animation(fig, frame_index, nt, + # plot_prefix * "electron_constraints." * input.animation_ext) + #end end catch e println("Error in constraints_plots(). 
Error was ", e) From 3cff3eb120b0633e1b41a5801c410341732afdce Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 22 May 2024 14:07:45 +0100 Subject: [PATCH 17/75] Handle Dirichlet bc in gausslegendre diffusion matrices --- moment_kinetics/src/coordinates.jl | 3 ++- moment_kinetics/src/gauss_legendre.jl | 23 ++++++++++++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl index 5d5531d84..746d3143d 100644 --- a/moment_kinetics/src/coordinates.jl +++ b/moment_kinetics/src/coordinates.jl @@ -242,7 +242,8 @@ function define_coordinate(input, parallel_io::Bool=false; run_directory=nothing elseif input.discretization == "gausslegendre_pseudospectral" # create arrays needed for explicit GaussLegendre pseudospectral treatment in this # coordinate and create the matrices for differentiation - spectral = setup_gausslegendre_pseudospectral(coord, collision_operator_dim=collision_operator_dim) + spectral = setup_gausslegendre_pseudospectral(coord, collision_operator_dim=collision_operator_dim, + dirichlet_bc=occursin("zero", coord.bc)) # obtain the local derivatives of the uniform grid with respect to the used grid derivative!(coord.duniform_dgrid, coord.uniform_grid, coord, spectral) else diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl index 24a0b925f..539a5fd86 100644 --- a/moment_kinetics/src/gauss_legendre.jl +++ b/moment_kinetics/src/gauss_legendre.jl @@ -100,7 +100,7 @@ struct gausslegendre_info{TSparse, TLU} <: weak_discretization_info Qmat::Array{mk_float,2} end -function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true) +function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true, dirichlet_bc=true) lobatto = setup_gausslegendre_pseudospectral_lobatto(coord,collision_operator_dim=collision_operator_dim) radau = 
setup_gausslegendre_pseudospectral_radau(coord,collision_operator_dim=collision_operator_dim) @@ -114,9 +114,9 @@ function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true) K_matrix = allocate_float(coord.n,coord.n) L_matrix = allocate_float(coord.n,coord.n) - setup_global_weak_form_matrix!(mass_matrix, lobatto, radau, coord, "M") - setup_global_weak_form_matrix!(K_matrix, lobatto, radau, coord, "K_with_BC_terms") - setup_global_weak_form_matrix!(L_matrix, lobatto, radau, coord, "L_with_BC_terms") + setup_global_weak_form_matrix!(mass_matrix, lobatto, radau, coord, "M"; dirichlet_bc=dirichlet_bc) + setup_global_weak_form_matrix!(K_matrix, lobatto, radau, coord, "K_with_BC_terms"; dirichlet_bc=dirichlet_bc) + setup_global_weak_form_matrix!(L_matrix, lobatto, radau, coord, "L_with_BC_terms"; dirichlet_bc=dirichlet_bc) mass_matrix_lu = lu(sparse(mass_matrix)) Qmat = allocate_float(coord.ngrid,coord.ngrid) @@ -835,7 +835,7 @@ where M is the mass matrix and K is the stiffness matrix. 
function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2}, lobatto::gausslegendre_base_info, radau::gausslegendre_base_info, - coord,option) + coord,option; dirichlet_bc=false) QQ_j = allocate_float(coord.ngrid,coord.ngrid) QQ_jp1 = allocate_float(coord.ngrid,coord.ngrid) @@ -883,6 +883,19 @@ function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2}, QQ_global[imax[j],imin[j]-1:imax[j]] .+= QQ_j[ngrid,:]./2.0 end end + + if dirichlet_bc + # Make matrix diagonal for first/last grid points so it does not change the values + # there + if coord.irank == 0 + QQ_global[1,:] .= 0.0 + QQ_global[1,1] = 1.0 + end + if coord.irank == coord.nrank - 1 + QQ_global[end,:] .= 0.0 + QQ_global[end,end] = 1.0 + end + end return nothing end From 0d3c06b573a74698dc413c1a5734df32eb1fe4a2 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 26 May 2024 13:07:29 +0100 Subject: [PATCH 18/75] Fix post-processing loading of advection speeds --- moment_kinetics/src/load_data.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index 49b0f9952..8c498c1c6 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -3434,6 +3434,7 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t speed = allocate_float(nz, nvz, nvr, nvzeta, nr, nspecies, nt) + setup_distributed_memory_MPI(1,1,1,1) setup_loop_ranges!(0, 1; s=nspecies, sn=run_info.n_neutral_species, r=nr, z=nz, vperp=run_info.vperp.n, vpa=run_info.vpa.n, vzeta=nvzeta, vr=nvr, vz=nvz) @@ -3525,6 +3526,7 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t nvz = run_info.vz.n speed = allocate_float(nvz, nvr, nvzeta, nz, nr, nspecies, nt) + setup_distributed_memory_MPI(1,1,1,1) setup_loop_ranges!(0, 1; s=nspecies, sn=run_info.n_neutral_species, r=nr, z=nz, vperp=run_info.vperp.n, vpa=run_info.vpa.n, vzeta=nvzeta, vr=nvr, vz=nvz) From 
b91769b523c6cb26d868b03d060e6f0b65c5b22c Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 7 May 2024 09:38:08 +0100 Subject: [PATCH 19/75] Check t_params.step_counter[] to write output when not adaptive When not using adaptive timestepping, go back to checking the step counter against `nwrite_moments` and `nwrite_dfns`. This is slightly more complicated, as adaptive and non-adaptive schemes have different ways of determining whether to write output, but will make it easier to add a debug mode where adaptive timestep schemes write output after a fixed number of steps rather than after a fixed simulation time. --- moment_kinetics/src/input_structs.jl | 2 + moment_kinetics/src/time_advance.jl | 80 ++++++++++++++++++---------- 2 files changed, 54 insertions(+), 28 deletions(-) diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index 89afcb830..e16183168 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -53,6 +53,8 @@ struct time_info{Terrorsum <: Real} failure_counter::Ref{mk_int} failure_caused_by::Vector{mk_int} limit_caused_by::Vector{mk_int} + nwrite_moments::mk_int + nwrite_dfns::mk_int moments_output_times::Vector{mk_float} dfns_output_times::Vector{mk_float} type::String diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index cc113abfc..4dfc3d4af 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -262,23 +262,29 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, end_time = code_time + t_input.dt * t_input.nstep epsilon = 1.e-11 - if t_input.nwrite == 0 - moments_output_times = [end_time] - else - moments_output_times = [code_time + i*t_input.dt - for i ∈ t_input.nwrite:t_input.nwrite:t_input.nstep] - end - if moments_output_times[end] < end_time - epsilon - push!(moments_output_times, end_time) - end - if t_input.nwrite_dfns == 0 - dfns_output_times = [end_time] + if 
adaptive + if t_input.nwrite == 0 + moments_output_times = [end_time] + else + moments_output_times = [code_time + i*t_input.dt + for i ∈ t_input.nwrite:t_input.nwrite:t_input.nstep] + end + if moments_output_times[end] < end_time - epsilon + push!(moments_output_times, end_time) + end + if t_input.nwrite_dfns == 0 + dfns_output_times = [end_time] + else + dfns_output_times = [code_time + i*t_input.dt + for i ∈ t_input.nwrite_dfns:t_input.nwrite_dfns:t_input.nstep] + end + if dfns_output_times[end] < end_time - epsilon + push!(dfns_output_times, end_time) + end else - dfns_output_times = [code_time + i*t_input.dt - for i ∈ t_input.nwrite_dfns:t_input.nwrite_dfns:t_input.nstep] - end - if dfns_output_times[end] < end_time - epsilon - push!(dfns_output_times, end_time) + # Use nwrite_moments and nwrite_dfns to determine when to write output + moments_output_times = mk_float[] + dfns_output_times = mk_float[] end if t_input.high_precision_error_sum @@ -1021,9 +1027,11 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr moments_output_counter = 1 dfns_output_counter = 1 @serial_region begin - t_params.next_output_time[] = - min(t_params.moments_output_times[moments_output_counter], - t_params.dfns_output_times[dfns_output_counter]) + if t_params.adaptive + t_params.next_output_time[] = + min(t_params.moments_output_times[moments_output_counter], + t_params.dfns_output_times[dfns_output_counter]) + end end _block_synchronize() @@ -1039,9 +1047,18 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr end while true - diagnostic_checks = (t + t_params.dt[] ≥ t_params.moments_output_times[moments_output_counter] - epsilon - || t + t_params.dt[] ≥ t_params.dfns_output_times[dfns_output_counter] - epsilon - || t + t_params.dt[] ≥ t_params.end_time - epsilon) + if t_params.adaptive + maybe_write_moments = (t + t_params.dt[] ≥ t_params.moments_output_times[moments_output_counter] - epsilon + || t + t_params.dt[] ≥ 
t_params.end_time - epsilon) + maybe_write_dfns = (t + t_params.dt[] ≥ t_params.dfns_output_times[dfns_output_counter] - epsilon + || t + t_params.dt[] ≥ t_params.end_time - epsilon) + else + maybe_write_moments = (t_params.step_counter[] % t_params.nwrite_moments == 0 + || t_params.step_counter[] >= t_params.nstep) + maybe_write_dfns = (t_params.step_counter[] % t_params.nwrite_dfns == 0 + || t_params.step_counter[] >= t_params.nstep) + end + diagnostic_checks = (maybe_write_moments || maybe_write_dfns) if t_params.split_operators # MRH NOT SUPPORTED @@ -1077,7 +1094,18 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr finish_now = true end - if t ≥ t_params.moments_output_times[moments_output_counter] - epsilon + if t_params.adaptive + write_moments = (t ≥ t_params.moments_output_times[moments_output_counter] - epsilon + || t ≥ t_params.end_time - epsilon) + write_dfns = (t ≥ t_params.dfns_output_times[dfns_output_counter] - epsilon + || t ≥ t_params.end_time - epsilon) + else + write_moments = (t_params.step_counter[] % t_params.nwrite_moments == 0 + || t_params.step_counter[] >= t_params.nstep) + write_dfns = (t_params.step_counter[] % t_params.nwrite_dfns == 0 + || t_params.step_counter[] >= t_params.nstep) + end + if write_moments moments_output_counter += 1 if moments_output_counter ≤ length(t_params.moments_output_times) @serial_region begin @@ -1087,10 +1115,8 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr end end write_moments = true - else - write_moments = false end - if t ≥ t_params.dfns_output_times[dfns_output_counter] - epsilon + if write_dfns dfns_output_counter += 1 if dfns_output_counter ≤ length(t_params.dfns_output_times) @serial_region begin @@ -1100,8 +1126,6 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr end end write_dfns = true - else - write_dfns = false end if write_moments || write_dfns || finish_now From 
3964b0018acdf3b24c49f836069810e37088fe86 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 12 May 2024 11:24:38 +0100 Subject: [PATCH 20/75] Re-apply output step fix with `just_completed_output_step` Had been lost in a bad merge. --- moment_kinetics/src/runge_kutta.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index 3bbb872d9..89c233b75 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -753,6 +753,8 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er error("Unrecognized error_norm_method '$method'") end + just_completed_output_step = false + # Use current_dt instead of t_params.dt[] here because we are about to write to # the shared-memory variable t_params.dt[] below, and we do not want to add an extra # _block_synchronize() call after reading it here. @@ -815,6 +817,8 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er if t_params.dt[] > CFL_limit t_params.dt[] = CFL_limit end + + just_completed_output_step = true else # Adjust timestep according to Fehlberg's suggestion # (https://en.wikipedia.org/wiki/Runge%E2%80%93Kutta%E2%80%93Fehlberg_method). @@ -883,7 +887,7 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er end @serial_region begin - if t + t_params.dt[] >= t_params.next_output_time[] + if !just_completed_output_step && t + t_params.dt[] >= t_params.next_output_time[] t_params.dt_before_output[] = t_params.dt[] t_params.dt[] = t_params.next_output_time[] - current_time t_params.step_to_output[] = true From 029bba462e92981748dbe2f4a6e8c92a2d8f66ff Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 7 May 2024 09:43:13 +0100 Subject: [PATCH 21/75] Option to write after fixed step count when using adaptive timestep ...rather than after a fixed simulation time. May be useful for debugging. 
--- moment_kinetics/src/input_structs.jl | 1 + moment_kinetics/src/moment_kinetics_input.jl | 1 + moment_kinetics/src/runge_kutta.jl | 5 ++++- moment_kinetics/src/time_advance.jl | 11 ++++++----- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index e16183168..17c0e1918 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -72,6 +72,7 @@ struct time_info{Terrorsum <: Real} last_fail_proximity_factor::mk_float minimum_dt::mk_float maximum_dt::mk_float + write_after_fixed_step_count::Bool error_sum_zero::Terrorsum split_operators::Bool steady_state_residual::Bool diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl index 56358a27b..4aa85b666 100644 --- a/moment_kinetics/src/moment_kinetics_input.jl +++ b/moment_kinetics/src/moment_kinetics_input.jl @@ -211,6 +211,7 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true) last_fail_proximity_factor=1.05, minimum_dt=0.0, maximum_dt=Inf, + write_after_fixed_step_count=false, high_precision_error_sum=false, ) if timestepping_section["nwrite"] > timestepping_section["nstep"] diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index 89c233b75..bb9cff095 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -887,7 +887,10 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er end @serial_region begin - if !just_completed_output_step && t + t_params.dt[] >= t_params.next_output_time[] + current_time = t + t_params.previous_dt[] + if (!t_params.write_after_fixed_step_count && !just_completed_output_step + && (current_time + t_params.dt[] >= t_params.next_output_time[])) + t_params.dt_before_output[] = t_params.dt[] t_params.dt[] = t_params.next_output_time[] - current_time t_params.step_to_output[] = true diff --git 
a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 4dfc3d4af..fa01cba5a 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -262,7 +262,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, end_time = code_time + t_input.dt * t_input.nstep epsilon = 1.e-11 - if adaptive + if adaptive || t_input.write_after_fixed_step_count if t_input.nwrite == 0 moments_output_times = [end_time] else @@ -301,7 +301,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, t_input.step_update_prefactor, t_input.max_increase_factor, t_input.max_increase_factor_near_last_fail, t_input.last_fail_proximity_factor, t_input.minimum_dt, - t_input.maximum_dt, error_sum_zero, t_input.split_operators, + t_input.maximum_dt, t_input.write_after_fixed_step_count, + error_sum_zero, t_input.split_operators, t_input.steady_state_residual, t_input.converged_residual_value, manufactured_solns_input.use_for_advance, t_input.stopfile_name) end @@ -1027,7 +1028,7 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr moments_output_counter = 1 dfns_output_counter = 1 @serial_region begin - if t_params.adaptive + if t_params.adaptive && !t_params.write_after_fixed_step_count t_params.next_output_time[] = min(t_params.moments_output_times[moments_output_counter], t_params.dfns_output_times[dfns_output_counter]) @@ -1047,7 +1048,7 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr end while true - if t_params.adaptive + if t_params.adaptive && !t_params.write_after_fixed_step_count maybe_write_moments = (t + t_params.dt[] ≥ t_params.moments_output_times[moments_output_counter] - epsilon || t + t_params.dt[] ≥ t_params.end_time - epsilon) maybe_write_dfns = (t + t_params.dt[] ≥ t_params.dfns_output_times[dfns_output_counter] - epsilon @@ -1094,7 +1095,7 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr 
finish_now = true end - if t_params.adaptive + if t_params.adaptive && !t_params.write_after_fixed_step_count write_moments = (t ≥ t_params.moments_output_times[moments_output_counter] - epsilon || t ≥ t_params.end_time - epsilon) write_dfns = (t ≥ t_params.dfns_output_times[dfns_output_counter] - epsilon From 2b21389886470b8c3beff0598fd1b13a78383b00 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 8 May 2024 17:26:17 +0100 Subject: [PATCH 22/75] Function for z-derivatives of 1D arrays This can be useful for nonlinear solvers used for implicit parts of the timestep. --- moment_kinetics/src/calculus.jl | 8 +++++++- moment_kinetics/src/derivatives.jl | 27 +++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/calculus.jl b/moment_kinetics/src/calculus.jl index fbff04a2f..d2c16a478 100644 --- a/moment_kinetics/src/calculus.jl +++ b/moment_kinetics/src/calculus.jl @@ -362,7 +362,10 @@ function assign_endpoint!(df1d::AbstractArray{mk_float,Ndims}, # test against coord name -- make sure to use exact string delimiters e.g. 
"x" not 'x' # test against Ndims (autodetermined) to choose which array slices to use in assigning endpoints #println("DEBUG MESSAGE: coord.name: ",coord.name," Ndims: ",Ndims," key: ",key) - if coord.name == "z" && Ndims==2 + if coord.name == "z" && Ndims==1 + df1d[j] = receive_buffer[] + #println("ASSIGNING DATA") + elseif coord.name == "z" && Ndims==2 df1d[j,:] .= receive_buffer[:] #println("ASSIGNING DATA") elseif coord.name == "z" && Ndims==3 @@ -374,6 +377,9 @@ function assign_endpoint!(df1d::AbstractArray{mk_float,Ndims}, elseif coord.name == "z" && Ndims==6 df1d[:,:,:,j,:,:] .= receive_buffer[:,:,:,:,:] #println("ASSIGNING DATA") + elseif coord.name == "r" && Ndims==1 + df1d[j] = receive_buffer[] + #println("ASSIGNING DATA") elseif coord.name == "r" && Ndims==2 df1d[:,j] .= receive_buffer[:] #println("ASSIGNING DATA") diff --git a/moment_kinetics/src/derivatives.jl b/moment_kinetics/src/derivatives.jl index e85e91158..c3e2c0523 100644 --- a/moment_kinetics/src/derivatives.jl +++ b/moment_kinetics/src/derivatives.jl @@ -149,6 +149,33 @@ dfns (ion) -> [vpa,vperp,z,r,s] dfns (neutrals) -> [vz,vr,vzeta,z,r,sn] """ +#df/dz +#1D version for f[z], used by implicit solvers +function derivative_z!(dfdz::AbstractArray{mk_float,1}, f::AbstractArray{mk_float,1}, + dfdz_lower_endpoints::AbstractArray{mk_float,0}, + dfdz_upper_endpoints::AbstractArray{mk_float,0}, + z_send_buffer::AbstractArray{mk_float,0}, + z_receive_buffer::AbstractArray{mk_float,0}, z_spectral, z) + + begin_serial_region() + + @serial_region begin + # differentiate f w.r.t z + derivative!(dfdz, f, z, z_spectral) + # get external endpoints to reconcile via MPI + dfdz_lower_endpoints[] = z.scratch_2d[1,1] + dfdz_upper_endpoints[] = z.scratch_2d[end,end] + end + + # now reconcile element boundaries across + # processes with large message involving all y + if z.nelement_local < z.nelement_global + reconcile_element_boundaries_MPI!( + dfdz, dfdz_lower_endpoints, dfdz_upper_endpoints, z_send_buffer, + 
z_receive_buffer, z) + end +end + #df/dz #2D version for f[z,r] -> Er, Ez, phi function derivative_z!(dfdz::AbstractArray{mk_float,2}, f::AbstractArray{mk_float,2}, From 434ac35141c1bf8f092a6e6a52ff83feeb486cfd Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 29 Apr 2024 22:14:17 +0100 Subject: [PATCH 23/75] Simplify convert_butcher_tableau_for_moment_kinetics() ...using solve_for() (or a hacked version of it...) from Symbolics.jl to extract rk_coefs from equations defining y[i] in terms of Butcher tables 'a' and 'b', or in terms of rk_coefs. --- util/calculate_rk_coeffs.jl | 438 ++++++++++++++++++++++-------------- 1 file changed, 269 insertions(+), 169 deletions(-) diff --git a/util/calculate_rk_coeffs.jl b/util/calculate_rk_coeffs.jl index 952444ea1..5767d7705 100644 --- a/util/calculate_rk_coeffs.jl +++ b/util/calculate_rk_coeffs.jl @@ -6,6 +6,115 @@ into ones that we can use. using Symbolics +# Following two functions copied and modified from Symbolics.jl's linear_algebra.jl so +# that we can hack them to force them to return a Rational{BigInt} result. +# Modifications: +# * Add prefix `my_` to the function names, to avoid confusion/conflicts +# * Change `Num.()` to `Rational{BigInt}.()` in `_my_solve` so that `A` and `b` are +# arrays of `Rational{BigInt}` (so that we avoid any rounding errors). For the case +# that we want, the entries of `A` and `b` are all numerical values (not actual +# symbolic expressions), so this hack can be done. +# * Change `/` to `//` in `my_sym_lu2()` +using Symbolics: linear_expansion, SymbolicUtils, value, sym_lu, Num, RCNum, _iszero, nterms +using LinearAlgebra +function my_solve_for(eq, var; simplify=false, check=true) # scalar case + # simplify defaults for `false` as canonicalization should handle most of + # the cases. 
+ a, b, islinear = linear_expansion(eq, var) + check && @assert islinear + islinear || return nothing + # a * x + b = 0 + if eq isa AbstractArray && var isa AbstractArray + x = _my_solve(a, -b, simplify) + else + x = a \ -b + end + simplify || return x + if x isa AbstractArray + SymbolicUtils.simplify.(simplify_fractions.(x)) + else + SymbolicUtils.simplify(simplify_fractions(x)) + end +end + +function _my_solve(A::AbstractMatrix, b::AbstractArray, do_simplify) + #A = Num.(value.(SymbolicUtils.quick_cancel.(A))) + #b = Num.(value.(SymbolicUtils.quick_cancel.(b))) + A = Rational{BigInt}.(value.(SymbolicUtils.quick_cancel.(A))) + b = Rational{BigInt}.(value.(SymbolicUtils.quick_cancel.(b))) + sol = value.(sym_lu(A) \ b) + do_simplify ? SymbolicUtils.simplify_fractions.(sol) : sol +end + +function my_solve_for2(eq, var; simplify=false, check=true) # scalar case + # simplify defaults for `false` as canonicalization should handle most of + # the cases. + a, b, islinear = linear_expansion(eq, var) + check && @assert islinear + islinear || return nothing + # a * x + b = 0 + if eq isa AbstractArray && var isa AbstractArray + x = _my_solve2(a, -b, simplify) + else + x = a \ -b + end + simplify || return x + if x isa AbstractArray + SymbolicUtils.simplify.(simplify_fractions.(x)) + else + SymbolicUtils.simplify(simplify_fractions(x)) + end +end + +function _my_solve2(A::AbstractMatrix, b::AbstractArray, do_simplify) + A = Num.(value.(SymbolicUtils.quick_cancel.(A))) + b = Num.(value.(SymbolicUtils.quick_cancel.(b))) + sol = value.(my_sym_lu2(A) \ b) + do_simplify ? SymbolicUtils.simplify_fractions.(sol) : sol +end + +function my_sym_lu2(A; check=true) + SINGULAR = typemax(Int) + m, n = size(A) + F = map(x->x isa RCNum ? x : Num(x), A) + minmn = min(m, n) + p = Vector{LinearAlgebra.BlasInt}(undef, minmn) + info = 0 + for k = 1:minmn + kp = k + amin = SINGULAR + for i in k:m + absi = _iszero(F[i, k]) ? 
SINGULAR : nterms(F[i,k]) + if absi < amin + kp = i + amin = absi + end + end + + p[k] = kp + + if amin == SINGULAR && !(amin isa Symbolic) && (amin isa Number) && iszero(info) + info = k + end + + # swap + for j in 1:n + F[k, j], F[kp, j] = F[kp, j], F[k, j] + end + + for i in k+1:m + F[i, k] = F[i, k] // F[k, k] + end + for j = k+1:n + for i in k+1:m + F[i, j] = F[i, j] - F[i, k] * F[k, j] + end + end + end + check && LinearAlgebra.checknonsingular(info) + LU(F, p, convert(LinearAlgebra.BlasInt, info)) +end + """ convert_butcher_tableau_for_moment_kinetics(a, b) @@ -27,11 +136,11 @@ that can be used to calculate an error estimate. Currently assumes the method is explicit, so `a` has no non-zero diagonal or upper-triangular elements. -Returns an array `rk_coeffs` of size `n_rk_stages`x`n_rk_stages` where `size(a) = +Returns an array `rk_coefs` of size `n_rk_stages`x`n_rk_stages` where `size(a) = (n_rk_stages, n_rk_stages)`. """ function convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=true) - using_rationals = isa(a[1,1], Rational) + using_rationals = eltype(a) <: Rational n_rk_stages = size(a, 1) if size(b, 1) > 1 adaptive = true @@ -50,191 +159,190 @@ function convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=true) # y_out are the same as y, but given as expressions in terms of y and f # k are the RHS evaluations as defined on the Wikipedia page # k_subs are the k evaluated in terms of y by back-substituting the definitions of y. 
- @variables y[1:n_rk_stages+1] y_out[1:n_rk_stages+1] k[1:n_rk_stages] k_subs[1:n_rk_stages] + @variables y[1:n_rk_stages+1] k[1:n_rk_stages] yn rk_coefs[1:n_rk_stages+1, 1:output_size] y = Symbolics.scalarize(y) - y_out = Symbolics.scalarize(y_out) k = Symbolics.scalarize(k) - k_subs = Symbolics.scalarize(k_subs) + rk_coefs = Symbolics.scalarize(rk_coefs) - if using_rationals - k_subs[1] = (y[2] - y[1]) // a[2,1] - else - k_subs[1] = (y[2] - y[1]) / a[2,1] + # Expressions defined using the 'standard' Butcher formulae + y_k_expressions = [ + yn + (i == 1 ? 0 : sum(a[i,j] * k[j] for j ∈ 1:i-1)) + for i ∈ 1:n_rk_stages + ] + # Final entry of y_k_expressions is y^(n+1) + push!(y_k_expressions, yn + sum(b[1,i] * k[i] for i ∈ 1:n_rk_stages)) + + if adaptive + y_err = sum((b[2,i] - b[1,i]) * k[i] for i ∈ 1:n_rk_stages) end - k_subs[1] = simplify(expand(k_subs[1])) - for i ∈ 2:n_rk_stages-1 - if using_rationals - k_subs[i] = (y[i+1] - y[1] - sum(a[i+1,j]*k_subs[j] for j ∈ 1:i-1)) // a[i+1,i] + + # Define expressions for y[i] using the rk_coefs as used in moment_kinetics + y_rk_coefs_expressions = [ + sum(rk_coefs[j,i-1] * y[j] for j ∈ 1:i-1) + rk_coefs[i,i-1] * (y[i-1] + k[i-1]) + for i ∈ 2:(n_rk_stages + 1) + ] + # Substitute to eliminate y[i] from the expressions + y_rk_coefs_expressions = [ + substitute(e, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages)) + for e ∈ y_rk_coefs_expressions + ] + if adaptive + y_rk_coefs_err = sum(rk_coefs[j,n_rk_stages+1] * y[j] for j ∈ 1:n_rk_stages+1) + y_rk_coefs_err = substitute(y_rk_coefs_err, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages+1)) + end + + # Construct equations that can be solved for rk_coefs entries by equating the + # coefficients of each k[i] in the two sets of expressions + rk_coefs_equations = [] + for (i, (rk_coefs_expr, Butcher_expr)) ∈ enumerate(zip(y_rk_coefs_expressions, y_k_expressions[2:end])) + lhs = Symbolics.coeff(rk_coefs_expr, yn) + rhs = Symbolics.coeff(Butcher_expr, yn) + if isa(lhs, 
Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs[1,i] ~ 0) else - k_subs[i] = (y[i+1] - y[1] - sum(a[i+1,j]*k_subs[j] for j ∈ 1:i-1)) / a[i+1,i] + push!(rk_coefs_equations, lhs ~ rhs) + end + for j ∈ 1:n_rk_stages + lhs = Symbolics.coeff(rk_coefs_expr, k[j]) + rhs = Symbolics.coeff(Butcher_expr, k[j]) + if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs[j+1,i] ~ 0) + else + push!(rk_coefs_equations, lhs ~ rhs + 0) + end end - k_subs[i] = simplify(expand(k_subs[i])) end - - y_out[1] = y[1] - y_out[2] = y[1] + a[2,1] * k[1] - y_out[2] = simplify(expand(y_out[2])) - for i ∈ 3:n_rk_stages - y_out[i] = y[1] + sum(a[i,j]*k_subs[j] for j ∈ 1:i-2) + a[i,i-1]*k[i-1] - y_out[i] = simplify(expand(y_out[i])) + if adaptive + i = n_rk_stages + 1 + lhs = Symbolics.coeff(y_rk_coefs_err, yn) + rhs = Symbolics.coeff(y_err, yn) + if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs[1,i] ~ 0) + else + push!(rk_coefs_equations, lhs ~ rhs) + end + for j ∈ 1:n_rk_stages + lhs = Symbolics.coeff(y_rk_coefs_err, k[j]) + rhs = Symbolics.coeff(y_err, k[j]) + if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs[j+1,i] ~ 0) + else + push!(rk_coefs_equations, lhs ~ rhs + 0) + end + end end - y_out[n_rk_stages+1] = y[1] + sum(b[1,j]*k_subs[j] for j ∈ 1:n_rk_stages-1) + - b[1,n_rk_stages]*k[n_rk_stages] - y_out[n_rk_stages+1] = simplify(expand(y_out[n_rk_stages+1])) + # Solve rk_coefs_equations for the rk_coefs entries if using_rationals - k_subs[n_rk_stages] = (y[n_rk_stages+1] - y[1] - - sum(b[1,j]*k_subs[j] for j ∈ 1:n_rk_stages-1)) // - b[1,n_rk_stages] + rk_coefs_values = my_solve_for(rk_coefs_equations, [rk_coefs...]) else - k_subs[n_rk_stages] = (y[n_rk_stages+1] - y[1] - - sum(b[1,j]*k_subs[j] for j ∈ 1:n_rk_stages-1)) / - b[1,n_rk_stages] + rk_coefs_values = Symbolics.solve_for(rk_coefs_equations, 
[rk_coefs...]) end - k_subs[n_rk_stages] = simplify(expand(k_subs[n_rk_stages])) - #println("y_out") - #for i ∈ 1:n_rk_stages+1 - # println(y_out[i]) - #end - #println("k") - #for i ∈ 1:n_rk_stages - # println(k_subs[i]) - #end + rk_coefs_values = reshape(rk_coefs_values, n_rk_stages+1, output_size) if low_storage if using_rationals - rk_coeffs = zeros(Rational{Int64}, 3, output_size) + rk_coefs_out = zeros(Rational{Int64}, 3, output_size) else - rk_coeffs = zeros(3, output_size) + rk_coefs_out = zeros(3, output_size) end for i in 1:n_rk_stages - k_coeff = Symbolics.coeff(y_out[i+1], k[i]) - if i == 1 j = i - rk_coeffs[1,i] = Symbolics.coeff(y_out[i+1], y[j]) - #println("k_coeff=$k_coeff, yout[$i]=", y_out[i+1]) - #println("before rk_coeffs[:,$i]=", rk_coeffs[:,i]) - # Subtract k_coeff because k_coeff*y[i] is included in the 'forward Euler step' - rk_coeffs[1,i] -= k_coeff - - # Coefficient of the result of the 'forward Euler step' (y1 + h*f(y[i]) - rk_coeffs[3,i] = k_coeff - #println("after rk_coeffs[:,$i]=", rk_coeffs[:,i]) + rk_coefs_out[1,i] = rk_coefs_values[1,i] + rk_coefs_out[3,i] = rk_coefs_values[2,i] + for j ∈ 3:n_rk_stages+1 + if rk_coefs_values[j,i] != 0 + error("Found non-zero coefficient where zero was expected for low-storage coefficients") + end + end else j = 1 - rk_coeffs[1,i] = Symbolics.coeff(y_out[i+1], y[j]) - for j ∈ 2:i-2 - if Symbolics.coeff(y_out[i+1], y[j]) != 0 + rk_coefs_out[1,i] = rk_coefs_values[1,i] + for j ∈ 2:i-1 + if rk_coefs_values[j,i] != 0 + error("Found non-zero coefficient where zero was expected for low-storage coefficients") + end + end + rk_coefs_out[2,i] = rk_coefs_values[i,i] + rk_coefs_out[3,i] = rk_coefs_values[i+1,i] + for j ∈ i+2:n_rk_stages+1 + if rk_coefs_values[j,i] != 0 error("Found non-zero coefficient where zero was expected for low-storage coefficients") end end - j = i - rk_coeffs[2,i] = Symbolics.coeff(y_out[i+1], y[j]) - #println("k_coeff=$k_coeff, yout[$i]=", y_out[i+1]) - #println("before 
rk_coeffs[:,$i]=", rk_coeffs[:,i]) - # Subtract k_coeff because k_coeff*y[i] is included in the 'forward Euler step' - rk_coeffs[2,i] -= k_coeff - - # Coefficient of the result of the 'forward Euler step' (y1 + h*f(y[i]) - rk_coeffs[3,i] = k_coeff - #println("after rk_coeffs[:,$i]=", rk_coeffs[:,i]) end end - - #for i ∈ 1:n_rk_stages - # println("k$i = ", k_subs[i]) - #end if adaptive - error_coefficients = b[2,:] .- b[1,:] - #println("error_coefficients=", error_coefficients) - #println("error coefficients ", error_coefficients) - y_err = sum(error_coefficients[j]*k_subs[j] for j ∈ 1:n_rk_stages) - y_err = simplify(expand(y_err)) - - # Use final column of rk_coeffs to store the coefficients used to calculate the truncation - # error estimate + i = n_rk_stages+1 j = 1 - rk_coeffs[1,n_rk_stages+1] = Symbolics.coeff(y_err, y[j]) - for j ∈ 2:n_rk_stages-1 - if Symbolics.coeff(y_err, y[j]) != 0 - error("Found non-zero error coefficient where zero was expected for low-storage coefficients") + rk_coefs_out[1,i] = rk_coefs_values[1,i] + for j ∈ 2:i-2 + if rk_coefs_values[j,i] != 0 + error("Found non-zero coefficient where zero was expected for low-storage coefficients") end end - j = n_rk_stages - rk_coeffs[2,n_rk_stages+1] = Symbolics.coeff(y_err, y[j]) - j = n_rk_stages + 1 - rk_coeffs[3,n_rk_stages+1] = Symbolics.coeff(y_err, y[j]) + rk_coefs_out[2,i] = rk_coefs_values[i-1,i] + rk_coefs_out[3,i] = rk_coefs_values[i,i] end else - if using_rationals - rk_coeffs = zeros(Rational{Int64}, n_rk_stages+1, output_size) - else - rk_coeffs = zeros(n_rk_stages+1, output_size) - end - for i in 1:n_rk_stages - k_coeff = Symbolics.coeff(y_out[i+1], k[i]) - - for j ∈ 1:i - rk_coeffs[j,i] = Symbolics.coeff(y_out[i+1], y[j]) - end - #println("k_coeff=$k_coeff, yout[$i]=", y_out[i+1]) - #println("before rk_coeffs[:,$i]=", rk_coeffs[:,i]) - # Subtract k_coeff because k_coeff*y[i] is included in the 'forward Euler step' - rk_coeffs[i,i] -= k_coeff - - # Coefficient of the result of the 
'forward Euler step' (y1 + h*f(y[i]) - rk_coeffs[i+1,i] = k_coeff - #println("after rk_coeffs[:,$i]=", rk_coeffs[:,i]) - end - - #for i ∈ 1:n_rk_stages - # println("k$i = ", k_subs[i]) - #end - if adaptive - error_coefficients = b[2,:] .- b[1,:] - #println("error_coefficients=", error_coefficients) - #println("error coefficients ", error_coefficients) - y_err = sum(error_coefficients[j]*k_subs[j] for j ∈ 1:n_rk_stages) - y_err = simplify(expand(y_err)) - - # Use final column of rk_coeffs to store the coefficients used to calculate the truncation - # error estimate - for j ∈ 1:n_rk_stages+1 - rk_coeffs[j,n_rk_stages+1] = Symbolics.coeff(y_err, y[j]) - end - end + rk_coefs_out = rk_coefs_values end - return rk_coeffs + return rk_coefs_out +end +function convert_butcher_tableau_for_moment_kinetics(a::Matrix{Rational{Int64}}, + b::Matrix{Rational{Int64}}; + low_storage=true) + a = Matrix{Rational{BigInt}}(a) + b = Matrix{Rational{BigInt}}(b) + return convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=low_storage) end -function convert_rk_coeffs_to_butcher_tableau(rkcoeffs::AbstractArray{T,N}) where {T,N} - adaptive = (abs(sum(rkcoeffs[:,end])) < 1.0e-13) - low_storage = size(rkcoeffs, 1) == 3 +function convert_rk_coefs_to_butcher_tableau(rk_coefs::AbstractArray{T,N}) where {T,N} + using_rationals = eltype(rk_coefs) <: Rational + adaptive = (abs(sum(rk_coefs[:,end])) < 1.0e-13) + low_storage = size(rk_coefs, 1) == 3 if adaptive - n_rk_stages = size(rkcoeffs, 2) - 1 + n_rk_stages = size(rk_coefs, 2) - 1 else - n_rk_stages = size(rkcoeffs, 2) + n_rk_stages = size(rk_coefs, 2) end - @variables y[1:n_rk_stages+1] y_out[1:n_rk_stages+1] k[1:n_rk_stages] k_subs[1:n_rk_stages] + @variables y[1:n_rk_stages+1] yn k[1:n_rk_stages] y = Symbolics.scalarize(y) k = Symbolics.scalarize(k) if low_storage - for i ∈ 1:n_rk_stages - y[i+1] = rkcoeffs[1,i]*y[1] + rkcoeffs[2,i]*y[i] + rkcoeffs[3,i]*(y[i] + k[i]) - end + y_expressions = [ + yn, + (rk_coefs[1,i]*y[1] + 
rk_coefs[2,i]*y[i] + rk_coefs[3,i]*(y[i] + k[i]) + for i ∈ 1:n_rk_stages)... + ] else - for i ∈ 1:n_rk_stages - y[i+1] = sum(rkcoeffs[j,i]*y[j] for j ∈ 1:i) + rkcoeffs[i+1,i]*(y[i] + k[i]) - y[i+1] = simplify(expand(y[i+1])) + y_expressions = [ + yn, + (sum(rk_coefs[j,i]*y[j] for j ∈ 1:i) + rk_coefs[i+1,i]*(y[i] + k[i]) + for i ∈ 1:n_rk_stages)... + ] + end + y_expressions = [simplify(expand(e)) for e ∈ y_expressions] + if adaptive + i = n_rk_stages + 1 + if low_storage + y_err = simplify(expand(rk_coefs[1,i]*y[1] + rk_coefs[2,i]*y[n_rk_stages] + rk_coefs[3,i]*y[n_rk_stages+1])) + else + y_err = simplify(expand(sum(rk_coefs[j,i]*y[j] for j ∈ 1:i))) end end - #for i ∈ 1:n_rk_stages+1 - # println("i=$i, y[$i]=", y[i]) - #end + + # Set up equations to solve for each y[i] in terms of k[i] + y_equations = [y[i] ~ y_expressions[i] for i ∈ 1:n_rk_stages+1] + if using_rationals + y_k_expressions = my_solve_for2(y_equations, y) + else + y_k_expressions = Symbolics.solve_for(y_equations, y) + end if adaptive b = zeros(T, 2, n_rk_stages) @@ -243,29 +351,22 @@ function convert_rk_coeffs_to_butcher_tableau(rkcoeffs::AbstractArray{T,N}) wher end for j ∈ 1:n_rk_stages - b[1, j] = Symbolics.coeff(y[n_rk_stages+1], k[j]) + b[1, j] = Symbolics.coeff(y_k_expressions[n_rk_stages+1], k[j]) end if adaptive - if low_storage - yerr = rkcoeffs[1,n_rk_stages+1]*y[1] + - rkcoeffs[2,n_rk_stages+1]*y[n_rk_stages] + - rkcoeffs[3,n_rk_stages+1]*y[n_rk_stages+1] - else - yerr = sum(rkcoeffs[j,n_rk_stages+1]*y[j] for j ∈ 1:n_rk_stages+1) - end error_coeffs = zeros(T, n_rk_stages) + y_k_err = substitute(y_err, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages+1)) + y_k_err = simplify(expand(y_k_err)) for j ∈ 1:n_rk_stages - error_coeffs[j] = Symbolics.coeff(yerr, k[j]) + error_coeffs[j] = Symbolics.coeff(y_k_err, k[j]) end - #println("error_coeffs=", error_coeffs) - # b[2,:] is the lower-order solution @. 
b[2,:] = error_coeffs + b[1,:] end a = zeros(T, n_rk_stages, n_rk_stages) for i ∈ 1:n_rk_stages for j ∈ 1:n_rk_stages - a[i,j] = Symbolics.coeff(y[i], k[j]) + a[i,j] = Symbolics.coeff(y_k_expressions[i], k[j]) end end @@ -274,63 +375,62 @@ end function convert_and_check_butcher_tableau(name, a, b; low_storage=true) println(name) - rk_coeffs = convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=low_storage) + rk_coefs = convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=low_storage) print("a="); display(a) print("b="); display(b) - print("rk_coeffs="); display(rk_coeffs) + print("rk_coefs="); display(rk_coefs) println("a=$a") println("b=$b") - println("rk_coeffs=$rk_coeffs") + println("rk_coefs=$rk_coefs") println() - check_end = size(rk_coeffs, 2) + check_end = size(rk_coefs, 2) if size(b, 1) > 1 # Adaptive timestep - if abs(sum(rk_coeffs[:,end])) > 1.0e-13 + if abs(sum(rk_coefs[:,end])) > 1.0e-13 error("Sum of error coefficients should be 0") end check_end -= 1 end for i ∈ 1:check_end - if abs(sum(rk_coeffs[:,i]) - 1) > 1.0e-13 + if abs(sum(rk_coefs[:,i]) - 1) > 1.0e-13 error("Sum of RK coefficients should be 1 for each stage") end end # Consistency check: converting back should give the original a, b. - a_check, b_check = convert_rk_coeffs_to_butcher_tableau(rk_coeffs) - #println("check?? 
", a_check, " ", b_check) + a_check, b_check = convert_rk_coefs_to_butcher_tableau(rk_coefs) if isa(a[1], Real) if maximum(abs.(a_check .- a)) > 1.0e-13 - error("Converting rk_coeffs back to Butcher tableau gives different 'a':\n" + error("Converting rk_coefs back to Butcher tableau gives different 'a':\n" * "Original: $a\n" * "New: $a_check") end if maximum(abs.(b_check .- b)) > 1.0e-13 - error("Converting rk_coeffs back to Butcher tableau gives different 'b':\n" + error("Converting rk_coefs back to Butcher tableau gives different 'b':\n" * "Original: $b\n" * "New: $b_check") end else if a_check != a - error("Converting rk_coeffs back to Butcher tableau gives different 'a':\n" + error("Converting rk_coefs back to Butcher tableau gives different 'a':\n" * "Original: $a\n" * "New: $a_check") end if b_check != b - error("Converting rk_coeffs back to Butcher tableau gives different 'b':\n" + error("Converting rk_coefs back to Butcher tableau gives different 'b':\n" * "Original: $b\n" * "New: $b_check") end end end -function convert_and_check_rk_coeffs(name, rk_coeffs) +function convert_and_check_rk_coefs(name, rk_coefs) println(name) - print("rk_coeffs="); display(rk_coeffs) - a, b = convert_rk_coeffs_to_butcher_tableau(rk_coeffs) + print("rk_coefs="); display(rk_coefs) + a, b = convert_rk_coefs_to_butcher_tableau(rk_coefs) print("a="); display(a) print("b="); display(b) println("a=$a") @@ -561,7 +661,7 @@ convert_and_check_butcher_tableau( construct_fekete_2nd_order(2)... 
) -convert_and_check_rk_coeffs( +convert_and_check_rk_coefs( "mk's ssprk4", [1//2 0 2//3 0 ; 1//2 1//2 0 0 ; @@ -570,7 +670,7 @@ convert_and_check_rk_coeffs( 0 0 0 1//2], ) -convert_and_check_rk_coeffs( +convert_and_check_rk_coefs( "mk's ssprk3", [0 3//4 1//3; 1 0 0 ; @@ -578,7 +678,7 @@ convert_and_check_rk_coeffs( 0 0 2//3], ) -convert_and_check_rk_coeffs( +convert_and_check_rk_coefs( "mk's ssprk2", [0 1//2; 0 0 ; From 3f2745c345600f1075e9d6547f90e13254602145 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 30 Apr 2024 15:00:55 +0100 Subject: [PATCH 24/75] Add IMEX support to calculate_rk_coeffs.jl --- util/calculate_rk_coeffs.jl | 490 +++++++++++++++++++++++++++++++----- 1 file changed, 433 insertions(+), 57 deletions(-) diff --git a/util/calculate_rk_coeffs.jl b/util/calculate_rk_coeffs.jl index 5767d7705..c2803edd6 100644 --- a/util/calculate_rk_coeffs.jl +++ b/util/calculate_rk_coeffs.jl @@ -139,8 +139,12 @@ upper-triangular elements. Returns an array `rk_coefs` of size `n_rk_stages`x`n_rk_stages` where `size(a) = (n_rk_stages, n_rk_stages)`. """ -function convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=true) - using_rationals = eltype(a) <: Rational +function convert_butcher_tableau_for_moment_kinetics(a, b, + a_implicit=zeros(size(a)), + b_implicit=zeros(size(b)); + low_storage=true) + using_rationals = eltype(a) <: Rational || eltype(b) <: Rational || eltype(a_implicit) <: Rational || eltype(b_implicit) <: Rational + imex = any(a_implicit .!= 0) n_rk_stages = size(a, 1) if size(b, 1) > 1 adaptive = true @@ -159,46 +163,108 @@ function convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=true) # y_out are the same as y, but given as expressions in terms of y and f # k are the RHS evaluations as defined on the Wikipedia page # k_subs are the k evaluated in terms of y by back-substituting the definitions of y. 
- @variables y[1:n_rk_stages+1] k[1:n_rk_stages] yn rk_coefs[1:n_rk_stages+1, 1:output_size] - y = Symbolics.scalarize(y) + @variables y_tilde[1:n_rk_stages+1] k[1:n_rk_stages] yn rk_coefs[1:n_rk_stages+1, 1:output_size] + @variables y[1:n_rk_stages] k_implicit[1:n_rk_stages] rk_coefs_implicit[1:n_rk_stages, 1:output_size+1] + y_tilde = Symbolics.scalarize(y_tilde) k = Symbolics.scalarize(k) rk_coefs = Symbolics.scalarize(rk_coefs) + y = Symbolics.scalarize(y) + k_implicit = Symbolics.scalarize(k_implicit) + rk_coefs_implicit = Symbolics.scalarize(rk_coefs_implicit) # Expressions defined using the 'standard' Butcher formulae + y_tilde_k_expressions = [ + yn + (i == 1 ? 0 : sum(a[i,j] * k[j] for j ∈ 1:i-1) + sum(a_implicit[i,j] * k_implicit[j] for j ∈ 1:i-1)) + for i ∈ 1:n_rk_stages + ] + # Note that when using an IMEX scheme, if a_implicit[i,i]==0, then k_implicit[i] is + # actually an explicit RHS evaluation (evaluated using y_tilde[i]), and the explicit + # RHS k[i] will be evaluated using y_tilde[i] instead of y[i] so that we can store + # (y_tilde[i] + k_implicit[i]) in y[i], as a way to have k_implicit[i] available. + implicit_coefficient_is_zero = [imex && a_implicit[i,i] == 0 for i ∈ 1:n_rk_stages] y_k_expressions = [ - yn + (i == 1 ? 0 : sum(a[i,j] * k[j] for j ∈ 1:i-1)) + y_tilde_k_expressions[i] + (implicit_coefficient_is_zero[i] ? 
1 : a_implicit[i,i]) * k_implicit[i] for i ∈ 1:n_rk_stages ] # Final entry of y_k_expressions is y^(n+1) - push!(y_k_expressions, yn + sum(b[1,i] * k[i] for i ∈ 1:n_rk_stages)) + push!(y_tilde_k_expressions, yn + + sum(b[1,i] * k[i] for i ∈ 1:n_rk_stages) + + sum(b_implicit[1,i] * k_implicit[i] for i ∈ 1:n_rk_stages)) if adaptive - y_err = sum((b[2,i] - b[1,i]) * k[i] for i ∈ 1:n_rk_stages) + y_err = sum((b[2,i] - b[1,i]) * k[i] for i ∈ 1:n_rk_stages) + + sum((b_implicit[2,i] - b_implicit[1,i]) * k_implicit[i] for i ∈ 1:n_rk_stages) end - # Define expressions for y[i] using the rk_coefs as used in moment_kinetics + # Define expressions for y_tilde[i] using the rk_coefs as used in moment_kinetics + # Note that we need a special case for an imex scheme with some a[i,i]=0, as for those + # entries we hacked y[i] to allow k_implicit[i] to be saved, and we need to use + # y_tilde[i] as the starting point for the forward-Euler derivative instead of y[i]. + y_tilde_rk_coefs_expressions = [ + yn, # i=1 + (sum(rk_coefs[j,i-1] * y_tilde[j] for j ∈ 1:i-1) + + rk_coefs[i,i-1] * ((implicit_coefficient_is_zero[i-1] ? y_tilde[i-1] : y[i-1]) + k[i-1]) + + sum(rk_coefs_implicit[j,i] * y[j] for j ∈ 1:i-1) + for i ∈ 2:n_rk_stages+1)... + ] + # Note the 'implicit step' is treated specially, as the coefficient will be used to + # scale the timestep in the code, rather than as the coefficient of some version of + # y/y_tilde. rk_coefs_implicit[i,i] should end up being equal to a_implicit[i,i]. 
+ y_rk_coefs_expressions = [ + e + rk_coefs_implicit[i,i] * k_implicit[i] + for (i,e) ∈ enumerate(y_tilde_rk_coefs_expressions[1:n_rk_stages]) + ] + + # Substitute to eliminate y_tilde[i] from the expressions + y_tilde_rk_coefs_expressions = [ + substitute(e, Dict(y_tilde[i] => y_tilde_k_expressions[i] for i ∈ 1:n_rk_stages+1)) + for e ∈ y_tilde_rk_coefs_expressions + ] y_rk_coefs_expressions = [ - sum(rk_coefs[j,i-1] * y[j] for j ∈ 1:i-1) + rk_coefs[i,i-1] * (y[i-1] + k[i-1]) - for i ∈ 2:(n_rk_stages + 1) + substitute(e, Dict(y_tilde[i] => y_tilde_k_expressions[i] for i ∈ 1:n_rk_stages+1)) + for e ∈ y_rk_coefs_expressions ] + + # Substitute to eliminate y[i] from the expressions + y_tilde_rk_coefs_expressions = [ + substitute(e, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages)) + for e ∈ y_tilde_rk_coefs_expressions + ] y_rk_coefs_expressions = [ substitute(e, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages)) for e ∈ y_rk_coefs_expressions ] + if adaptive - y_rk_coefs_err = sum(rk_coefs[j,n_rk_stages+1] * y[j] for j ∈ 1:n_rk_stages+1) - y_rk_coefs_err = substitute(y_rk_coefs_err, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages+1)) + y_rk_coefs_err = sum(rk_coefs[j,n_rk_stages+1] * y_tilde[j] for j ∈ 1:n_rk_stages+1) + + sum(rk_coefs_implicit[j,n_rk_stages+2] * y[j] for j ∈ 1:n_rk_stages) + y_rk_coefs_err = substitute(y_rk_coefs_err, Dict(y_tilde[i] => y_tilde_k_expressions[i] for i ∈ 1:n_rk_stages+1)) + y_rk_coefs_err = substitute(y_rk_coefs_err, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages)) end # Construct equations that can be solved for rk_coefs entries by equating the - # coefficients of each k[i] in the two sets of expressions + # coefficients of each k[i], k_implicit[i] in the two sets of expressions rk_coefs_equations = [] - for (i, (rk_coefs_expr, Butcher_expr)) ∈ enumerate(zip(y_rk_coefs_expressions, y_k_expressions[2:end])) + for (i, (rk_coefs_expr, Butcher_expr)) ∈ enumerate(zip(y_rk_coefs_expressions, y_k_expressions)) + for 
j ∈ 1:n_rk_stages + lhs = Symbolics.coeff(rk_coefs_expr, k_implicit[j]) + rhs = Symbolics.coeff(Butcher_expr, k_implicit[j]) + if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs_implicit[j,i] ~ 0) + else + push!(rk_coefs_equations, lhs ~ rhs + 0) + end + end + if i == 1 + # Explicit RK coefficients have no entries for i=1, because y_tilde[1]=yn + # always. + continue + end lhs = Symbolics.coeff(rk_coefs_expr, yn) rhs = Symbolics.coeff(Butcher_expr, yn) if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 - push!(rk_coefs_equations, rk_coefs[1,i] ~ 0) + push!(rk_coefs_equations, rk_coefs[1,i-1] ~ 0) else push!(rk_coefs_equations, lhs ~ rhs) end @@ -206,12 +272,43 @@ function convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=true) lhs = Symbolics.coeff(rk_coefs_expr, k[j]) rhs = Symbolics.coeff(Butcher_expr, k[j]) if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 - push!(rk_coefs_equations, rk_coefs[j+1,i] ~ 0) + push!(rk_coefs_equations, rk_coefs[j+1,i-1] ~ 0) else push!(rk_coefs_equations, lhs ~ rhs + 0) end end end + + # Include contribution from y_tilde[n_rk_stages+1] + i = n_rk_stages + 1 + rk_coefs_expr = y_tilde_rk_coefs_expressions[n_rk_stages+1] + Butcher_expr = y_tilde_k_expressions[n_rk_stages+1] + for j ∈ 1:n_rk_stages + lhs = Symbolics.coeff(rk_coefs_expr, k_implicit[j]) + rhs = Symbolics.coeff(Butcher_expr, k_implicit[j]) + if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs_implicit[j,i] ~ 0) + else + push!(rk_coefs_equations, lhs ~ rhs + 0) + end + end + lhs = Symbolics.coeff(rk_coefs_expr, yn) + rhs = Symbolics.coeff(Butcher_expr, yn) + if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs[1,i-1] ~ 0) + else + push!(rk_coefs_equations, lhs ~ rhs) + end + for j ∈ 1:n_rk_stages + lhs = Symbolics.coeff(rk_coefs_expr, k[j]) + rhs = Symbolics.coeff(Butcher_expr, k[j]) + if
isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs[j+1,i-1] ~ 0) + else + push!(rk_coefs_equations, lhs ~ rhs + 0) + end + end + if adaptive i = n_rk_stages + 1 lhs = Symbolics.coeff(y_rk_coefs_err, yn) @@ -230,21 +327,34 @@ function convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=true) push!(rk_coefs_equations, lhs ~ rhs + 0) end end + i = n_rk_stages + 2 + for j ∈ 1:n_rk_stages + lhs = Symbolics.coeff(y_rk_coefs_err, k_implicit[j]) + rhs = Symbolics.coeff(y_err, k_implicit[j]) + if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 + push!(rk_coefs_equations, rk_coefs_implicit[j,i] ~ 0) + else + push!(rk_coefs_equations, lhs ~ rhs + 0) + end + end end # Solve rk_coefs_equations for the rk_coefs entries if using_rationals - rk_coefs_values = my_solve_for(rk_coefs_equations, [rk_coefs...]) + rk_coefs_values = my_solve_for(rk_coefs_equations, [rk_coefs..., rk_coefs_implicit...]) else - rk_coefs_values = Symbolics.solve_for(rk_coefs_equations, [rk_coefs...]) + rk_coefs_values = Symbolics.solve_for(rk_coefs_equations, [rk_coefs..., rk_coefs_implicit...]) end - rk_coefs_values = reshape(rk_coefs_values, n_rk_stages+1, output_size) + rk_coefs_implicit_values = reshape(rk_coefs_values[(n_rk_stages+1)*output_size+1:end], n_rk_stages, output_size+1) + rk_coefs_values = reshape(rk_coefs_values[1:(n_rk_stages+1)*output_size], n_rk_stages+1, output_size) if low_storage if using_rationals rk_coefs_out = zeros(Rational{Int64}, 3, output_size) + rk_coefs_implicit_out = zeros(Rational{Int64}, 3, output_size+1) else rk_coefs_out = zeros(3, output_size) + rk_coefs_implicit_out = zeros(3, output_size+1) end for i in 1:n_rk_stages if i == 1 @@ -273,6 +383,37 @@ function convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=true) end end end + for i in 1:n_rk_stages + if i == 1 + j = i + rk_coefs_implicit_out[1,i] = rk_coefs_implicit_values[1,i] + rk_coefs_implicit_out[3,i] = rk_coefs_implicit_values[2,i] + 
for j ∈ 3:n_rk_stages + if rk_coefs_implicit_values[j,i] != 0 + error("Found non-zero coefficient where zero was expected for low-storage coefficients") + end + end + else + j = 1 + rk_coefs_implicit_out[1,i] = rk_coefs_implicit_values[1,i] + for j ∈ 2:i-1 + if rk_coefs_implicit_values[j,i] != 0 + error("Found non-zero coefficient where zero was expected for low-storage coefficients") + end + end + rk_coefs_implicit_out[2,i] = rk_coefs_implicit_values[i,i] + if i == n_rk_stages + rk_coefs_implicit_out[3,i] = 0 + else + rk_coefs_implicit_out[3,i] = rk_coefs_implicit_values[i+1,i] + end + for j ∈ i+2:n_rk_stages + if rk_coefs_implicit_values[j,i] != 0 + error("Found non-zero coefficient where zero was expected for low-storage coefficients") + end + end + end + end if adaptive i = n_rk_stages+1 j = 1 @@ -284,157 +425,311 @@ function convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=true) end rk_coefs_out[2,i] = rk_coefs_values[i-1,i] rk_coefs_out[3,i] = rk_coefs_values[i,i] + + j = 1 + rk_coefs_implicit_out[1,i] = rk_coefs_implicit_values[1,i] + for j ∈ 2:i-2 + if rk_coefs_implicit_values[j,i] != 0 + error("Found non-zero coefficient where zero was expected for low-storage coefficients") + end + end + j = n_rk_stages + rk_coefs_implicit_out[2,i] = rk_coefs_implicit_values[j,i] + rk_coefs_implicit_out[3,i] = 0 #rk_coefs_implicit_values[j+1,i] end else rk_coefs_out = rk_coefs_values + rk_coefs_implicit_out = rk_coefs_implicit_values end - return rk_coefs_out + return rk_coefs_out, rk_coefs_implicit_out, implicit_coefficient_is_zero end function convert_butcher_tableau_for_moment_kinetics(a::Matrix{Rational{Int64}}, - b::Matrix{Rational{Int64}}; + b::Matrix{Rational{Int64}}, + a_implicit::Matrix{Rational{Int64}}=zeros(Rational{Int64}, size(a)), + b_implicit::Matrix{Rational{Int64}}=zeros(Rational{Int64}, size(b)); low_storage=true) a = Matrix{Rational{BigInt}}(a) b = Matrix{Rational{BigInt}}(b) - return convert_butcher_tableau_for_moment_kinetics(a, b; 
low_storage=low_storage) + a_implicit = Matrix{Rational{BigInt}}(a_implicit) + b_implicit = Matrix{Rational{BigInt}}(b_implicit) + return convert_butcher_tableau_for_moment_kinetics(a, b, a_implicit, b_implicit; + low_storage=low_storage) end -function convert_rk_coefs_to_butcher_tableau(rk_coefs::AbstractArray{T,N}) where {T,N} - using_rationals = eltype(rk_coefs) <: Rational - adaptive = (abs(sum(rk_coefs[:,end])) < 1.0e-13) +function convert_rk_coefs_to_butcher_tableau(rk_coefs::AbstractArray{T,N}, + rk_coefs_implicit=zeros(T, size(rk_coefs, 1) - 1, size(rk_coefs, 2) + 1), + implicit_coefficient_is_zero=nothing + ) where {T,N} + using_rationals = eltype(rk_coefs) <: Rational || eltype(rk_coefs_implicit) <: Rational + adaptive = (abs(sum(rk_coefs[:,end]) + sum(rk_coefs_implicit[:,end])) < 1.0e-13) low_storage = size(rk_coefs, 1) == 3 if adaptive n_rk_stages = size(rk_coefs, 2) - 1 else n_rk_stages = size(rk_coefs, 2) end + if implicit_coefficient_is_zero === nothing + implicit_coefficient_is_zero = zeros(Bool, n_rk_stages) + end - @variables y[1:n_rk_stages+1] yn k[1:n_rk_stages] - y = Symbolics.scalarize(y) + @variables y_tilde[1:n_rk_stages+1] yn k[1:n_rk_stages] + y_tilde = Symbolics.scalarize(y_tilde) k = Symbolics.scalarize(k) + @variables y[1:n_rk_stages] k_implicit[1:n_rk_stages] + y = Symbolics.scalarize(y) + k_implicit = Symbolics.scalarize(k_implicit) if low_storage + y_tilde_expressions = [ + yn, + (rk_coefs[1,i-1]*y_tilde[1] + rk_coefs[2,i-1]*y_tilde[i-1] + + rk_coefs[3,i-1]*((implicit_coefficient_is_zero[i-1] ? y_tilde[i-1] : y[i-1]) + k[i-1]) + + rk_coefs_implicit[1,i]*y[1] + rk_coefs_implicit[2,i]*y[i-1] + for i ∈ 2:n_rk_stages+1)... + ] y_expressions = [ - yn, - (rk_coefs[1,i]*y[1] + rk_coefs[2,i]*y[i] + rk_coefs[3,i]*(y[i] + k[i]) - for i ∈ 1:n_rk_stages)... 
+ y_tilde_expressions[i] + rk_coefs_implicit[3,i] * k_implicit[i] + for i ∈ 1:n_rk_stages ] else + y_tilde_expressions = [ + yn, + (sum(rk_coefs[j,i-1]*y_tilde[j] for j ∈ 1:i-1) + + rk_coefs[i,i-1]*((implicit_coefficient_is_zero[i-1] ? y_tilde[i-1] : y[i-1]) + k[i-1]) + + sum(rk_coefs_implicit[j,i]*y[j] for j ∈ 1:i-1) + for i ∈ 2:n_rk_stages+1)... + ] y_expressions = [ - yn, - (sum(rk_coefs[j,i]*y[j] for j ∈ 1:i) + rk_coefs[i+1,i]*(y[i] + k[i]) - for i ∈ 1:n_rk_stages)... + y_tilde_expressions[i] + rk_coefs_implicit[i,i] * k_implicit[i] + for i ∈ 1:n_rk_stages ] end + y_tilde_expressions = [simplify(expand(e)) for e ∈ y_tilde_expressions] y_expressions = [simplify(expand(e)) for e ∈ y_expressions] if adaptive - i = n_rk_stages + 1 if low_storage - y_err = simplify(expand(rk_coefs[1,i]*y[1] + rk_coefs[2,i]*y[n_rk_stages] + rk_coefs[3,i]*y[n_rk_stages+1])) + i = n_rk_stages + 1 + y_err = rk_coefs[1,i]*y_tilde[1] + rk_coefs[2,i]*y_tilde[n_rk_stages] + rk_coefs[3,i]*y_tilde[n_rk_stages+1] + + rk_coefs_implicit[1,i+1]*y[1] + rk_coefs_implicit[2,i+1]*y[n_rk_stages-1] + rk_coefs_implicit[3,i+1]*y[n_rk_stages] else - y_err = simplify(expand(sum(rk_coefs[j,i]*y[j] for j ∈ 1:i))) + y_err = sum(rk_coefs[j,n_rk_stages+1]*y_tilde[j] for j ∈ 1:n_rk_stages+1) + + sum(rk_coefs_implicit[j,n_rk_stages+2]*y[j] for j ∈ 1:n_rk_stages) end + y_err = simplify(expand(y_err)) end - # Set up equations to solve for each y[i] in terms of k[i] - y_equations = [y[i] ~ y_expressions[i] for i ∈ 1:n_rk_stages+1] + # Set up equations to solve for each y_tilde[i] and y[i] in terms of k[i] and + # k_implicit[i] + y_tilde_equations = [y_tilde[i] ~ y_tilde_expressions[i] for i ∈ 1:n_rk_stages+1] + y_equations = [y[i] ~ y_expressions[i] for i ∈ 1:n_rk_stages] + equations = vcat(y_tilde_equations, y_equations) if using_rationals - y_k_expressions = my_solve_for2(y_equations, y) + expressions = my_solve_for2(equations, vcat(y_tilde, y)) else - y_k_expressions = Symbolics.solve_for(y_equations, y) +
expressions = Symbolics.solve_for(equations, vcat(y_tilde, y)) end + y_tilde_k_expressions = expressions[1:n_rk_stages+1] + y_k_expressions = expressions[n_rk_stages+2:end] if adaptive b = zeros(T, 2, n_rk_stages) + b_implicit = zeros(T, 2, n_rk_stages) else b = zeros(T, 1, n_rk_stages) + b_implicit = zeros(T, 1, n_rk_stages) end for j ∈ 1:n_rk_stages - b[1, j] = Symbolics.coeff(y_k_expressions[n_rk_stages+1], k[j]) + b[1, j] = Symbolics.coeff(y_tilde_k_expressions[n_rk_stages+1], k[j]) + b_implicit[1, j] = Symbolics.coeff(y_tilde_k_expressions[n_rk_stages+1], k_implicit[j]) end if adaptive error_coeffs = zeros(T, n_rk_stages) - y_k_err = substitute(y_err, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages+1)) + error_coeffs_implicit = zeros(T, n_rk_stages) + y_k_err = substitute(y_err, Dict(y_tilde[i] => y_tilde_k_expressions[i] for i ∈ 1:n_rk_stages+1)) + y_k_err = substitute(y_k_err, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages)) y_k_err = simplify(expand(y_k_err)) for j ∈ 1:n_rk_stages error_coeffs[j] = Symbolics.coeff(y_k_err, k[j]) + error_coeffs_implicit[j] = Symbolics.coeff(y_k_err, k_implicit[j]) end @. b[2,:] = error_coeffs + b[1,:] + @. 
b_implicit[2,:] = error_coeffs_implicit + b_implicit[1,:] end a = zeros(T, n_rk_stages, n_rk_stages) + a_implicit = zeros(T, n_rk_stages, n_rk_stages) for i ∈ 1:n_rk_stages for j ∈ 1:n_rk_stages a[i,j] = Symbolics.coeff(y_k_expressions[i], k[j]) + if j == i && implicit_coefficient_is_zero[i] + a_implicit[i,j] = 0 + else + a_implicit[i,j] = Symbolics.coeff(y_k_expressions[i], k_implicit[j]) + end end end - return a, b + return a, b, a_implicit, b_implicit end -function convert_and_check_butcher_tableau(name, a, b; low_storage=true) +function convert_and_check_butcher_tableau(name, a, b, + a_implicit=zeros(eltype(a), size(a)), + b_implicit=zeros(eltype(b), size(b)); + low_storage=true) + imex = any(a_implicit .!= 0) || any(b_implicit .!= 0) + println(name) - rk_coefs = convert_butcher_tableau_for_moment_kinetics(a, b; low_storage=low_storage) + rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero = + convert_butcher_tableau_for_moment_kinetics(a, b, a_implicit, b_implicit; + low_storage=low_storage) print("a="); display(a) print("b="); display(b) + if imex + print("a_implicit="); display(a_implicit) + print("b_implicit="); display(b_implicit) + end print("rk_coefs="); display(rk_coefs) + if imex + print("rk_coefs_implicit="); display(rk_coefs_implicit) + end + print("rk_coefs(Float64)="); display(Float64.(rk_coefs)) + if imex + print("rk_coefs_implicit(Float64)="); display(Float64.(rk_coefs_implicit)) + end println("a=$a") println("b=$b") + if imex + println("a_implicit=$a_implicit") + println("b_implicit=$b_implicit") + end println("rk_coefs=$rk_coefs") + if imex + println("rk_coefs_implicit=$rk_coefs_implicit") + println("implicit_coefficient_is_zero=$implicit_coefficient_is_zero") + end println() check_end = size(rk_coefs, 2) if size(b, 1) > 1 # Adaptive timestep - if abs(sum(rk_coefs[:,end])) > 1.0e-13 - error("Sum of error coefficients should be 0") + error_sum = sum(rk_coefs[:,end]) + sum(rk_coefs_implicit[:,end]) + if abs(error_sum) > 1.0e-13 + error("Sum 
of error coefficients should be 0. Got ", error_sum, " ≈ ", Float64(error_sum)) end check_end -= 1 end for i ∈ 1:check_end - if abs(sum(rk_coefs[:,i]) - 1) > 1.0e-13 - error("Sum of RK coefficients should be 1 for each stage") + if low_storage + error_sum = sum(rk_coefs[:,i]) + sum(rk_coefs_implicit[:,i+1]) + else + error_sum = sum(rk_coefs[:,i]) + sum(rk_coefs_implicit[1:i,i+1]) + end + if abs(error_sum - 1) > 1.0e-13 + error("Sum of RK coefficients should be 1 for each stage. Got ", error_sum, " ≈ ", Float64(error_sum)) + end + end + if imex + check_end_implicit = size(rk_coefs_implicit, 2) + if size(b_implicit, 1) > 1 + # Adaptive timestep + check_end_implicit -= 1 + end + for i ∈ 1:check_end_implicit - 1 + if !all(abs.(rk_coefs_implicit[i+1:end,i]) .< 1.0e-13) + error("Implicit RK coefficients should be 0 for j>i. Got ", rk_coefs_implicit[i+1:end,i], " ≈ ", Float64.(rk_coefs_implicit[i+1:end,i])) + end + end + for i ∈ 1:check_end_implicit - 1 + if a_implicit[i,i] == 0 + if rk_coefs_implicit[i,i] != 1 + error("Diagonal RK coefficient should be 1 when a_implicit[$i,$i]=0, got rk_coefs_implicit[$i,$i]=", rk_coefs_implicit[i,i]) + end + elseif abs(rk_coefs_implicit[i,i] - a_implicit[i,i]) > 1.0e-13 + error("Diagonal RK coefficient should be equal to a_implicit[i,i] for each stage. Got rk_coefs_implicit[$i,$i]=", rk_coefs_implicit[i,i] - a_implicit[i,i], " a_implicit[$i,$i]=", a_implicit[i,i]) + end end end # Consistency check: converting back should give the original a, b. 
- a_check, b_check = convert_rk_coefs_to_butcher_tableau(rk_coefs) + a_check, b_check, a_check_implicit, b_check_implicit = + convert_rk_coefs_to_butcher_tableau(rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero) - if isa(a[1], Real) - if maximum(abs.(a_check .- a)) > 1.0e-13 + if eltype(a) == Rational + if a_check != a error("Converting rk_coefs back to Butcher tableau gives different 'a':\n" * "Original: $a\n" * "New: $a_check") end - if maximum(abs.(b_check .- b)) > 1.0e-13 + if b_check != b error("Converting rk_coefs back to Butcher tableau gives different 'b':\n" * "Original: $b\n" * "New: $b_check") end else - if a_check != a + if maximum(abs.(a_check .- a)) > 1.0e-13 error("Converting rk_coefs back to Butcher tableau gives different 'a':\n" * "Original: $a\n" * "New: $a_check") end - if b_check != b + if maximum(abs.(b_check .- b)) > 1.0e-13 error("Converting rk_coefs back to Butcher tableau gives different 'b':\n" * "Original: $b\n" * "New: $b_check") end end + if eltype(a_implicit) == Rational + if a_check_implicit != a_implicit + error("Converting rk_coefs back to Butcher tableau gives different 'a_implicit':\n" + * "Original: $a_implicit\n" + * "New: $a_check_implicit") + end + if b_check_implicit != b_implicit + error("Converting rk_coefs back to Butcher tableau gives different 'b_implicit':\n" + * "Original: $b_implicit\n" + * "New: $b_check_implicit") + end + else + if maximum(abs.(a_check_implicit .- a_implicit)) > 1.0e-13 + error("Converting rk_coefs back to Butcher tableau gives different 'a_implicit':\n" + * "Original: $a_implicit\n" + * "New: $a_check_implicit") + end + if maximum(abs.(b_check_implicit .- b_implicit)) > 1.0e-13 + error("Converting rk_coefs back to Butcher tableau gives different 'b_implicit':\n" + * "Original: $b_implicit\n" + * "New: $b_check_implicit") + end + end end -function convert_and_check_rk_coefs(name, rk_coefs) +function convert_and_check_rk_coefs(name, rk_coefs, + rk_coefs_implicit=zeros(eltype(rk_coefs), + 
size(rk_coefs, 1), + size(rk_coefs, 2) + 1), + implicit_coefficient_is_zero=nothing) + imex = any(rk_coefs_implicit .!= 0) + println(name) print("rk_coefs="); display(rk_coefs) - a, b = convert_rk_coefs_to_butcher_tableau(rk_coefs) + if imex + print("rk_coefs_implicit="); display(rk_coefs_implicit) + end + a, b, a_implicit, b_implicit = convert_rk_coefs_to_butcher_tableau(rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero) print("a="); display(a) print("b="); display(b) + if imex + print("a_implicit="); display(a_implicit) + print("b_implicit="); display(b_implicit) + end println("a=$a") println("b=$b") + if imex + println("a_implicit=$a_implicit") + println("b_implicit=$b_implicit") + end println() end @@ -684,3 +979,84 @@ convert_and_check_rk_coefs( 0 0 ; 1 1//2], ) + +println("\n\nIMEX methods\n============\n") + +# 4th-order, 7-stage IMEX method 'ARK4(3)7L[2]SA₁' from Kennedy & Carpenter 2019 +# (https://doi.org/10.1016/j.apnum.2018.10.007) +convert_and_check_butcher_tableau( + "KennedyCarpenterARK437", + Rational{BigInt}[0 0 0 0 0 0 0; + 247//1000 0 0 0 0 0 0; + 247//4000 2694949928731//7487940209513 0 0 0 0 0; + 464650059369//8764239774964 878889893998//2444806327765 -952945855348//12294611323341 0 0 0 0; + 476636172619//8159180917465 -1271469283451//7793814740893 -859560642026//4356155882851 1723805262919//4571918432560 0 0 0; + 6338158500785//11769362343261 -4970555480458//10924838743837 3326578051521//2647936831840 -880713585975//1841400956686 -1428733748635//8843423958496 0 0; + 760814592956//3276306540349 760814592956//3276306540349 -47223648122716//6934462133451 71187472546993//9669769126921 -13330509492149//9695768672337 11565764226357//8513123442827 0; + ], + Rational{BigInt}[0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 247//2000 ; + 0 0 4469248916618//8635866897933 -621260224600//4094290005349 696572312987//2942599194819 1532940081127//5565293938103 2441//20000], + 
Rational{BigInt}[0 0 0 0 0 0 0 ; + 1235//10000 1235//10000 0 0 0 0 0 ; + 624185399699//4186980696204 624185399699//4186980696204 1235//10000 0 0 0 0 ; + 1258591069120//10082082980243 1258591069120//10082082980243 -322722984531//8455138723562 1235//10000 0 0 0 ; + -436103496990//5971407786587 -436103496990//5971407786587 -2689175662187//11046760208243 4431412449334//12995360898505 1235//10000 0 0 ; + -2207373168298//14430576638973 -2207373168298//14430576638973 242511121179//3358618340039 3145666661981//7780404714551 5882073923981//14490790706663 1235//10000 0 ; + 0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 1235//10000; + ], + Rational{BigInt}[0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 247//2000 ; + 0 0 4469248916618//8635866897933 -621260224600//4094290005349 696572312987//2942599194819 1532940081127//5565293938103 2441//20000], + ; low_storage=false) + +# The 5th order KennedyCarpenter548 method seems to be missing the 8'th row of a_implicit +# coefficients in the Kennedy&Carpenter2019 paper, so this is not correct. 
+## 5th-order, 8-stage IMEX method 'ARK5(4)8L[2]SA₂' from Kennedy & Carpenter 2019 +## (https://doi.org/10.1016/j.apnum.2018.10.007) +#convert_and_check_butcher_tableau( +# "KennedyCarpenterARK548", +# Rational{BigInt}[ 0 0 0 0 0 0 0 0; +# 4//9 0 0 0 0 0 0 0; +# 1//9 1183333538310//1827251437969 0 0 0 0 0 0; +# 895379019517//9750411845327 477606656805//13473228687314 -112564739183//9373365219272 0 0 0 0 0; +# -4458043123994//13015289567637 -2500665203865//9342069639922 983347055801//8893519644487 2185051477207//2551468980502 0 0 0 0; +# -167316361917//17121522574472 1605541814917//7619724128744 991021770328//13052792161721 2342280609577//11279663441611 3012424348531//12792462456678 0 0 0; +# 6680998715867//14310383562358 5029118570809//3897454228471 2415062538259//6382199904604 -3924368632305//6964820224454 -4331110370267//15021686902756 -3944303808049//11994238218192 0 0; +# 2193717860234//3570523412979 2193717860234//3570523412979 5952760925747//18750164281544 -4412967128996//6196664114337 4151782504231//36106512998704 572599549169//6265429158920 -457874356192//11306498036315 0; +# ], +# Rational{BigInt}[ 0 0 3517720773327//20256071687669 4569610470461//17934693873752 2819471173109//11655438449929 3296210113763//10722700128969 -1142099968913//5710983926999 2//9 ; +# 0 0 520639020421//8300446712847 4550235134915//17827758688493 1482366381361//6201654941325 5551607622171//13911031047899 -5266607656330//36788968843917 1074053359553//5740751784926; +# ], +# Rational{BigInt}[ 0 0 0 0 0 0 0 0 ; +# 2//9 2//9 0 0 0 0 0 0 ; +# 2366667076620//8822750406821 2366667076620//8822750406821 2//9 0 0 0 0 0 ; +# -257962897183//4451812247028 -257962897183//4451812247028 128530224461//14379561246022 2//9 0 0 0 0 ; +# -486229321650//11227943450093 -486229321650//11227943450093 -225633144460//6633558740617 1741320951451//6824444397158 2//9 0 0 0 ; +# 621307788657//4714163060173 621307788657//4714163060173 -125196015625//3866852212004 940440206406//7593089888465 
961109811699//6734810228204 2//9 0 0 ; +# 2036305566805//6583108094622 2036305566805//6583108094622 -3039402635899//4450598839912 -1829510709469//31102090912115 -286320471013//6931253422520 8651533662697//9642993110008 2//9 0 ; +# 0 0 0 0 0 0 0 2//9; +# ], +# Rational{BigInt}[ 0 0 3517720773327//20256071687669 4569610470461//17934693873752 2819471173109//11655438449929 3296210113763//10722700128969 -1142099968913//5710983926999 2//9 ; +# 0 0 520639020421//8300446712847 4550235134915//17827758688493 1482366381361//6201654941325 5551607622171//13911031047899 -5266607656330//36788968843917 1074053359553//5740751784926; +# ], +# ; low_storage=false) + +# 3rd-order, 4-stage IMEX method from Kennedy & Carpenter 2003 +# (https://doi.org/10.1016/S0168-9274(02)00138-1, +# https://ntrs.nasa.gov/api/citations/20010075154/downloads/20010075154.pdf) +convert_and_check_butcher_tableau( + "KennedyCarpenterARK324", + Rational{BigInt}[0 0 0 0; + 1767732205903//2027836641118 0 0 0; + 5535828885825//10492691773637 788022342437//10882634858940 0 0; + 6485989280629//16251701735622 -4246266847089//9704473918619 10755448449292//10357097424841 0; + ], + Rational{BigInt}[1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236; + 2756255671327//12835298489170 -10771552573575//22201958757719 9247589265047//10645013368117 2193209047091//5459859503100], + Rational{BigInt}[0 0 0 0 ; + 1767732205903//4055673282236 1767732205903//4055673282236 0 0 ; + 2746238789719//10658868560708 -640167445237//6845629431997 1767732205903//4055673282236 0 ; + 1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236; + ], + Rational{BigInt}[1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236; + 2756255671327//12835298489170 -10771552573575//22201958757719 9247589265047//10645013368117 2193209047091//5459859503100], + ; 
low_storage=false) From 9d7e2f88fbf6af79cd84586e47632f98f3d1c4da Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 1 May 2024 11:23:45 +0100 Subject: [PATCH 25/75] Support IMEX schemes in test-rk-timestep.jl --- util/test-rk-timestep.jl | 227 ++++++++++++++++++++++++++++++++------- 1 file changed, 191 insertions(+), 36 deletions(-) diff --git a/util/test-rk-timestep.jl b/util/test-rk-timestep.jl index 5ca7add5a..1ce3b7832 100644 --- a/util/test-rk-timestep.jl +++ b/util/test-rk-timestep.jl @@ -6,19 +6,34 @@ function f(y) return y #return 1.0 end +function f_implicit(y, dt) + # Calculate derivative at the end of a 'backward Euler' step so that + # (y_out - y)/dt = f(y_out) + # ⇒ y_out - y = dt * f(y_out) = dt * y_out + # ⇒ y_out = y / (1 - dt) + return f(y/(1 - dt)) +end +function backward_euler(y, dt) + # Do a 'backward Euler' solve so that + # (y_out - y)/dt = f(y_out) + # ⇒ y_out - y = dt * f(y_out) = dt * y_out + # ⇒ y_out = y / (1 - dt) + return y / (1 - dt) +end y0 = 1.0 nsteps = 100 * multiplier t = [i*dt for i ∈ 0:nsteps] analytic = @. y0*exp(t) +analytic_implicit = @. y0*exp(2*t) #analytic = @. 
1.0 + t -function rk_advance(rk_coeffs, y0, dt, nsteps) - n_rk_stages = size(rk_coeffs, 1) - 1 - #println("n_rk_stages=$n_rk_stages, ", size(rk_coeffs)) +function rk_advance_explicit(rk_coefs, y0, dt, nsteps) + n_rk_stages = size(rk_coefs, 1) - 1 + #println("n_rk_stages=$n_rk_stages, ", size(rk_coefs)) yscratch = zeros(n_rk_stages + 1) yscratch[1] = y0 - adaptive = size(rk_coeffs, 2) > n_rk_stages + adaptive = size(rk_coefs, 2) > n_rk_stages result = zeros(nsteps+1) result[1] = y0 @@ -28,8 +43,57 @@ function rk_advance(rk_coeffs, y0, dt, nsteps) for it ∈ 1:nsteps for istage ∈ 1:n_rk_stages yscratch[istage+1] = yscratch[istage] + dt*f(yscratch[istage]) - this_coeffs = rk_coeffs[:,istage] - yscratch[istage+1] = sum(this_coeffs[i]*yscratch[i] for i ∈ 1:istage+1) + this_coefs = rk_coefs[:,istage] + yscratch[istage+1] = sum(this_coefs[i]*yscratch[i] for i ∈ 1:istage+1) + end + #k1 = 2*(yscratch[2] - yscratch[1]) + #k2 = 2*(yscratch[3] - yscratch[1]) + #k3 = yscratch[4] - yscratch[1] + #k4 = 6*(yscratch[5] - yscratch[1]) - k1 - 2*k2 - 2*k3 + #println("kcheck = ", k1, " ", k2, " ", k3, " ", k4) + if adaptive + error[it+1] = sum(rk_coefs[i, n_rk_stages+1]*yscratch[i] for i ∈ 1:n_rk_stages+1) + end + yscratch[1] = yscratch[end] + result[it+1] = yscratch[end] + end + + return result, error +end + +function rk_advance(rk_coefs, y0, dt, nsteps, rk_coefs_implicit=nothing, implicit_coefficient_is_zero=nothing) + + n_rk_stages = size(rk_coefs, 1) - 1 + + if rk_coefs_implicit === nothing && implicit_coefficient_is_zero === nothing + rk_coefs_implicit = zeros(n_rk_stages, n_rk_stages + 2) + implicit_coefficient_is_zero = zeros(Bool, n_rk_stages) + end + + #println("n_rk_stages=$n_rk_stages, ", size(rk_coefs)) + yscratch = zeros(n_rk_stages + 1) + yscratch_implicit = zeros(n_rk_stages) + yscratch[1] = y0 + adaptive = size(rk_coefs, 2) > n_rk_stages + + result = zeros(nsteps+1) + result[1] = y0 + + error = zeros(nsteps+1) + + for it ∈ 1:nsteps + for istage ∈ 1:n_rk_stages + if 
implicit_coefficient_is_zero[istage] + yscratch_implicit[istage] = yscratch[istage] + dt*f_implicit(yscratch[istage], 0.0) + yscratch[istage+1] = yscratch[istage] + dt*f(yscratch[istage]) + else + yscratch_implicit[istage] = backward_euler(yscratch[istage], dt*rk_coefs_implicit[istage,istage]) + yscratch[istage+1] = yscratch_implicit[istage] + dt*f(yscratch_implicit[istage]) + end + this_coefs = rk_coefs[:,istage] + this_coefs_implicit = rk_coefs_implicit[:,istage+1] + yscratch[istage+1] = sum(this_coefs[i]*yscratch[i] for i ∈ 1:istage+1) + + sum(this_coefs_implicit[i]*yscratch_implicit[i] for i ∈ 1:istage) end #k1 = 2*(yscratch[2] - yscratch[1]) #k2 = 2*(yscratch[3] - yscratch[1]) @@ -37,7 +101,8 @@ function rk_advance(rk_coeffs, y0, dt, nsteps) #k4 = 6*(yscratch[5] - yscratch[1]) - k1 - 2*k2 - 2*k3 #println("kcheck = ", k1, " ", k2, " ", k3, " ", k4) if adaptive - error[it+1] = sum(rk_coeffs[i, n_rk_stages+1]*yscratch[i] for i ∈ 1:n_rk_stages+1) + error[it+1] = sum(rk_coefs[i, n_rk_stages+1]*yscratch[i] for i ∈ 1:n_rk_stages+1) + + sum(rk_coefs_implicit[i, n_rk_stages+2]*yscratch_implicit[i] for i ∈ 1:n_rk_stages) end yscratch[1] = yscratch[end] result[it+1] = yscratch[end] @@ -46,8 +111,8 @@ function rk_advance(rk_coeffs, y0, dt, nsteps) return result, error end -function rk_advance_non_adaptive(rk_coeffs, y0, dt, nsteps) - n_rk_stages = size(rk_coeffs, 2) +function rk_advance_non_adaptive(rk_coefs, y0, dt, nsteps) + n_rk_stages = size(rk_coefs, 2) println("check n_rk_stages=$n_rk_stages") yscratch = zeros(n_rk_stages + 1) @@ -59,9 +124,9 @@ function rk_advance_non_adaptive(rk_coeffs, y0, dt, nsteps) for it ∈ 1:nsteps for istage ∈ 1:n_rk_stages yscratch[istage+1] = yscratch[istage] + dt*f(yscratch[istage]) - this_coeffs = rk_coeffs[:,istage] - #println("istage=$istage, this_coeffs=$this_coeffs") - yscratch[istage+1] = this_coeffs[1]*yscratch[1] + this_coeffs[2]*yscratch[istage] + this_coeffs[3]*yscratch[istage+1] + this_coefs = rk_coefs[:,istage] + 
#println("istage=$istage, this_coefs=$this_coefs") + yscratch[istage+1] = this_coefs[1]*yscratch[1] + this_coefs[2]*yscratch[istage] + this_coefs[3]*yscratch[istage+1] #println("istage=$istage, ", yscratch[istage+1]) end #println("before yscratch=$yscratch") @@ -73,7 +138,7 @@ function rk_advance_non_adaptive(rk_coeffs, y0, dt, nsteps) return result end -function rk_advance_butcher(a, b, y0, dt, nsteps) +function rk_advance_butcher_explicit(a, b, y0, dt, nsteps) n_rk_stages = size(a, 2) kscratch = zeros(n_rk_stages) y = y0 @@ -105,6 +170,55 @@ function rk_advance_butcher(a, b, y0, dt, nsteps) return result, error end +function rk_advance_butcher(a, b, y0, dt, nsteps, a_implicit=nothing, b_implicit=nothing) + n_rk_stages = size(a, 2) + + if a_implicit === nothing && b_implicit === nothing + a_implicit = zeros(n_rk_stages, n_rk_stages) + b_implicit = zeros(size(b)) + end + + kscratch = zeros(n_rk_stages) + kscratch_implicit = zeros(n_rk_stages) + y = y0 + if ndims(b) == 1 + b = b' + end + adaptive = size(b, 1) > 1 + + result = zeros(nsteps+1) + result[1] = y0 + + error = zeros(nsteps+1) + + for it ∈ 1:nsteps + kscratch[1] = dt*f(y) + kscratch_implicit[1] = dt*f_implicit(y, a_implicit[1,1] * dt) + for i ∈ 2:n_rk_stages + ytilde = y + + sum(a[i,j] * kscratch[j] for j ∈ 1:i-1) + + sum(a_implicit[i,j] * kscratch_implicit[j] for j ∈ 1:i-1) + ystage = backward_euler(ytilde, dt * a_implicit[i,i]) + kscratch_implicit[i] = dt*f_implicit(ytilde, dt * a_implicit[i,i]) + kscratch[i] = dt*f(ystage) + end + if adaptive + y_loworder = y + + sum(b[2,j]*kscratch[j] for j ∈ 1:n_rk_stages) + + sum(b_implicit[2,j]*kscratch_implicit[j] for j ∈ 1:n_rk_stages) + end + y = y + + sum(b[1,j]*kscratch[j] for j ∈ 1:n_rk_stages) + + sum(b_implicit[1,j]*kscratch_implicit[j] for j ∈ 1:n_rk_stages) + if adaptive + error[it+1] = y_loworder - y + end + result[it+1] = y + end + + return result, error +end + function rk4_by_hand(y0, dt, nsteps) result = zeros(nsteps+1) y = y0 @@ -122,31 +236,31 @@ 
function rk4_by_hand(y0, dt, nsteps) end methods = Dict( - "SSPRK3" => (rk_coeffs=Float64[0 3//4 1//3; 1 0 0; 0 1//4 0; 0 0 2//3], + "SSPRK3" => (rk_coefs=Float64[0 3//4 1//3; 1 0 0; 0 1//4 0; 0 0 2//3], a=Float64[0 0 0; 1 0 0; 1//4 1//4 0], b=Float64[1//6 1//6 2//3]), - "RK4" => (rk_coeffs = Float64[1//2 1 1 -1//3; 1//2 -1//2 0 1//3; 0 1//2 -1 2//3; 0 0 1 1//6; 0 0 0 1//6], + "RK4" => (rk_coefs = Float64[1//2 1 1 -1//3; 1//2 -1//2 0 1//3; 0 1//2 -1 2//3; 0 0 1 1//6; 0 0 0 1//6], a = Float64[0 0 0 0; 1//2 0 0 0; 0 1//2 0 0; 0 0 1 0], b = Float64[1//6 1//3 1//3 1//6]), - "RKF45" => (rk_coeffs = Float64[3//4 5//8 10469//2197 115//324 121//240 641//1980 11//36; 1//4 3//32 17328//2197 95//54 33//10 232//165 4//3; 0 9//32 -32896//2197 -95744//29241 -1408//285 -512//171 -512//171; 0 0 7296//2197 553475//233928 6591//1520 2197//836 2197//836; 0 0 0 -845//4104 -77//40 -56//55 -1; 0 0 0 0 -11//40 34//55 8//11; 0 0 0 0 0 2//55 -1], + "RKF45" => (rk_coefs = Float64[3//4 5//8 10469//2197 115//324 121//240 641//1980 11//36; 1//4 3//32 17328//2197 95//54 33//10 232//165 4//3; 0 9//32 -32896//2197 -95744//29241 -1408//285 -512//171 -512//171; 0 0 7296//2197 553475//233928 6591//1520 2197//836 2197//836; 0 0 0 -845//4104 -77//40 -56//55 -1; 0 0 0 0 -11//40 34//55 8//11; 0 0 0 0 0 2//55 -1], a = Float64[0 0 0 0 0 0; 1//4 0 0 0 0 0; 3//32 9//32 0 0 0 0; 1932//2197 -7200//2197 7296//2197 0 0 0; 439//216 -8 3680//513 -845//4104 0 0; -8//27 2 -3544//2565 1859//4104 -11//40 0], b = Float64[16//135 0 6656//12825 28561//56430 -9//50 2//55; 25//216 0 1408//2565 2197//4104 -1//5 0]), - "RKF45 truncated" => (rk_coeffs = Float64[3//4 5//8 10469//2197 115//324 121//240 641//1980; 1//4 3//32 17328//2197 95//54 33//10 232//165; 0 9//32 -32896//2197 -95744//29241 -1408//285 -512//171; 0 0 7296//2197 553475//233928 6591//1520 2197//836; 0 0 0 -845//4104 -77//40 -56//55; 0 0 0 0 -11//40 34//55; 0 0 0 0 0 2//55], + "RKF45 truncated" => (rk_coefs = Float64[3//4 5//8 10469//2197 115//324 121//240 
641//1980; 1//4 3//32 17328//2197 95//54 33//10 232//165; 0 9//32 -32896//2197 -95744//29241 -1408//285 -512//171; 0 0 7296//2197 553475//233928 6591//1520 2197//836; 0 0 0 -845//4104 -77//40 -56//55; 0 0 0 0 -11//40 34//55; 0 0 0 0 0 2//55], a = Float64[0 0 0 0 0 0; 1//4 0 0 0 0 0; 3//32 9//32 0 0 0 0; 1932//2197 -7200//2197 7296//2197 0 0 0; 439//216 -8 3680//513 -845//4104 0 0; -8//27 2 -3544//2565 1859//4104 -11//40 0], b = Float64[16//135 0 6656//12825 28561//56430 -9//50 2//55]), - "Heun SSPRK2" => (rk_coeffs = Float64[0 1//2; 1 0; 0 1//2], + "Heun SSPRK2" => (rk_coefs = Float64[0 1//2; 1 0; 0 1//2], a = Float64[0 0; 1 0], b = Float64[1//2 1//2]), - "Gottlieb 43" => (rk_coeffs = Float64[0 1//2 2//3; 1 0 0; 0 1//2 -1//3; 0 0 2//3], + "Gottlieb 43" => (rk_coefs = Float64[0 1//2 2//3; 1 0 0; 0 1//2 -1//3; 0 0 2//3], a = Float64[0 0 0; 1 0 0; 1//2 1//2 0], b = Float64[1//6 1//6 2//3]), - "mk ssprk3" => (rk_coeffs = Float64[1//2 0 2//3 0 ; + "mk ssprk3" => (rk_coefs = Float64[1//2 0 2//3 0 ; 1//2 1//2 0 0 ; 0 1//2 1//6 0 ; 0 0 1//6 1//2; @@ -154,50 +268,91 @@ methods = Dict( a = Float64[0 0 0 0; 1//2 0 0 0; 1//2 1//2 0 0; 1//6 1//6 1//6 0], b = Float64[1//6 1//6 1//6 1//2]), - "mk ssprk2" => (rk_coeffs = Float64[0.0 0.5 0.0; + "mk ssprk2" => (rk_coefs = Float64[0.0 0.5 0.0; 1.0 0.0 0.0; 0.0 0.5 0.0], a = Float64[0.0 0.0; 1.0 0.0], b = Float64[0.5 0.5; 0.5 0.5]), - "Fekete 43" => (rk_coeffs = Float64[1//2 0 2//3 0 -1//2; 1//2 1//2 0 0 0; 0 1//2 1//6 0 0; 0 0 1//6 1//2 1; 0 0 0 1//2 -1//2], + "Fekete 43" => (rk_coefs = Float64[1//2 0 2//3 0 -1//2; 1//2 1//2 0 0 0; 0 1//2 1//6 0 0; 0 0 1//6 1//2 1; 0 0 0 1//2 -1//2], a = Float64[0 0 0 0; 1//2 0 0 0; 1//2 1//2 0 0; 1//6 1//6 1//6 0], b = Float64[1//6 1//6 1//6 1//2; 1//4 1//4 1//4 1//4]), - "Fekete 43 truncated" => (rk_coeffs = Float64[1//2 0 2//3 0; 1//2 1//2 0 0; 0 1//2 1//6 0; 0 0 1//6 1//2; 0 0 0 1//2], + "Fekete 43 truncated" => (rk_coefs = Float64[1//2 0 2//3 0; 1//2 1//2 0 0; 0 1//2 1//6 0; 0 0 1//6 1//2; 0 0 0 
1//2], a = Float64[0 0 0 0; 1//2 0 0 0; 1//2 1//2 0 0; 1//6 1//6 1//6 0], b = Float64[1//6 1//6 1//6 1//2]), - "Fekete 42" => (rk_coeffs = Float64[2//3 0 0 1//4 -1//8; 1//3 2//3 0 0 3//16; 0 1//3 2//3 0 0; 0 0 1//3 1//2 3//16; 0 0 0 1//4 -1//4], + "Fekete 42" => (rk_coefs = Float64[2//3 0 0 1//4 -1//8; 1//3 2//3 0 0 3//16; 0 1//3 2//3 0 0; 0 0 1//3 1//2 3//16; 0 0 0 1//4 -1//4], a = Float64[0 0 0 0; 1//3 0 0 0; 1//3 1//3 0 0; 1//3 1//3 1//3 0], b = Float64[1//4 1//4 1//4 1//4; 5//16 1//4 1//4 3//16]), - "Fekete 10,4" => (rk_coeffs = Float64[5//6 0 0 0 3//5 0 0 0 0 -1//2 -1//5; 1//6 5//6 0 0 0 0 0 0 0 0 6//5; 0 1//6 5//6 0 0 0 0 0 0 0 0; 0 0 1//6 5//6 0 0 0 0 0 0 -9//5; 0 0 0 1//6 1//3 0 0 0 0 0 9//5; 0 0 0 0 1//15 5//6 0 0 0 9//10 0; 0 0 0 0 0 1//6 5//6 0 0 0 -6//5; 0 0 0 0 0 0 1//6 5//6 0 0 6//5; 0 0 0 0 0 0 0 1//6 5//6 0 -9//5; 0 0 0 0 0 0 0 0 1//6 1//2 9//5; 0 0 0 0 0 0 0 0 0 1//10 -1], + "Fekete 10,4" => (rk_coefs = Float64[5//6 0 0 0 3//5 0 0 0 0 -1//2 -1//5; 1//6 5//6 0 0 0 0 0 0 0 0 6//5; 0 1//6 5//6 0 0 0 0 0 0 0 0; 0 0 1//6 5//6 0 0 0 0 0 0 -9//5; 0 0 0 1//6 1//3 0 0 0 0 0 9//5; 0 0 0 0 1//15 5//6 0 0 0 9//10 0; 0 0 0 0 0 1//6 5//6 0 0 0 -6//5; 0 0 0 0 0 0 1//6 5//6 0 0 6//5; 0 0 0 0 0 0 0 1//6 5//6 0 -9//5; 0 0 0 0 0 0 0 0 1//6 1//2 9//5; 0 0 0 0 0 0 0 0 0 1//10 -1], a = Float64[0 0 0 0 0 0 0 0 0 0; 1//6 0 0 0 0 0 0 0 0 0; 1//6 1//6 0 0 0 0 0 0 0 0; 1//6 1//6 1//6 0 0 0 0 0 0 0; 1//6 1//6 1//6 1//6 0 0 0 0 0 0; 1//15 1//15 1//15 1//15 1//15 0 0 0 0 0; 1//15 1//15 1//15 1//15 1//15 1//6 0 0 0 0; 1//15 1//15 1//15 1//15 1//15 1//6 1//6 0 0 0; 1//15 1//15 1//15 1//15 1//15 1//6 1//6 1//6 0 0; 1//15 1//15 1//15 1//15 1//15 1//6 1//6 1//6 1//6 0], b = Float64[1//10 1//10 1//10 1//10 1//10 1//10 1//10 1//10 1//10 1//10; 1//5 0 0 3//10 0 0 1//5 0 3//10 0]), - "Fekete 6,4" => (rk_coeffs = [0.6447024483081 0.2386994475333264 0.5474858792272213 0.3762853856474131 0.0 -0.18132326703443313 -0.0017300417984673078; 0.3552975516919 0.4295138541066736 
-6.461498003318411e-14 -1.1871059690804486e-13 0.0 2.9254376698872875e-14 -0.18902907903375094; 0.0 0.33178669836 0.25530138316744333 -3.352873534367973e-14 0.0 0.2059808002676668 0.2504712436879622; 0.0 0.0 0.1972127376054 0.3518900216285391 0.0 0.4792670116241715 -0.9397479180374522; 0.0 0.0 0.0 0.2718245927242 0.5641843457422999 9.986456106503283e-14 1.1993626679930305; 0.0 0.0 0.0 0.0 0.4358156542577 0.3416567872695656 -0.5310335716309745; 0.0 0.0 0.0 0.0 0.0 0.1544186678729 0.2117066988196524], + "Fekete 6,4" => (rk_coefs = [0.6447024483081 0.2386994475333264 0.5474858792272213 0.3762853856474131 0.0 -0.18132326703443313 -0.0017300417984673078; 0.3552975516919 0.4295138541066736 -6.461498003318411e-14 -1.1871059690804486e-13 0.0 2.9254376698872875e-14 -0.18902907903375094; 0.0 0.33178669836 0.25530138316744333 -3.352873534367973e-14 0.0 0.2059808002676668 0.2504712436879622; 0.0 0.0 0.1972127376054 0.3518900216285391 0.0 0.4792670116241715 -0.9397479180374522; 0.0 0.0 0.0 0.2718245927242 0.5641843457422999 9.986456106503283e-14 1.1993626679930305; 0.0 0.0 0.0 0.0 0.4358156542577 0.3416567872695656 -0.5310335716309745; 0.0 0.0 0.0 0.0 0.0 0.1544186678729 0.2117066988196524], a = [0.0 0.0 0.0 0.0 0.0 0.0; 0.3552975516919 0.0 0.0 0.0 0.0 0.0; 0.2704882223931 0.33178669836 0.0 0.0 0.0 0.0; 0.1223997401356 0.1501381660925 0.1972127376054 0.0 0.0 0.0; 0.0763425067155 0.093643368364 0.123004466581 0.2718245927242 0.0 0.0; 0.0763425067155 0.093643368364 0.123004466581 0.2718245927242 0.4358156542577 0.0], b = [0.1522491819555 0.1867521364225 0.1555370561501 0.1348455085546 0.2161974490441 0.1544186678729; 0.1210663237182 0.230884400455 0.0853424972752 0.3450614904457 0.0305351538213 0.1871101342844]), + + "KennedyCarpenterARK437" => (rk_coefs = Float64[1259//2000 5290646302898597//8373961392408000 8423019873483076625875775070651744355694385798609904937007939962921//150521175132586175452031742950672295931748410227157096692266429460000 
2994513382955822485521046980912234681324963183189745389014355133307701665733190659992515397312853494866472292886256590592494915387//2965873493603904869657209592263579672060858985048910013379293383705776295507221134405229631810912000404727243855383598634762065000 30539970275135679633584632348554217845550139791434491473433065741424255490119246194598789137339313385221967019548261328863657459815753917023734449603158284970462973658597410429650430702665200563167161539643//13704862650330987205714124331836761942666326434854101824180425078097998709961464232104783253918380815197539403091139963512106850869604383687916916407395432488294630299106508922071626689111782004817881600000 -19040678234218442267791957547085949877261833393218769774490211844357442495720928310803217673452945114307858653570770743722530691547161857714592037194448292737108135956064173232077902035051756906315290249323391199215452093288761543466155072487019283153040605640582545353//434460620365086227275365613595785689037684119561551447866399563678359364964304766160588180361574468026545002066358107854844508866339943817889423954205635510933928478885046494597924763194047299856692216836659998842171053550088349173699526518609668649583153524558000000 288731630022162319869438999191684090646040643450899591816164044389835222770755887620657396004427344292026826164251174295940084430575464251867753391273329991428766006962036541371088185016838939694742825762143476610215112259310775986858990019786106138952556466469818422670975238869429118972933807274018940951589368081351548344612945911//20973003965589548708434314855006387742137826247026382293972477180137413647707040377066768697931519202069488796547924545511172033369848624623987910615835501364546356391125295257989950347130885375821004661709022933531613399512675984933594553629478794041110851618556180635106168633352649598677887127603362572483995718065757513848000000 
84926642764971243894475134943962931810444673911735634915100592292957077949431848662551468317687921438318511523760815574702789503485398620028309816648298294834153366303807932815889792557943196419058598328094000505490239045609005350465572648576161106088544859912831113322833512972633007203117374432262835343768993715225884506359744564475587764680843945362752279138159903160761204959//260972014540685732500941228716594401253697451754294212135043621433819057442875621317591319960743124933373694568320156829732345991594970230575152480441468737290148399986074743051568390414584640252689524964664950956646999867002330201896464020174989429698298196815004391658613009073755531263975373728451196159059154355539770936746765576373862915582488531900995626740195393587760000000; 247//1000 -989824741407403//1034184231962388 12030115115653867750152716618470210758833187291724517144940796943//75260587566293087726015871475336147965874205113578548346133214730 -38294840366277686210014997045193699763084799480894125070642047208039500243910125715098952557814588229683929175932575120507758//2965873493603904869657209592263579672060858985048910013379293383705776295507221134405229631810912000404727243855383598634762065 295295570231675996693315167776780747152571140671795256274243785364473032721842828048391983319277155679830435409182467618416827429676745188054309175736589897273331671973890410913992022974171287068414179//61733615542031473899613172665931360102100569526369828036848761613054048243069658703174699341974688356745672986897026862667147976890109836432058182015294740938264100446425715865187507608611630652332800 
-103607762222490505420853358572242884724507274765889279862093763910463346807234121781702833150902639990159657168588480823754779677586383897844592436246137396292078029780455443152811826226940327856890549206867400186099307876348070477253089682709554337188513182823809//1957029821464352375114259520701737338007586124151132648046844881434051193532904352074721533160245351470923432731342828174975265163693440621123531325250610409612290445428137363053715149522735584940055030795765760550320061036434005286934804137881390313437628489000 1756980287380270661909883701400719407931186906833821727133555062583564555413167571820171278734011901786553078780566040820580897865510288379254954688057860520817526060415179421086733406546370539399123710022886030886680529570815674651094279571160030365101231973109840311106887692153830760503248639774555086269823425452980789521983//94472990835988958146100517364893638478098316428046767089966113424042403818500181878679138278970807216529228813278939394194468618783101912720666264035295051191650254014077906567522298860949934125319840818509112313205465763570612544745921412745399973158156989272775588446424183033120043237287779854069200776954935666962871684000 4497481478618828357902975315180513548695118296897040369787574112992144752316100046645078788641717287509607925626450106980120209292833281864629468608251949921103739384618553805425857887380202308238545540652588351850372157487811335028912567342994696086555165285879240439813676160036107905425561960935961331946051968043421127130927398396885118470213706720569254204159456782778143//10579946535433205371659779542564637888663410206255170762231498166235907734170633296659107565976072632434068698715682033637797810470066360698992668126005489349600610810246273366955475287077755685919845606675606119864067562175770143320126919736823895798579656627635313175349176043530629645836839475477751195637533284684044767705949955798940388469560345887878201084061975415720000; 0 2694949928731//7487940209513 
8998324425985185229752164921843556928418//6879911577066686708082765415963101392005 -1113749691734484577393721387476449364293695591499248096021365233437237262190376983790684968//614723064777942345300673611647494298409492630748950625564212151899381047684846341907434565 3232749502455777147099949130361796695276166220371216781206693407480970206448819780667318974467270737609589333827843458744644740168302532466186611141754331//189582997729714290713265545572615912994127195399317804061465277614709498987796247842084387314678472277548705650701196377776893051375514851341440850875200 -4870194067553746005392433554393767454323970473452014417195456308395019908797901907202628017224416587814584976946420122287071436281684631404642172500643426697814174404515180399709158516683162105836131515362388110129683638667528993//21343197949358935830336366563982013835567480844791522290459133182821516998314822953834142105584655472803157904439421128859575015047441788494355858675117408374452309707414448855197609419898595868573273357109375195601907874661750 1133640531577714301094463790230570404584859296215495398405151332049144471525210024993808359315202761280905403079519602164500478641718437221313878517301376096168046994391334699315934152212733388857744580032179121377464302776620604944135207945388255432571250499946633194518861052173//15269743319892007455140684408468757761408687980991560114683247369534160706810144792488732725360073361204871896366796514755108037806954494436799836307013539397248756798832458082570495407212986994247502838542705261851137819990892696985793851053928235797745833864984886152117549000 
2893818794357948608396224652970468062472459880296424811982545951332742248161861507594854114831155818553085130655146195415611877204165404047936261527497967416640034320042470107097161887686354604209343668647869967015394820299208885831731185499722116534661350732567244113640955295285049989870219686790318194239544599451563105257133//1710045024558522147077712925318040863324335720844158150953699472338859250480805706497840549035944636590651925322772241392509045721953834946328464956986918939857303509645058517346927758905551129327891825404013534181813392627174511472243186476068520859982949529412273255989549686977234401171431731307352748128089844902467651170000; 0 0 -952945855348//12294611323341 -22073573743301541610712262679571236673//102497573414903811687310385575877661106 -1721410795387108339409971682021950898460420032371617016873094528434128927392613849754588339//87137142052407093121638255236903751954999399574863919133666611538546309140586170055319680 16313517457306668499040607903231659478959795579778899115572743239581171711329724761584530868990247993612666499584567250035429887343299192678690030002344049//62416302275874709124142209806047948969179330019834923487644841352294781949250993050168150304800088551996355377414194373918898008246028356490083758823900 -3522114925398586688095375690918293852014330258883220414302055832172071373253315487145747264610273932349849350380058343538480611104076920529217826992141264292169120822384426292339080424329190906960608882147//40700967645824834858910387658090999284501829591792512374361650786103602925853405179284205837319813016693834232729849885086983845902959918661378045151742047433271863508697358129908893348972361198906999600 
-2964615015014179805295172754296611663749294757017026824055697955567711725761450449626912955847752550279125632083736465384517105390439711240732035285328566095702816892096481374832505284217977117233570295284209999214909138342054284123934619995605076214529//1519355101116665488160320486425332596410766015376101261032729360600325388596256808357121832346062378794250401199007985315934407267758119256854060437954663211883904942752463071332521831780921316532009162049372788856501274262524564791159730973728017956000; 0 0 0 1723805262919//4571918432560 -25876943084012456170678693260966349907000//5680618296291396887153903820876753773137 48844701120434798505257977500359380914524723337647143148599611849985789938064739097674693920//866184382793087165443239722235018033562332770654938221330767289744734883576318512798157591 -10439201328524415451101705838850752098522244583730511249035532393853115936152097267231153987129654894580734726923948255514483695320//517179273612462951325391504144113415822567772911339018044947272294653098377564085323877210889868378806161859809936951216035085879 -31792106067067045818112260837108131508726364490393328197498519784851127422528513551342159965127955715285413010230702025548328167661456142039693351348408082614337262676//66396651076396923064500150569526548836429014778004208000488845377413771691299301838459832995249234003843920958668478089089157501642918765520740904300173384489982847721; 0 0 0 0 -1428733748635//8843423958496 45167606322154409493844473095934856651372//7158213738657143615497509498481377893535 -1615099588584428850432566812602172779484150467417635643452543010181911434924813//328599571910176348051406456035570993083111686686639606428802520754094008735795 -4034687914592733378429707314152809369441595752859107808382503309002756873129991192828401438988938141477//34345686420160340136137214349452622351601924975531611734364160909677419579508067395379029901259679245150; 0 0 0 0 0 11565764226357//8513123442827 
-25809210976654570172323689//32261698729236896469921844 31493257163953445767383915813738522959//6188068127939952207906691591618012272168; 0 0 0 0 0 0 247//2000 -29//2470], + rk_coefs_implicit = Float64[1 247//2000 989824741407403//8373961392408000 -12030115115653867750152716618470210758833187291724517144940796943//609397470172413665797699364172762331707483442215210917782455180000 19147420183138843105007498522596849881542399740447062535321023604019750121955062857549476278907294114841964587966287560253879//12007584994347792994563601588111658591339510060926761187770418557513264354280247507713480290732437248602134590507625905403895000 -295295570231675996693315167776780747152571140671795256274243785364473032721842828048391983319277155679830435409182467618416827429676745188054309175736589897273331671973890410913992022974171287068414179//499867332324141489065693705797015061555470198594087676411730863263595532332547843750402423821657395601179538355441513058033586857409796246413426575022629481281490691873892436155364434077826968844800000 103607762222490505420853358572242884724507274765889279862093763910463346807234121781702833150902639990159657168588480823754779677586383897844592436246137396292078029780455443152811826226940327856890549206867400186099307876348070477253089682709554337188513182823809//15846395315500828948293599357908804356336729750211600389043278392178552174355500826515963831257047380331363827784152454858099313066343648754036690892717493195241218181604351117843847364556563440810162192678265267613927619728210569124978171156934334521762174000000 
-1756980287380270661909883701400719407931186906833821727133555062583564555413167571820171278734011901786553078780566040820580897865510288379254954688057860520817526060415179421086733406546370539399123710022886030886680529570815674651094279571160030365101231973109840311106887692153830760503248639774555086269823425452980789521983//764963488550517879725510262063916101037233331401188397489604157279695577477734266224122577157658358028576751524525825054206223633871270548345475822148138066329151854365003292044714970534007563767771990433272164479396483915551518580938634921015384398041756998160126222238252494195303993824192549425661544752671543862047544000000 -4497481478618828357902975315180513548695118296897040369787574112992144752316100046645078788641717287509607925626450106980120209292833281864629468608251949921103739384618553805425857887380202308238545540652588351850372157487811335028912567342994696086555165285879240439813676160036107905425561960935961331946051968043421127130927398396885118470213706720569254204159456782778143//85667583282859962523560967955988970758408179807734176212400794868306945215956544912219494461344717671530920637373943592208889153603776199991843466607331897567616281864342294469275103539091139157245713414377377488777874997374657030932201779245537617802264426134698892108090494279600240047261858101034422636741160199870807835675708144120974805421541262250025919709003849520000000; 0 247//2000 1640162795743102475350151093//1935977423638871147963949261 -669520662365112558836337514177765438740501906951800//3434894046659157185836586522283340462482027689855253 3946917245375300099117028390196313070898965018251827990344948235886761188759982607841249401044014800//8731687803837463597851852204375440308270070845281864536173093106185812052200062608725343371330114879 
-205400344379039456236889403418786715948678188681075725573409873825666119392008156332013686312028931603996403696782121273525274842696005527984325836413553777161305//89363383017861228225303962341868491699836686835444580475507268771312849840752079176917079323598733222511329293960320776917936050196860239819503153329236587045136 30687505472431132535398127411347221742615518129027192717809570222837123760440722268522243759249777774158885828261806787110459137168616203275440545863270163627249417471268646467826870981867030746658885027755639865911983778862//1133170665462309851631677012105217332859851084790774917074074021640837762020794312552555302543238883393727692629571220902396774550231187040084644756043645934216506528391671558626974497474800652364423277959876832348439939747 -6283525646833647454002892632282410246801087091050293278650123969383191268291122461341921080069123066751425002628983740840429970668280064678807240424673718814438405438302314529802406401591711043517178423219353227983220474197585457883457827139444416354631995314601988204663707433738723579//636073193374428113494435816226054457408386696306335042416362660226889698324156499448503155209934236569286275656030656961509515881881917957453625064846593883601537604025882329382852749987796068169486806444822362282067831972375349383462186775277444140348881039842682407991538426680968962 
-5111399490648784623505405906880770861485683214132379586079815455918757228788561547090828993593721487763199937324034708336117675901305459316990878813671055951639549111107993366169146620054742699199566402315746577078472396338683439114520127236419852170565949886333950618992185026169588984547653272289747956394678487224897575893472996553//23744424443337318051143623666623532897092042087880269588186878183995047296072436550183424467565368665201724952138208425897034422691189645477159617431842320555598909677789228593651627069755898900855393242632756026860305068214302197202618797165584108476518318651533651241430264302060599862244313590743654489782747825300717170452010145820; 0 0 247//2000 -2972678418645402786341338364//12838151569953855298061689287 -14177902844174227269025142717297051203475652491333500//11971105784073717652462253600880425542713752681105087 -100148767553896799794460313449068596317708839495767869203993116264461768408264073121941920171989860225//16896980983343158542755332581698141314998069944341701280946967597382042773067583208093579816862254704 8120587802228218371525098119582199057503340092685175907611832343053507998800789197285606422127482797666378809500665961580494440814493887170697877672294810170691010//77414698954417744943691534221904770957339164270679599444552137006106151217420120158205166736460436330696537591532832275880234436981438762980759441700502891030827 -773431832654955771723705915527504690565625557946561612427689605734312849813109402940104692058813867875775302143309398169386342513814875087912054850684653412041387124455620214615519952957068564604440450567357722605//27692724047854711512168597950120181690998527229673939994235410614905537717037560897782708599532214026735280560758431733216205801827405703523808488247547552157893603286263272072905320930368075417471639053737763218 
-1303404354434948517428356388978807213640989814097796284872851954122942850939247373256252247771849635395296815861618930858082321997548690453745717952318803556064569049629447371161223768908840614968332185016381909654224705552772116843629887797345246826110167236923//1860770177301487311499340868274363903273885901513548551661642580585076571165583837547959561673448191237064888114034470178211592939044370653201543317609208350146901593536799945407827170076630727738853296085239724937440199422317066971204713970176240621839897829164; 0 0 0 247//2000 538260754703221409274894839//225772174112649072819826640 53717436927136847537872396533404737469775216052354000//7927277355078818621920036006972820247636839819935349 -1772830339659539491048538392985299701647037422231103660763419873677962619701225032539957600128007666810800//15721979495129036484463918711394582943772656542660620350548585102102545643714934169988943899054128147359 23300798642481300915550244810330608079712179111290003575082920574543585477524931007510835130377963055801601626441651388811757948939347006283700//721721356793285693541681937242605189550706405472415761047580279465244653002939927725991664966944137182650520997040379092802938837804294847283 7898475573277855254317959379657908427305225639249224806096111567367259892230833277244398214475361481979103570897581069354516256084225361210253104267762350814757343118967653292590//10295136532244819865134216418581425896312845076778105967882093451812315373150919833368240666957561175670654409595846292949450822030580980616104277250108095432715677497120580742213; 0 0 0 0 247//2000 109149106916529224225613938235//31652606811075124885796735456 -1471402139169815526549951787477624798736224552941321200//56440086799582050350922834838054057420026494337598863 5412580004078613161687981458637008030233366457113868199883389551037522158772900//455213269145024092215621169612878312797315768577504391565569423807316058315843 
3820395190970937465086284368007694267392689261092114936550556797062450867925288189416331303649552143230//14273858093118406255740963176533941671573423886510381570104226394395061792966349825149357548274393367193; 0 0 0 0 0 247//2000 3702251939282354375344210899//4399061605898178118260737311 7335745460336671146051037364344432719375//4167718263865310401834285721625989602073 11934751738672605596266458500552561610251142174626625//266467933294598164559149808395021381193202692438810102; 0 0 0 0 0 0 247//2000 1753//2000 0], + implicit_coefficient_is_zero = Bool[1, 0, 0, 0, 0, 0, 0], + a = Float64[0 0 0 0 0 0 0; 247//1000 0 0 0 0 0 0; 247//4000 2694949928731//7487940209513 0 0 0 0 0; 464650059369//8764239774964 878889893998//2444806327765 -952945855348//12294611323341 0 0 0 0; 476636172619//8159180917465 -1271469283451//7793814740893 -859560642026//4356155882851 1723805262919//4571918432560 0 0 0; 6338158500785//11769362343261 -4970555480458//10924838743837 3326578051521//2647936831840 -880713585975//1841400956686 -1428733748635//8843423958496 0 0; 760814592956//3276306540349 760814592956//3276306540349 -47223648122716//6934462133451 71187472546993//9669769126921 -13330509492149//9695768672337 11565764226357//8513123442827 0], + b = Float64[0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 247//2000; 0 0 4469248916618//8635866897933 -621260224600//4094290005349 696572312987//2942599194819 1532940081127//5565293938103 2441//20000], + a_implicit = Float64[0 0 0 0 0 0 0; 247//2000 247//2000 0 0 0 0 0; 624185399699//4186980696204 624185399699//4186980696204 247//2000 0 0 0 0; 1258591069120//10082082980243 1258591069120//10082082980243 -322722984531//8455138723562 247//2000 0 0 0; -436103496990//5971407786587 -436103496990//5971407786587 -2689175662187//11046760208243 4431412449334//12995360898505 247//2000 0 0; -2207373168298//14430576638973 -2207373168298//14430576638973 242511121179//3358618340039 
3145666661981//7780404714551 5882073923981//14490790706663 247//2000 0; 0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 247//2000], + b_implicit = Float64[0 0 9164257142617//17756377923965 -10812980402763//74029279521829 1335994250573//5691609445217 2273837961795//8368240463276 247//2000; 0 0 4469248916618//8635866897933 -621260224600//4094290005349 696572312987//2942599194819 1532940081127//5565293938103 2441//20000], + ), + +# The 5th order KennedyCarpenter548 method seems to be missing the 8'th row of a_implicit +# coefficients in the Kennedy&Carpenter2019 paper, so this is not correct. +# "KennedyCarpenterARK548" => (rk_coefs=Rational{BigInt}[1//3 170659747039489//317619014645556 1059761918771198374975975493583149778060443922616025115820976221//1221160207236679352040231791426238051865028458046356668183805968 6026949071202260192816151438453311527906342956798869364622169520394998781714920581981427863823607816039034882834095728827//611556923858701481314679349666018470018187199735628661499066355092054710782598046711683141284138645465506265796134149584 574302772466800635565942302880362733547172772643551904790822110826901330458802177285016647695347313333655288473150923245477469999353398479172681461629222234982048270948664718039299655881093933502451//150834125743572692057315590407717781072772261784442486315710184301611482636191181393913904998344549476475522992537979253944606756561784457508281554611374619511508513230391440260514965717678431926080 
11804841191205866409738141418797344154150165781776069482717283225649753549804892986029880618638519186729029736737972278367957024018786254197094466036484982024080815664456754857338763116916399671315927783397636251797109720480473939029956507226170091189075837293613095202295448304114799//316529629859882632343675363951000175954808867120997357850157402897970774856378239676060399245608650982082301165291347152499432091243523124311078123020152833536134440536363380623223806386341610919656626785819858198938684429370281223424126948673612559884172118728379442353474712938880 205479529932329908716282413893618829637818370089241804929941483282179075703631150515643272392025819491441486340246313289022355398613147479287030479051759027065895455940962205315837210089822115315763871475524927356818199319782481010864996104849201569232761201059997695433556990185146963462480490411870245772241162665421012480742396686320042543732181343578393//5260860690207262979589344526020288103568995000362022999215204542913782791438284170690968045317071607104586535740950151469853326262915929889090860121924832019578588270175179588839072445427432012552661380649474236988741686278155158173159184181471487357026228329769096613600034623019687639534746341002493095888508157629508644724676106792267171767323953113600 
-31073886185247651667067360120004278123539463314544688434634859477262281424368417682864989098625362184258655551942455231016080227541207487450456834716812464333109464818293826368695129979874437319388225475567363925740863990100114319609773199401453239914173351840343142392793986601320935346482664846634007108244761611421925874975284788092831794173603812123632127765067692113096713202915904166386621637429972396638482166795168390506001//205364533394638024075238227463140747921299540559472003068986564247853675492701798914552117976132500817408280838093109002246273402713204252739854979840561553732661829119263569894232347484336103156662483556486877970127505583089385295967632167610081341996276964887677919554714672505647564062872298807872882874244750782276807503913044362965995487634343580478340804213310079872837580199084484018436699646596793275992180669063759904768 1355106737355152460804424095630272213643929537644340902552944500194072220292762643530899993169071944828216401904819834895492706473204035420495272675010429289871147187217913280587665812877578699953287633363827598340626753544810744419524708014536091930419289921773711543518130861012168892746775975065634473740761885966889604878495107361010682453148097856546629634991209230617564181332375433303760587947068414808055196175764331527277384554187746358610192408611064850683460450152684230908509819835540315417629041//40682939169547015068145909076369356849344912507687127704642447159647492810337548269748338234153453270350117883220210855423331000788143399130922229954914310978098076329357245823099756637699294796313351066995366854135786629191377508136276798811699324277682691177669468459035819933886457267269032009956138595704540849884086612898962324823390614836495694603671787505773677235190081814562655593388519100675525221353268195703733221510379155250399221985970770068746195650662071392042888445769873162504653283328000; 4//9 -33777256972339//35291001627284 
70523709557357325353926362451393185076417954760652143178918227//135684467470742150226692421269582005762780939782928518687089552 259173275379895127458760576309330523532337951147970777869503405464585562881462085641713656867342514214174126194545550287//67950769317633497923853261074002052224243022192847629055451817232450523420288671856853682364904293940611807310681572176 34483675781191648110786485447949476841349802351981788326801039637143030060467265358101399325091272262829110785552388913826586855287191388294264020004018018771784951430241000078153131805692088161919//16759347304841410228590621156413086785863584642715831812856687144623498070687909043768211666482727719608391443615331028216067417395753828612031283845708291056834279247821271140057218413075381325120 37972639708348074764863510493824251598614777393926316069246767548532985823720464466543328302337241205729880021748560977783654106318361163996656332009862268634667824273622244260705413151982804248607416793104785645839386988240914576932950229734443760364294642720771732116535265641883877//1652988067046053746683638011744112029986224083854097313217488659578291824249975251641648751615956288461985350529854812907497034254271731871402296864660798130688702078356564321032390988906450634802651273214837037261124240908933690833437107398628865590506232175581537087845923500903040 
675361318822696286582373698984422416980652600989947732274664396784893998350699958365528677645259735958564356945089045713897031190536030043270123125611921244593280607804710357155969107797082924569677147406605660739001613329726734039554300060040262414311187767235888588699616374393926701368049944511958912556993822054410035807555792147417285087351663706964339//27473383604415706671188799191439282318638085001890564551457179279660865688622150669163944236655818392657285242202739679898122926039672078309696713970051900546688183188692604519492933881676589398886120543391698793163428806119254714904275739614351100642248081277683060093244625253547257673125897558568575056306653712065211811339975224359617452562691755148800 -14654782196644443978643077431729985169483648884762410390029797637990901973385611446945854901190651457791071027387784893201190913834296025743656446253760565138790278257703135364502640573240985081692965718431740687526063899064740113674831492617564146000207118510633012932973784580911032600355276047390516684914006149750898178961841693685461865667072601334715379620650194897355808646711454006049738723680812812767482148674909363385677//153208461421396621135495185567739923052398069941193399114958230470620996002491818237840468966321072038383955545878986081040870633770168252044018794484228460721192158231831552143316195742282489656557725827855289914222107339765096966832995426629743223394047894440013686017009358853419611284999968951905166588722274393127142106093858493006377586012922988610825679333739265936878829672332869029627379101429353713835436372158678024192 
4461407513830220875580251050725490334550039459522874983321802400684610393685337405358142021414207715460029597992569661824287292651719118056453635029394091853700171157539178998334123264701613727496982908769111231517264264427131041129298949334584170031908182425677961537622802682862369968557627139451073916218048192757202579311445016491504569877930402285098941281638204030005082843744035332231113981837799491035741510194007002757944974689989789973121919931578599142856207780304241152182820589861967884680533563//212455348996523300911428636287706641324356765317921666902021668500381351342873863186463544111690255967383948945705545578321839670782526639905927200875663623996734398608865617076187617996874095047414166683198026904931330174666082542489445504905540915672342942816718335286075948543629276840182722718659834888679268882728007867361247696299928766368366405152508223641262536672659316142716090321028933081305520600400400577563940156776424477418751492593402910359007910620124150602890639661242670959746522701824000; 0 1183333538310//1827251437969 136607349717583458493057707621706//9411247739582139928720185864476093 -1222051897984293100889925908401868213380468642566580792951727180058723335646058631//9650671855308101978247774429296270313350157698860544197688698633892935233480512564 7233278175208784929821520367114267945040071552896618565825119457684675103820679881680792153156616614798360649894867022162840408524250937322223681//12186741596298968245256194155228665632735178046355106133463062982670602750335160180405169285972691766413675449216819961341316742509625063392179280 
20484570779422213136105387052366878491025225861133270150585262733025655122846907974494099750739628594257427928954870893197048531281431054105510102176215838999349988480998827197029944036701651389097752980608319168178102621//3321943761931742956567494504722931571951385306909901259672532736639698840744877071134826137900014408691103259283676795574732824747130952636182780756212437965325671558426639083953787403547236777799428729452275709294783520 146369891495772494952841887152886107309500190314736084522963168283742600143244236116042027571622133931926633567867160591813599478266553183924960819072068851524440312166052500115422408817190693729567932919306865665340009051159566058468003827723339537979298568051546142650840212970175592382801497//55212156157267561109216806198851865664036304410102836735943160130831412687399333780211100436453981079073314311277679588174855628204780976556266201520567416310508791299029373557826733379133161170981394733853355927644870101350163842832370094722774179146568386199993435909173482905287863614614400 -2608156649836768732666608543008943855466995845711580787999050649351228195781061507033914673763411318695470220958850950954286872449571890733423006109802942765091360513446317866129822827445800984801155983860066367871546763258778360055065861752965423100441468071074005953745299111708108290678853038218147667954122310370079043892961158116706783824621773161296412183086567//307896894623976010054146748514391198576228873921345372725216330142644669131216427906158184527365096449292511559112523846065327932420030471820745321472435634090966569469181642312062006607116290798983644883812477636889042582133519983336548743001180374798371624666990844588001123553752938976470301117328633761192754732247383455045435878513859557375799330719481859485696 
1909548155641668250566055467358906068345366179458601976147166113506387406962786757130930654052762507654586230600202502602496884646725314761271232858675706156988859855875963590256882595589327026260750395504580256371360360424928192004263152877279821526649515739460908624111704962841477598920958635461672885172637306336905501424990505224471778947022795145422267442160658515703932306687376245929356054268792796684242188488920005404990850555069033649//426962986217594225352560126442654149757269226714771429217930107091399946506198174407095332959386614596467083820170339401229876666639402307998993946055143088240497656693275829842007530240830300316277581123012713870285455960272380055770281279909977227370412869351602231174041406782116619408128941840890517628611439462164880780015487606593920867367300021415372140324697623985898646587936597854218911855447907464862510669663200553309464303450112000; 0 0 -112564739183//9373365219272 -141494584336626704174173713649734315691//13663857792723117009167381810011656236 -72907457896458909480270561323715665671699860944345248534314093311809937901279650422786737//16688723380413293827663975787515145698384312041764922108417105097427538407950227161583790 -283253049105807653303028747414106642855062970682638152850991564336240272128589470533820482679242058258518910052863505657676096838062431034696412727994041//6525411275130258429502398721925334072432494204396062297109542549038298032624244996183769068293768778067494371395845781074490822119450144306091615259310 -237721696953700223749154913893707267696512287578193059139298180411964456558990379626754852672453803797234047953931487510491782309482120520727466997920239679896189349878379596367384972963958206212403632361536300156177//5285276152106039885399099327219536921800547664967387988005124489125097000325580820284929709654645961525830100460002856603223477816268184531217968787962060318479238500849800835505240211327412169353783338947184949200 
100804057866383910000138801649070810762183869052918634786866931506649754252733763555827750381755875699171087797151809466616027189228052644925307370573569943281258821230303762631070861215423688905107264275684269763247327204493146502185084296456850771369954148461253476213684196907916053927787//581009397194182943683246747667362397820851208145547835786023801302298234837753193945542724363879102126946942797243649210742030317721307484526749083812910399685373452507380638485425961411465503214946519253468274648985149358225444456830382468708463805176831557807108116864982969382541928960 -54402208114355215657456234565724981547968793718843577593572182276994943058094720821224726920367418293063502956141874696265059619186193013534177042785914258499696403669038777570572016479653043026790889286262879177045423175049097619640995010102658316523815700676634137946979952764896358299252519199642485776855302212933012209815330547162283940719719709434205225613437//1409370593141587895099595290391120781835486320443435663572931591309330864080938149244866711743325429020176530009735621002377608660001848464974221028878431758872576746455862736641798787201907313465530570387507655729620216466872192956240197086187917297539136752945017250144347794436941828923742002074720990534365830825244969786338169089194912238709105316132507904000; 0 0 0 2185051477207//2551468980502 -132695288944447609947458398029985856975//331980957249874262307105169826996586616 -2723685498164114938079489580644350119212818969205519010526020693520672946735632262289561//2832890260901206454330635046569513517583992836749798166440942858609078057026321642302930 -321673142600306093280831027699954214653452306280589059311231329340433225118902986724405241785157766853362070451132526236466153233802976851//355993248980728987846593714376172004429276355765528853582186411426211341812652624759995878867452293712679582265919533739194601331068404800 
18448436911131834565321977805950346354373905671031562730094910927968922063693958119562182000660712947250599561506636714608796365799099181045439542903714116674329015828705575858914245288174439628631543//5795931771084146157187189787202814693856136298575092344243085623081594631283897848440892712292235870755049663269574538233156793740907969554230290773280582828893321275684873428246983747114675851130880 -4796791946199262879499983382324912476379929457681390320259681185684305023271739425799442742293698343271341681009941294094539644976959991589370267261787701526135504699196449112208776127343211270790408975375080787564726500765284039716507709808857756568684075873//4683336508749095233221026001210948519995715356397563647800719850229310205855843222433435804967351909173845837760887041604086346869971347139774538274404127175580104740759081290477217649076715894391047514601457812837497095533288345030892132838997503236610252800; 0 0 0 0 3012424348531//12792462456678 -1148005022743109542016656858803953488611//218180893391783363853768429352596785744 -370341084019491519786836161175184004994558333458374436755868569215325740121//380236221704573854337010481939651640455761070703010532399729321780924705340 -1283763933524395641596503821511738216840745078857812068814924022122215249311437681630260812190102025298725865456571671541329//2761410723009077202063351058071917351743886473341504644804783932844474765064463798489501255896999202864153121430476553644416 -1960617523598584667458382756346163172949518377739188335368469557160090143012475681341320259121403616693854783570192952750933406294168943605686739680098692342865431544869241527//588461690927872754606676618157059938384342658853520470993331314098308515590148509194784955329410014795207056143557059218823742824668050825851042702372207905687761356325235200; 0 0 0 0 0 -3944303808049//11994238218192 -858486924545291131785306//3089094511323674899518385 
3861655060592889436696701261941241779557818997609718626307953//2887163380438394290960692114310475416005428697726491052985584 -70737902161444536934458192916666257112625499761520751358473189212602443036473227187887802234796621//99208972240881169641482648878355235855537616999141578412500871712153299076978887250231166929913664; 0 0 0 0 0 0 -457874356192//11306498036315 10298237966992729993447787//2614913088797527051627808 -36191713891094583205487720454843856502972447666645//58132638947051778286501712354010899452213104348288; 0 0 0 0 0 0 0 2//9 -1815023333875//11481503569852], +# rk_coefs_implicit = Float64[1 2//9 33777256972339//158809507322778 -70523709557357325353926362451393185076417954760652143178918227//610580103618339676020115895713119025932514229023178334091902984 -259173275379895127458760576309330523532337951147970777869503405464585562881462085641713656867342514214174126194545550287//305778461929350740657339674833009235009093599867814330749533177546027355391299023355841570642069322732753132898067074792 -34483675781191648110786485447949476841349802351981788326801039637143030060467265358101399325091272262829110785552388913826586855287191388294264020004018018771784951430241000078153131805692088161919//75417062871786346028657795203858890536386130892221243157855092150805741318095590696956952499172274738237761496268989626972303378280892228754140777305687309755754256615195720130257482858839215963040 -37972639708348074764863510493824251598614777393926316069246767548532985823720464466543328302337241205729880021748560977783654106318361163996656332009862268634667824273622244260705413151982804248607416793104785645839386988240914576932950229734443760364294642720771732116535265641883877//7438446301707241860076371052848504134938008377343437909478698968102313209124888632387419382271803298078934077384346658083736654144222793421310335890973591588099159352604539444645759450079027856611930729466766667675059084090201608750466983293829895157278044790116916895306655754063680 
-675361318822696286582373698984422416980652600989947732274664396784893998350699958365528677645259735958564356945089045713897031190536030043270123125611921244593280607804710357155969107797082924569677147406605660739001613329726734039554300060040262414311187767235888588699616374393926701368049944511958912556993822054410035807555792147417285087351663706964339//123630226219870680020349596361476770433871382508507540481557306758473895598799678011237749064951182766957783589912328559541553167178524352393635212865233552460096824349116720337718202467544652294987542445262644569235429627536646217069240828264579952890116365749573770419600813640962659529066539013558587753379941704293453151029888509618278536532112898169600 14654782196644443978643077431729985169483648884762410390029797637990901973385611446945854901190651457791071027387784893201190913834296025743656446253760565138790278257703135364502640573240985081692965718431740687526063899064740113674831492617564146000207118510633012932973784580911032600355276047390516684914006149750898178961841693685461865667072601334715379620650194897355808646711454006049738723680812812767482148674909363385677//689438076396284795109728335054829653735791314735370296017312037117794482011213182070282110348444824172727799956455437364683917851965757134198084575179028073245364712043241984644922880840271203454509766225348804613999483028942936350748479419833844505273215524980061587076542114840388250782499860283573249649250234769072139477422363218528699137058153448748715557001826696715954733525497910633323205956432091712259463674714051108864 
-4461407513830220875580251050725490334550039459522874983321802400684610393685337405358142021414207715460029597992569661824287292651719118056453635029394091853700171157539178998334123264701613727496982908769111231517264264427131041129298949334584170031908182425677961537622802682862369968557627139451073916218048192757202579311445016491504569877930402285098941281638204030005082843744035332231113981837799491035741510194007002757944974689989789973121919931578599142856207780304241152182820589861967884680533563//956049070484354854101428863294679885959605443930647501059097508251716081042932384339085948502606151853227770255674955102448278518521369879576672403940486307985304793739895276842844280985933427713363750074391121072190985785997371441202504772074934120525543242675232508787341768446331745780822252233969256999056709972276035403125614633349679448657648823186287006385681415026966922642222406444630198865874842701801802599037730705493910148384381716670313096615535597790558677713007878475592019318859352158208000; 0 2//9 9019974728735544362019000//16121383367705251995986549 -57651824890345896435905820007387445733//176397026424132604892288584936023214744 -569394483822128425961360994498008812778386685527868977943041878869944348458853766997807//210488291388818350516934065849262222273168238269047237479916690319680553809373402931658 -177892271546374710779107291691664969792012532687770794243317029390355982343430179360059946596144577636750977442181216291939325632637343206497346547201//147012521234151037962808775581551505208337113087251036836888218928240859032970635659340807911964767051434454493105886900324789466807896835441517907336 
-383919704369457941754329028518158063811507487658336964852095693443380296068543112141786000673584208676279098136093863022708428777262383112660876324670289377930084714300408862540125034205406610949157490011608339835609911783637//26344442760191215655644739798088529689692926752327373118484506468778858097810549532007485210430802475081652108994576933264826585713577768213226959029133582479810328629748820330483662804709644439429939929608252846965120586864 -2261399638404775940589746751761420392832753756364447109721631299853303660023359404762198447745206206324794984579928463887812630219378721278566804300354141655993990378864971715455940233043394094368633920630355290024928250159116442462431613993457716201189994986398580576373637796447341690932100417953//145952048137227617938226610302497592857434727105717708999825629029961635338948544360901135364080514013359266410649571796933362882296876216275219818344916014535247229950464075526325163502362335616803411316952303511550359528874921491481911056100962355463705476002929768845044572649111998507241159360 97092263093280942741942057550605726752466994168708464529514837090991196039707625522149161651577298820704973187563178564517694300641379757266602792113481685928418577315192729672250342192196532291582485913581263215282600090242204489587216263733619619995644646398583518484852258791153979330153604407490437036360915950912833867569593777485081148866816441380275899458322624315//1612479583004224808963775230904065752678064790286785081562159213083739929788324674289887067219257108443433983018962128913764853634641547668189437137361398821199802704927937510051334276232540935195546734316300257081642762563246297006944926685030546047915828312558467525571137317205747817430366932216307777366391883573026124038422978989658576323183389169028719228490717184 
-282802626277161763438401050707545946339250721480025389393398288510071885670399749493068653303108659088909408295368050261506246888038545093431463450622086150035365471807590708481769311061973508532731134307000563781849511141410805231230431778022509562382393871636238518226449187398428717818909040809695667426195691521585138951005490066131843388399667393739264776188923548153488086828503664042620160477598941768019528529813575988261761049007435793141//21295598190896517634074806612168964046102406984622923674198273681838481034098476751661608872569937782005147516986695219342148889913227138602579929066305588126088626859794197449268604455742547891320238664794542455648785085339759837315747373747299318575796168861966036993434630596679716090372380506019527557846496172514388750080669286419912765149068345588026044534047797029294973045445211187550146041575931956426433401149175067167448079387481497600; 0 0 2//9 3520027435685131909917895//67392439625927078783867992 10374073483185293075800691351051575558759313125224//47746138836300401140211549121187352932282495430227 6642791313733446579179508214782700174924317916026547161634021253916466895902882468972516472199573441//118791964230111146958678926628670032331340306410739755968623647953100579142060120581676442472306761272 -6622895468394947967912669187757757430384166778659364993716507048237325662402396562495302820418665115371324748551494603838728999391652754837466835977813519728560413//4154136066830875905459612005152635132045460364655444593270497523706808568616222990195336784464749257720280778685143458602362910961783614674055646289493447084749544 
29920836681724946214974650726250291149686043740047799018094901551722329818600795752132054446672331338494928960026894953832167416707033181463992926981944339833751675914430922106897663514980100460269210542447846192646135021597//38674192958627468052836990712873600458216925237426418618887878708643014783170860460755223502803527270922392382029908449670091073022344302230096719538616962666648363169756366062116293271544077140290818250324752852339056907840 -1505331351329100790284744312062232999410728480182742988263313228480072273923793896147546730910358730742440916537651613727263338175006565989409249704402694842447576876482504319357907737621277885227743251608862669538827876935204649305331445560086485362164182598798202400033272735555030725646295469879929//369875857672103867874544244334676356349381236490850891031917082430775064643778743185734321969653007328245285359271573038251275123417021814315765569728002871020156491461633519565756279554908194388262416796753263070097027971700852257215059824494431889479383317695288221975280147593294553700407544264704 -86971973149562325232056212280020488030978769199605998946356201176203777904519805170987639175301107657472282331934640690747903775069211822108443848041622210169232080624921266741724333673319407250465683056484464248220970134336113992474333189215582686107823224484325188579233609263767744974816748117601425778060665519105143407291744772672303935953477018950602468151109411166393//99690901813582527613576610319642528190766543434329855609393196390214284549495576554797452844294800249332037961761665384801915730321192357958415208824573674018410032837631830231337140199797816584200308705369316749981413972529814756012986085402364951767565343040377810730602065530885039752281230667007931663862053248550853966137113439252661770641873160884023622867868283494400; 0 0 0 2//9 5081406456137427741660103//17412358188509308246213316 356236304392011993333949723703131159758185810705937//1277155139038113900014040244249523480364569876838190 
260486926301426691443781547620527025750389451260889708090334147529502177264310268013096339163756864131//312981030590253862008846641182330180288496985307175523771278858598243851711989951486234144475242049410 37852928149328865012911315329042303413154214522109314759609464236136375878343877343743098924942700324052129496848243932304308240525612767849270762300533//36053004780910014842890824262286878474603180248018784759638286780666706355382569301404056264880196067480624823658755434150324375639435347593779556342800 -2282407438775459707827459161620796082258656114035571349534505805640738761586311700342566985607521074815130269621952300940594697694072451892889463172496981731163042213972129958348108968433780267026716885963519809201//586979546525155203441076032955690669441768372423826926737015147818414713744821503458195436376770304032419956160567706732013132695083761976296177984968780357452262886129878809385406437021903837452610385016124423680 187905796196720917487414705372253331307966329921238689334004471068314809159821853360154084116232202809571497984570623952689000063594334555529728856716894701283508482348332199532137539204074179381183915634783746631387493698467375177190429476632185477014847301089667892678056713//171539267590066748253469829383930549376008625687359040674223114224438860947702803174276756584808934631224981641442798237041027339626557083111550729701259169341702179433108120143245126633106235254081083421829904686100215770112577678034007297420438715328577239419362025241856000; 0 0 0 0 2//9 35039219008892087720673325//86154806997150663643746312 19816561771980385975982415387642517169040169952499//33005933604303579884608184152229609915639272044670 -1080291465779667303630584771557569852938590760348119247225955143675568353340734163162873//7362728080499493774017844628375478834322161690782074297578412183692075609283538079393600 
91685100648952711196868954844225476461911737542022412589135402358365869905288291520183115484299444772209967806880608948953530902666248911//53470750842585428647825877252363401908523582536797620794753817140843426075734143105110415863583250150665677280703104974665972227037808640 74049777616770083818502917014560563653217344703905921643640424764673004440720926011119528196102141656185703630697851000727247130998212566262498995881110981153316679114525447468267982659//325563288501457594136865274706734413345080066918150793825834559097409312873937899975853347440100661768869793978951463834659542293231817295652841938787832036911409854062445300529557708800; 0 0 0 0 0 2//9 15781043597147568801568175//3614386140556877705089548 33422528370099625454874244206712186269//29788117088857726792026318272523497080 -7116496052409432062958367373360307502165363912530455123091509555877150515//18560597723356561489493864328969954916186144305400579545332295579833416448 5839244266756132287562573916379211891507618903337562916610354942038726347479298063853136917804854626412302750421//1913342871539584101918970805351321257356802075910770834964527552833176346473547991524123639420838328364588698624; 0 0 0 0 0 0 2//9 457874356192//11306498036315 -10278899720217//11421967853998 9602897495277010359483416541522520749//84641325960427269717195312383118262376; 0 0 0 0 0 0 0 2//9 7//9 0], +# implicit_coefficient_is_zero = Bool[1, 0, 0, 0, 0, 0, 0, 0], +# a = Float64[0 0 0 0 0 0 0 0; 4//9 0 0 0 0 0 0 0; 1//9 1183333538310//1827251437969 0 0 0 0 0 0; 895379019517//9750411845327 477606656805//13473228687314 -112564739183//9373365219272 0 0 0 0 0; -4458043123994//13015289567637 -2500665203865//9342069639922 983347055801//8893519644487 2185051477207//2551468980502 0 0 0 0; -167316361917//17121522574472 1605541814917//7619724128744 991021770328//13052792161721 2342280609577//11279663441611 3012424348531//12792462456678 0 0 0; 6680998715867//14310383562358 5029118570809//3897454228471 2415062538259//6382199904604 
-3924368632305//6964820224454 -4331110370267//15021686902756 -3944303808049//11994238218192 0 0; 2193717860234//3570523412979 2193717860234//3570523412979 5952760925747//18750164281544 -4412967128996//6196664114337 4151782504231//36106512998704 572599549169//6265429158920 -457874356192//11306498036315 0], +# b = Float64[0 0 3517720773327//20256071687669 4569610470461//17934693873752 2819471173109//11655438449929 3296210113763//10722700128969 -1142099968913//5710983926999 2//9; 0 0 520639020421//8300446712847 4550235134915//17827758688493 1482366381361//6201654941325 5551607622171//13911031047899 -5266607656330//36788968843917 1074053359553//5740751784926], +# a_implicit = Float64[0 0 0 0 0 0 0 0; 2//9 2//9 0 0 0 0 0 0; 2366667076620//8822750406821 2366667076620//8822750406821 2//9 0 0 0 0 0; -257962897183//4451812247028 -257962897183//4451812247028 128530224461//14379561246022 2//9 0 0 0 0; -486229321650//11227943450093 -486229321650//11227943450093 -225633144460//6633558740617 1741320951451//6824444397158 2//9 0 0 0; 621307788657//4714163060173 621307788657//4714163060173 -125196015625//3866852212004 940440206406//7593089888465 961109811699//6734810228204 2//9 0 0; 2036305566805//6583108094622 2036305566805//6583108094622 -3039402635899//4450598839912 -1829510709469//31102090912115 -286320471013//6931253422520 8651533662697//9642993110008 2//9 0; 0 0 0 0 0 0 0 2//9], +# b_implicit = Float64[0 0 3517720773327//20256071687669 4569610470461//17934693873752 2819471173109//11655438449929 3296210113763//10722700128969 -1142099968913//5710983926999 2//9; 0 0 520639020421//8300446712847 4550235134915//17827758688493 1482366381361//6201654941325 5551607622171//13911031047899 -5266607656330//36788968843917 1074053359553//5740751784926], +# ), + + "KennedyCarpenterARK324" => (rk_coefs = Float64[-1247523335473//4055673282236 79264835984649679285542915383850087029//197703563163588166433309052703741041388 
143374873327169165072944877723054563796993634825533873250938500977412076430225058147123297//48164537938468181360053681019221391245537623484212879143241031703829399581257519267114308 183533876663877280630903815242772717891981696485790726248876227532653907716433834740782318700703430625849345155700369934729507//152774769284679137140932574224840322584566969939460270285400962599369488056766613240926963620623994179069278241372235976473942 -674269476752868162123858074951153786574711903849607589191198931851708688548176820435148552544391865584341865043692699736944296248896158605451212237381612962268906411298755093339964390670721//2546134965897308089555580666805701115486032970876088660128805677838927216400319783331222992194057059091619325472569216389983265703597042520242784079940164596262272590600818697485927109390800; 1767732205903//2027836641118 104089650763298376775517283594005205634//126974577376103704518428054341484614367 402649974715726257110381944227129692139788970635058928062257578119864655690737612569045525//59412386975052518972493036127105554419479894359642287241033487141332226598183812678018467 14364246105178652475593341035586171289599000967514280222073651706570666894470602049892875371222508981379602016199819025473080098//7260819418426986996892762999677651302312472577052165412779166595718930645183114458002425346612148056054055237577670620367624747 -592205887440399614308635936680843507009892739451509439144512297133172235516402369571203494065187197645330486644003505743504146737687284998414220824119624619321360424932079759986667400688521//924270765882147767919737706090852676802697317279651422141773866515771469294827079989551846085654584104864692146726487859057882031810152027557213168580198324425357042783589702576114965490700; 0 788022342437//10882634858940 -1296455210574762780005510449066529142309392831329604//156723304220428565752393692055114250106676689860589 
-99673073274892040508947928240745592597410069006658308475742716315333750923451055334466588//36786590455936249721221951824053931878290276425173065614254196079782580911623834181388539 1168350732453084113921674966113208169458618576839040713544125474188024763720251037296265456175894768135827904089264712977423236877161701237//1613695686286088606494807246788767988268387847524889706258074110760741418740305745483456939972126277819165534580240908885870858409423997925; 0 0 10755448449292//10357097424841 -2001365002799665343288696//31172749426290664269423183 -82906599394091874511049578920146826651351968733949252229257729//3202722135068822020151439455680372156253901385805112449655312300; 0 0 0 1767732205903//4055673282236 -189157537172543652255956//2412892370833855116699825], + rk_coefs_implicit = Float64[1 1767732205903//4055673282236 -687399076962262115744047//111840222463218881928454996 -1336358764442892755444059056266228090581151350208390996588040500539220614055//27246512666133488484210277154496662561525905554882950399821293911874378720636 285200943189932567138799198875705878963855729946875982791861565057705737765121043333532449390223175098959890725739//3197693884094627611809027664829775477382158476704304256365487708404699034948521210742881159797768179105035102983218 349906153740531530807086677680742934068250971081202838415859967794240267241728221444895340256117278121084468557470142822774574506555702945441070832822061777207585555333938753//13983881081032778505142889772097359302066194326597505729957247063227579945322705391794876798087456494699396393914333627777048370428560157989974144628536010494017605716344421200; 0 1767732205903//4055673282236 -37790740941101883580610910860591527247//131693372088198837795836211504917271540 -1855100908631287514752342850534478233437898769154169242982744248//696821012189129201178283337188250633903779865345141334346955981 
-13864927788667680976894941132472814614486628394925655027069496971358505136846849603294768//21721826807945957279896846558408676329589122800041876081057713354971012403885490761907523 3279440794571605320352156805403819083072967867568566199921530274858816688510719259249438840491831638733833253082509394728011722335821499666//12387161495243534653846823539531835409482524701416597408671153384101508978692879955207371459557771903678231566977186207593669409183236242425; 0 0 1767732205903//4055673282236 252818125219158362337262316052985694732//212256555723208031266376853558764309283 91446741129505469383144040723161877446754697117568//638848912416080950914803563396305008955201041811429 -64974104611006292878697003397814574642940098342186996399489269962607827562729361752864//16409007790583358079608534742015990629506873866141219637841130600794261103005884731491225; 0 0 0 1767732205903//4055673282236 2287941076333//4055673282236 0], + implicit_coefficient_is_zero = Bool[1, 0, 0, 0], + a = Float64[0 0 0 0; 1767732205903//2027836641118 0 0 0; 5535828885825//10492691773637 788022342437//10882634858940 0 0; 6485989280629//16251701735622 -4246266847089//9704473918619 10755448449292//10357097424841 0], + b = Float64[1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236; 2756255671327//12835298489170 -10771552573575//22201958757719 9247589265047//10645013368117 2193209047091//5459859503100], + a_implicit = Float64[0 0 0 0; 1767732205903//4055673282236 1767732205903//4055673282236 0 0; 2746238789719//10658868560708 -640167445237//6845629431997 1767732205903//4055673282236 0; 1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236], + b_implicit = Float64[1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236; 2756255671327//12835298489170 -10771552573575//22201958757719 9247589265047//10645013368117 
2193209047091//5459859503100] + ), ) -a, b = convert_rk_coeffs_to_butcher_tableau(methods["RKF45"].rk_coeffs) -methods["RKF45 attempt 2"] = (rk_coeffs = methods["RKF45"].rk_coeffs, +a, b = convert_rk_coefs_to_butcher_tableau(methods["RKF45"].rk_coefs) +methods["RKF45 attempt 2"] = (rk_coefs = methods["RKF45"].rk_coefs, a = a, b = b) for (k,v) ∈ methods + imex = any(:rk_coefs_implicit ∈ keys(v)) + println("\n", k) - result, error = rk_advance(v.rk_coeffs, y0, dt, nsteps) - result_butcher, error_butcher = rk_advance_butcher(v.a, v.b, y0, dt, nsteps) + + if imex + this_result, this_error = rk_advance(v.rk_coefs, y0, dt, nsteps, v.rk_coefs_implicit, v.implicit_coefficient_is_zero) + result_butcher, error_butcher = rk_advance_butcher(v.a, v.b, y0, dt, nsteps, v.a_implicit, v.b_implicit) + else + this_result, this_error = rk_advance(v.rk_coefs, y0, dt, nsteps) + result_butcher, error_butcher = rk_advance_butcher(v.a, v.b, y0, dt, nsteps) + end #for i ∈ 1:multiplier:nsteps+1 - # println("$i t=", t[i], " analytic=", analytic[i], " result=", result[i], " result_butcher=", result_butcher[i]) + # println("$i t=", t[i], " analytic=", analytic[i], " result=", this_result[i], " result_butcher=", result_butcher[i]) #end println("t=", t[end]) - println("analytic = ", analytic[end]) - println("result = ", result[end]) + if imex + println("analytic = ", analytic_implicit[end]) + else + println("analytic = ", analytic[end]) + end + println("result = ", this_result[end]) println("result_butcher = ", result_butcher[end]) - println("error = ", error[end]) + println("error = ", this_error[end]) println("error_butcher = ", error_butcher[end]) end @@ -230,7 +385,7 @@ elseif n_rk_stages == 1 else error("Unsupported number of RK stages, n_rk_stages=$n_rk_stages") end -result = rk_advance_non_adaptive(rk_coefs, y0, dt, nsteps) +ssprk3_result = rk_advance_non_adaptive(rk_coefs, y0, dt, nsteps) println("t=", t[end]) println("analytic = ", analytic[end]) -println("result = ", result[end]) 
+println("result = ", ssprk3_result[end]) From 5c9cbd4b73cf7d93c5208eb697e3a4f6f47055ae Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 18 May 2024 15:02:36 +0100 Subject: [PATCH 26/75] Split up rk_update!() function The rk_update!() function was doing several jobs: applying the RK update to evolving variables; applying boundary conditions and moment constraints; updating derived variables and moment derivatives; calling the adaptive timestep update. To support IMEX functionality it will be convenient to split up some of these roles into separate functions, which can be reused in more than one place. --- moment_kinetics/src/time_advance.jl | 270 +++++++++++++--------------- 1 file changed, 122 insertions(+), 148 deletions(-) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index fa01cba5a..3ad01b14b 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -1488,28 +1488,16 @@ function time_advance_no_splitting!(pdf, scratch, t, t_params, vz, vr, vzeta, vp end """ -use information obtained from the Runge-Kutta stages to compute the updated pdf; -for the quantities (density, upar, ppar, vth, qpar and phi) that are derived -from the 'true', un-modified pdf, either: update them using info from Runge Kutta -stages, if the quantities are evolved separately from the modified pdf; -or update them by taking the appropriate velocity moment of the evolved pdf +Use the result of the forward-Euler timestep and the previous Runge-Kutta stages to +compute the updated pdfs, and any evolved moments. 
""" -function rk_update!(scratch, pdf, moments, fields, boundary_distributions, vz, vr, vzeta, - vpa, vperp, z, r, spectral_objects, advect_objects, t, t_params, - istage, composition, collisions, geometry, external_source_settings, - gyroavs, num_diss_params, advance, scratch_dummy, diagnostic_moments, - istep) +function rk_update!(scratch, moments, t_params, istage, composition) begin_s_r_z_region() new_scratch = scratch[istage+1] old_scratch = scratch[istage] rk_coefs = t_params.rk_coefs[:,istage] - z_spectral, r_spectral, vpa_spectral, vperp_spectral = spectral_objects.z_spectral, spectral_objects.r_spectral, spectral_objects.vpa_spectral, spectral_objects.vperp_spectral - vzeta_spectral, vr_spectral, vz_spectral = spectral_objects.vzeta_spectral, spectral_objects.vr_spectral, spectral_objects.vz_spectral - vpa_advect, vperp_advect, r_advect, z_advect = advect_objects.vpa_advect, advect_objects.vperp_advect, advect_objects.r_advect, advect_objects.z_advect - neutral_z_advect, neutral_r_advect, neutral_vz_advect = advect_objects.neutral_z_advect, advect_objects.neutral_r_advect, advect_objects.neutral_vz_advect - ## # update the ion distribution and moments ## @@ -1519,49 +1507,6 @@ function rk_update!(scratch, pdf, moments, fields, boundary_distributions, vz, v # use Runge Kutta to update any velocity moments evolved separately from the pdf rk_update_evolved_moments!(scratch, moments, t_params, istage) - # Ensure there are no negative values in the pdf before applying boundary - # conditions, so that negative deviations do not mess up the integral-constraint - # corrections in the sheath boundary conditions. - force_minimum_pdf_value!(new_scratch.pdf, num_diss_params.ion.force_minimum_pdf_value) - - # Enforce boundary conditions in z and vpa on the distribution function. 
- # Must be done after Runge Kutta update so that the boundary condition applied to - # the updated pdf is consistent with the updated moments - otherwise different upar - # between 'pdf', 'old_scratch' and 'new_scratch' might mean a point that should be - # set to zero at the sheath boundary according to the final upar has a non-zero - # contribution from one or more of the terms. - # NB: probably need to do the same for the evolved moments - enforce_boundary_conditions!(new_scratch, moments, - boundary_distributions.pdf_rboundary_ion, vpa.bc, z.bc, r.bc, vpa, vperp, z, - r, vpa_spectral, vperp_spectral, - vpa_advect, vperp_advect, z_advect, r_advect, composition, scratch_dummy, - advance.r_diffusion, advance.vpa_diffusion, advance.vperp_diffusion) - - if moments.evolve_density && moments.enforce_conservation - begin_s_r_z_region() - A = moments.ion.constraints_A_coefficient - B = moments.ion.constraints_B_coefficient - C = moments.ion.constraints_C_coefficient - @loop_s_r_z is ir iz begin - (A[iz,ir,is], B[iz,ir,is], C[iz,ir,is]) = - @views hard_force_moment_constraints!(new_scratch.pdf[:,:,iz,ir,is], - moments, vpa) - end - end - - function update_derived_ion_moments_and_derivatives() - # update remaining velocity moments that are calculable from the evolved pdf - # Note these may be needed for the boundary condition on the neutrals, so must be - # calculated before that is applied. Also may be needed to calculate advection speeds - # for for CFL stability limit calculations in adaptive_timestep_update!(). 
- update_derived_moments!(new_scratch, moments, vpa, vperp, z, r, composition, - r_spectral, geometry, gyroavs, scratch_dummy, z_advect, diagnostic_moments) - - calculate_ion_moment_derivatives!(moments, new_scratch, scratch_dummy, z, z_spectral, - num_diss_params.ion.moment_dissipation_coefficient) - end - update_derived_ion_moments_and_derivatives() - if composition.n_neutral_species > 0 ## # update the neutral particle distribution and moments @@ -1569,109 +1514,127 @@ function rk_update!(scratch, pdf, moments, fields, boundary_distributions, vz, v rk_update_variable!(scratch, :pdf_neutral, t_params, istage; neutrals=true) # use Runge Kutta to update any velocity moments evolved separately from the pdf rk_update_evolved_moments_neutral!(scratch, moments, t_params, istage) + end +end +""" +Apply boundary conditions and moment constraints to updated pdfs and calculate derived +moments and moment derivatives +""" +function apply_all_bcs_constraints_update_moments!( + this_scratch, moments, fields, boundary_distributions, vz, vr, vzeta, vpa, vperp, + z, r, spectral_objects, advect_objects, composition, geometry, gyroavs, + num_diss_params, advance, scratch_dummy, diagnostic_moments; pdf_bc_constraints=true) + + begin_s_r_z_region() + + z_spectral, r_spectral, vpa_spectral, vperp_spectral = spectral_objects.z_spectral, spectral_objects.r_spectral, spectral_objects.vpa_spectral, spectral_objects.vperp_spectral + vzeta_spectral, vr_spectral, vz_spectral = spectral_objects.vzeta_spectral, spectral_objects.vr_spectral, spectral_objects.vz_spectral + vpa_advect, vperp_advect, r_advect, z_advect = advect_objects.vpa_advect, advect_objects.vperp_advect, advect_objects.r_advect, advect_objects.z_advect + neutral_z_advect, neutral_r_advect, neutral_vz_advect = advect_objects.neutral_z_advect, advect_objects.neutral_r_advect, advect_objects.neutral_vz_advect + + if pdf_bc_constraints # Ensure there are no negative values in the pdf before applying boundary # conditions, so 
that negative deviations do not mess up the integral-constraint # corrections in the sheath boundary conditions. - force_minimum_pdf_value_neutral!(new_scratch.pdf_neutral, num_diss_params.neutral.force_minimum_pdf_value) + force_minimum_pdf_value!(this_scratch.pdf, num_diss_params.ion.force_minimum_pdf_value) # Enforce boundary conditions in z and vpa on the distribution function. - # Must be done after Runge Kutta update so that the boundary condition applied to - # the updated pdf is consistent with the updated moments - otherwise different upar - # between 'pdf', 'old_scratch' and 'new_scratch' might mean a point that should be - # set to zero at the sheath boundary according to the final upar has a non-zero - # contribution from one or more of the terms. - # NB: probably need to do the same for the evolved moments - # Note, so far vr and vzeta do not need advect objects, so pass `nothing` for - # those as a placeholder - enforce_neutral_boundary_conditions!(new_scratch.pdf_neutral, new_scratch.pdf, - boundary_distributions, new_scratch.density_neutral, new_scratch.uz_neutral, - new_scratch.pz_neutral, moments, new_scratch.density, new_scratch.upar, - fields.Er, vzeta_spectral, vr_spectral, vz_spectral, neutral_r_advect, - neutral_z_advect, nothing, nothing, neutral_vz_advect, r, z, vzeta, vr, vz, - composition, geometry, scratch_dummy, advance.r_diffusion, - advance.vz_diffusion) + # Must be done after Runge Kutta update so that the boundary condition applied to the + # updated pdf is consistent with the updated moments - otherwise different upar + # between 'pdf', 'scratch[istage]' and 'scratch[istage+1]' might mean a point that + # should be set to zero at the sheath boundary according to the final upar has a + # non-zero contribution from one or more of the terms. 
NB: probably need to do the + # same for the evolved moments + enforce_boundary_conditions!(this_scratch, moments, + boundary_distributions.pdf_rboundary_ion, vpa.bc, z.bc, r.bc, vpa, vperp, z, r, + vpa_spectral, vperp_spectral, vpa_advect, vperp_advect, z_advect, r_advect, + composition, scratch_dummy, advance.r_diffusion, advance.vpa_diffusion, + advance.vperp_diffusion) if moments.evolve_density && moments.enforce_conservation - begin_sn_r_z_region() - A = moments.neutral.constraints_A_coefficient - B = moments.neutral.constraints_B_coefficient - C = moments.neutral.constraints_C_coefficient - @loop_sn_r_z isn ir iz begin - (A[iz,ir,isn], B[iz,ir,isn], C[iz,ir,isn]) = - @views hard_force_moment_constraints_neutral!( - new_scratch.pdf_neutral[:,:,:,iz,ir,isn], moments, vz) + begin_s_r_z_region() + A = moments.ion.constraints_A_coefficient + B = moments.ion.constraints_B_coefficient + C = moments.ion.constraints_C_coefficient + @loop_s_r_z is ir iz begin + (A[iz,ir,is], B[iz,ir,is], C[iz,ir,is]) = + @views hard_force_moment_constraints!(this_scratch.pdf[:,:,iz,ir,is], + moments, vpa) end end + end - function update_derived_neutral_moments_and_derivatives() - # update remaining velocity moments that are calculable from the evolved pdf - update_derived_moments_neutral!(new_scratch, moments, vz, vr, vzeta, z, r, - composition) - # update the thermal speed - begin_sn_r_z_region() - @loop_sn_r_z isn ir iz begin - moments.neutral.vth[iz,ir,isn] = sqrt(2.0*new_scratch.pz_neutral[iz,ir,isn]/new_scratch.density_neutral[iz,ir,isn]) - end - - # update the parallel heat flux - update_neutral_qz!(moments.neutral.qz, moments.neutral.qz_updated, - new_scratch.density_neutral, new_scratch.uz_neutral, - moments.neutral.vth, new_scratch.pdf_neutral, vz, vr, vzeta, z, - r, composition, moments.evolve_density, moments.evolve_upar, - moments.evolve_ppar) + # update remaining velocity moments that are calculable from the evolved pdf + # Note these may be needed for the boundary 
condition on the neutrals, so must be + # calculated before that is applied. Also may be needed to calculate advection speeds + # for for CFL stability limit calculations in adaptive_timestep_update!(). + update_derived_moments!(this_scratch, moments, vpa, vperp, z, r, composition, + r_spectral, geometry, gyroavs, scratch_dummy, z_advect, diagnostic_moments) - calculate_neutral_moment_derivatives!(moments, new_scratch, scratch_dummy, z, - z_spectral, - num_diss_params.neutral.moment_dissipation_coefficient) - end - update_derived_neutral_moments_and_derivatives() - end + calculate_ion_moment_derivatives!(moments, this_scratch, scratch_dummy, z, z_spectral, + num_diss_params.ion.moment_dissipation_coefficient) # update the electrostatic potential phi - update_phi!(fields, scratch[istage+1], vperp, z, r, composition, z_spectral, - r_spectral, scratch_dummy, gyroavs) - # _block_synchronize() here because phi needs to be read on different ranks than - # it was written on, even though the loop-type does not change here. However, - # after the final RK stage can skip if: - # * evolving upar or ppar as synchronization will be triggered after moments - # updates at the beginning of the next RK step - _block_synchronize() + update_phi!(fields, this_scratch, vperp, z, r, composition, z_spectral, r_spectral, + scratch_dummy, gyroavs) - if t_params.adaptive && istage == t_params.n_rk_stages - # Note the timestep update must be done before calculating derived moments and - # moment derivatives, because the timstep might need to be re-done with a smaller - # dt, in which case scratch[t_params.n_rk_stages+1] will be reset to the values - # from the beginning of the timestep here. 
- adaptive_timestep_update!(scratch, t, t_params, moments, fields, composition, - collisions, geometry, external_source_settings, - advect_objects, r, z, vperp, vpa, vzeta, vr, vz) - # Re-do this in case adaptive_timestep_update re-arranged the `scratch` vector - new_scratch = scratch[istage+1] - old_scratch = scratch[istage] - - if t_params.previous_dt[] == 0.0 - # Re-update remaining velocity moments that are calculable from the evolved - # pdf These need to be re-calculated because `new_scratch` was swapped with - # the beginning of the timestep, because the timestep failed - update_derived_ion_moments_and_derivatives() - if composition.n_neutral_species > 0 - update_derived_neutral_moments_and_derivatives() - end + if composition.n_neutral_species > 0 + if pdf_bc_constraints + # Ensure there are no negative values in the pdf before applying boundary + # conditions, so that negative deviations do not mess up the integral-constraint + # corrections in the sheath boundary conditions. + force_minimum_pdf_value_neutral!(this_scratch.pdf_neutral, + num_diss_params.neutral.force_minimum_pdf_value) + + # Enforce boundary conditions in z and vpa on the distribution function. + # Must be done after Runge Kutta update so that the boundary condition applied to + # the updated pdf is consistent with the updated moments - otherwise different + # upar between 'pdf', 'scratch[istage]' and 'scratch[istage+1]' might mean a point + # that should be set to zero at the sheath boundary according to the final upar + # has a non-zero contribution from one or more of the terms. 
NB: probably need to + # do the same for the evolved moments Note, so far vr and vzeta do not need advect + # objects, so pass `nothing` for those as a placeholder + enforce_neutral_boundary_conditions!(this_scratch.pdf_neutral, this_scratch.pdf, + boundary_distributions, this_scratch.density_neutral, this_scratch.uz_neutral, + this_scratch.pz_neutral, moments, this_scratch.density, this_scratch.upar, + fields.Er, vzeta_spectral, vr_spectral, vz_spectral, neutral_r_advect, + neutral_z_advect, nothing, nothing, neutral_vz_advect, r, z, vzeta, vr, vz, + composition, geometry, scratch_dummy, advance.r_diffusion, + advance.vz_diffusion) - # update the electrostatic potential phi - update_phi!(fields, scratch[istage+1], vperp, z, r, composition, z_spectral, - r_spectral, scratch_dummy, gyroavs) - if !(( moments.evolve_upar || moments.evolve_ppar) && - istage == length(scratch)-1) - # _block_synchronize() here because phi needs to be read on different ranks than - # it was written on, even though the loop-type does not change here. 
However, - # after the final RK stage can skip if: - # * evolving upar or ppar as synchronization will be triggered after moments - # updates at the beginning of the next RK step - _block_synchronize() + if moments.evolve_density && moments.enforce_conservation + begin_sn_r_z_region() + A = moments.neutral.constraints_A_coefficient + B = moments.neutral.constraints_B_coefficient + C = moments.neutral.constraints_C_coefficient + @loop_sn_r_z isn ir iz begin + (A[iz,ir,isn], B[iz,ir,isn], C[iz,ir,isn]) = + @views hard_force_moment_constraints_neutral!( + this_scratch.pdf_neutral[:,:,:,iz,ir,isn], moments, vz) + end end end + + # update remaining velocity moments that are calculable from the evolved pdf + update_derived_moments_neutral!(this_scratch, moments, vz, vr, vzeta, z, r, + composition) + # update the thermal speed + begin_sn_r_z_region() + @loop_sn_r_z isn ir iz begin + moments.neutral.vth[iz,ir,isn] = sqrt(2.0*this_scratch.pz_neutral[iz,ir,isn]/this_scratch.density_neutral[iz,ir,isn]) + end + + # update the parallel heat flux + update_neutral_qz!(moments.neutral.qz, moments.neutral.qz_updated, + this_scratch.density_neutral, this_scratch.uz_neutral, + moments.neutral.vth, this_scratch.pdf_neutral, vz, vr, vzeta, z, + r, composition, moments.evolve_density, moments.evolve_upar, + moments.evolve_ppar) + + calculate_neutral_moment_derivatives!(moments, this_scratch, scratch_dummy, z, + z_spectral, + num_diss_params.neutral.moment_dissipation_coefficient) end end @@ -1881,6 +1844,17 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, error_norms, total_points, current_dt, error_norm_method) + if t_params.previous_dt[] == 0.0 + # Re-update remaining velocity moments that are calculable from the evolved + # pdf These need to be re-calculated because `scratch[istage+1]` is now the + # state at the beginning of the timestep, because the timestep failed + 
apply_all_bcs_constraints_update_moments!( + scratch[t_params.n_rk_stages+1], moments, fields, nothing, vz, vr, vzeta, + vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry, + gyroavs, num_diss_params, advance, scratch_dummy, false; + pdf_bc_constraints=false) + end + return nothing end @@ -2019,11 +1993,11 @@ function ssp_rk!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase collisions, geometry, scratch_dummy, manufactured_source_list, external_source_settings, num_diss_params, advance, fp_arrays, istage) diagnostic_moments = diagnostic_checks && istage == n_rk_stages - @views rk_update!(scratch, pdf, moments, fields, boundary_distributions, vz, vr, - vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, - t, t_params, istage, composition, collisions, geometry, - external_source_settings, gyroavs, num_diss_params, advance, - scratch_dummy, diagnostic_moments, istep) + rk_update!(scratch, scratch_implicit, moments, t_params, istage, composition) + apply_all_bcs_constraints_update_moments!( + scratch[istage+1], moments, fields, boundary_distributions, vz, vr, vzeta, + vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry, + gyroavs, num_diss_params, advance, scratch_dummy, diagnostic_moments) end istage = n_rk_stages+1 From 2dfab31c51b369ed187dfb0a1087b289e7dacc93 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 2 May 2024 11:50:41 +0100 Subject: [PATCH 27/75] Add framework for IMEX timestepping Still need to add an implementation in backward_euler!() to use IMEX schemes. 
--- moment_kinetics/src/input_structs.jl | 4 +- moment_kinetics/src/moment_kinetics.jl | 8 +- moment_kinetics/src/runge_kutta.jl | 452 ++++++++++++++++++++----- moment_kinetics/src/time_advance.jl | 241 ++++++++----- 4 files changed, 513 insertions(+), 192 deletions(-) diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index 17c0e1918..db193e24d 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -38,7 +38,7 @@ end an option but known at compile time when a `time_info` struct is passed as a function argument. """ -struct time_info{Terrorsum <: Real} +struct time_info{Terrorsum <: Real, Trkimp, Timpzero} n_variables::mk_int nstep::mk_int end_time::mk_float @@ -59,6 +59,8 @@ struct time_info{Terrorsum <: Real} dfns_output_times::Vector{mk_float} type::String rk_coefs::Array{mk_float,2} + rk_coefs_implicit::Trkimp + implicit_coefficient_is_zero::Timpzero n_rk_stages::mk_int rk_order::mk_int adaptive::Bool diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl index b1a07f0eb..65f0cd93f 100644 --- a/moment_kinetics/src/moment_kinetics.jl +++ b/moment_kinetics/src/moment_kinetics.jl @@ -318,8 +318,8 @@ function setup_moment_kinetics(input_dict::AbstractDict; # create arrays and do other work needed to setup # the main time advance loop -- including normalisation of f by density if requested - moments, spectral_objects, scratch, advance, t_params, fp_arrays, gyroavs, - manufactured_source_list = + moments, spectral_objects, scratch, scratch_implicit, advance, t_params, fp_arrays, + gyroavs, manufactured_source_list = setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrophase, vz_spectral, vr_spectral, vzeta_spectral, vpa_spectral, vperp_spectral, z_spectral, r_spectral, composition, moments, t_input, code_time, dt, @@ -350,8 +350,8 @@ function setup_moment_kinetics(input_dict::AbstractDict; begin_s_r_z_vperp_region() - return pdf, 
scratch, code_time, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, - moments, fields, spectral_objects, advection_structs, + return pdf, scratch, scratch_implicit, code_time, t_params, vz, vr, vzeta, vpa, vperp, + gyrophase, z, r, moments, fields, spectral_objects, advection_structs, composition, collisions, geometry, gyroavs, boundary_distributions, external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy, manufactured_source_list, ascii_io, io_moments, io_dfns diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index bb9cff095..a16cd8d8b 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -22,6 +22,10 @@ e.g., if f is the function to be updated, then f^{n+1}[stage+1] = rk_coef[1,stage]*f^{n} + rk_coef[2,stage]*f^{n+1}[stage] + rk_coef[3,stage]*(f^{n}+dt*G[f^{n+1}[stage]] """ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operators) + + rk_coefs_implicit = nothing + implicit_coefficient_is_zero = nothing + if type == "RKF5(4)" # Embedded 5th order / 4th order Runge-Kutta-Fehlberg method. 
# Note uses the 5th order solution for the time advance, even though the error @@ -144,6 +148,57 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat else CFL_prefactor = input_CFL_prefactor end + elseif type == "KennedyCarpenterARK437" + # 7-stage 4th-order IMEX scheme from Kennedy & Carpenter 2019 + # (https://doi.org/10.1016/j.apnum.2018.10.007) + rk_coefs = mk_float[1259//2000 5290646302898597//8373961392408000 8423019873483076625875775070651744355694385798609904937007939962921//150521175132586175452031742950672295931748410227157096692266429460000 2994513382955822485521046980912234681324963183189745389014355133307701665733190659992515397312853494866472292886256590592494915387//2965873493603904869657209592263579672060858985048910013379293383705776295507221134405229631810912000404727243855383598634762065000 30539970275135679633584632348554217845550139791434491473433065741424255490119246194598789137339313385221967019548261328863657459815753917023734449603158284970462973658597410429650430702665200563167161539643//13704862650330987205714124331836761942666326434854101824180425078097998709961464232104783253918380815197539403091139963512106850869604383687916916407395432488294630299106508922071626689111782004817881600000 -19040678234218442267791957547085949877261833393218769774490211844357442495720928310803217673452945114307858653570770743722530691547161857714592037194448292737108135956064173232077902035051756906315290249323391199215452093288761543466155072487019283153040605640582545353//434460620365086227275365613595785689037684119561551447866399563678359364964304766160588180361574468026545002066358107854844508866339943817889423954205635510933928478885046494597924763194047299856692216836659998842171053550088349173699526518609668649583153524558000000 
288731630022162319869438999191684090646040643450899591816164044389835222770755887620657396004427344292026826164251174295940084430575464251867753391273329991428766006962036541371088185016838939694742825762143476610215112259310775986858990019786106138952556466469818422670975238869429118972933807274018940951589368081351548344612945911//20973003965589548708434314855006387742137826247026382293972477180137413647707040377066768697931519202069488796547924545511172033369848624623987910615835501364546356391125295257989950347130885375821004661709022933531613399512675984933594553629478794041110851618556180635106168633352649598677887127603362572483995718065757513848000000 84926642764971243894475134943962931810444673911735634915100592292957077949431848662551468317687921438318511523760815574702789503485398620028309816648298294834153366303807932815889792557943196419058598328094000505490239045609005350465572648576161106088544859912831113322833512972633007203117374432262835343768993715225884506359744564475587764680843945362752279138159903160761204959//260972014540685732500941228716594401253697451754294212135043621433819057442875621317591319960743124933373694568320156829732345991594970230575152480441468737290148399986074743051568390414584640252689524964664950956646999867002330201896464020174989429698298196815004391658613009073755531263975373728451196159059154355539770936746765576373862915582488531900995626740195393587760000000; + 247//1000 -989824741407403//1034184231962388 12030115115653867750152716618470210758833187291724517144940796943//75260587566293087726015871475336147965874205113578548346133214730 -38294840366277686210014997045193699763084799480894125070642047208039500243910125715098952557814588229683929175932575120507758//2965873493603904869657209592263579672060858985048910013379293383705776295507221134405229631810912000404727243855383598634762065 
295295570231675996693315167776780747152571140671795256274243785364473032721842828048391983319277155679830435409182467618416827429676745188054309175736589897273331671973890410913992022974171287068414179//61733615542031473899613172665931360102100569526369828036848761613054048243069658703174699341974688356745672986897026862667147976890109836432058182015294740938264100446425715865187507608611630652332800 -103607762222490505420853358572242884724507274765889279862093763910463346807234121781702833150902639990159657168588480823754779677586383897844592436246137396292078029780455443152811826226940327856890549206867400186099307876348070477253089682709554337188513182823809//1957029821464352375114259520701737338007586124151132648046844881434051193532904352074721533160245351470923432731342828174975265163693440621123531325250610409612290445428137363053715149522735584940055030795765760550320061036434005286934804137881390313437628489000 1756980287380270661909883701400719407931186906833821727133555062583564555413167571820171278734011901786553078780566040820580897865510288379254954688057860520817526060415179421086733406546370539399123710022886030886680529570815674651094279571160030365101231973109840311106887692153830760503248639774555086269823425452980789521983//94472990835988958146100517364893638478098316428046767089966113424042403818500181878679138278970807216529228813278939394194468618783101912720666264035295051191650254014077906567522298860949934125319840818509112313205465763570612544745921412745399973158156989272775588446424183033120043237287779854069200776954935666962871684000 
4497481478618828357902975315180513548695118296897040369787574112992144752316100046645078788641717287509607925626450106980120209292833281864629468608251949921103739384618553805425857887380202308238545540652588351850372157487811335028912567342994696086555165285879240439813676160036107905425561960935961331946051968043421127130927398396885118470213706720569254204159456782778143//10579946535433205371659779542564637888663410206255170762231498166235907734170633296659107565976072632434068698715682033637797810470066360698992668126005489349600610810246273366955475287077755685919845606675606119864067562175770143320126919736823895798579656627635313175349176043530629645836839475477751195637533284684044767705949955798940388469560345887878201084061975415720000; + 0 2694949928731//7487940209513 8998324425985185229752164921843556928418//6879911577066686708082765415963101392005 -1113749691734484577393721387476449364293695591499248096021365233437237262190376983790684968//614723064777942345300673611647494298409492630748950625564212151899381047684846341907434565 3232749502455777147099949130361796695276166220371216781206693407480970206448819780667318974467270737609589333827843458744644740168302532466186611141754331//189582997729714290713265545572615912994127195399317804061465277614709498987796247842084387314678472277548705650701196377776893051375514851341440850875200 -4870194067553746005392433554393767454323970473452014417195456308395019908797901907202628017224416587814584976946420122287071436281684631404642172500643426697814174404515180399709158516683162105836131515362388110129683638667528993//21343197949358935830336366563982013835567480844791522290459133182821516998314822953834142105584655472803157904439421128859575015047441788494355858675117408374452309707414448855197609419898595868573273357109375195601907874661750 
1133640531577714301094463790230570404584859296215495398405151332049144471525210024993808359315202761280905403079519602164500478641718437221313878517301376096168046994391334699315934152212733388857744580032179121377464302776620604944135207945388255432571250499946633194518861052173//15269743319892007455140684408468757761408687980991560114683247369534160706810144792488732725360073361204871896366796514755108037806954494436799836307013539397248756798832458082570495407212986994247502838542705261851137819990892696985793851053928235797745833864984886152117549000 2893818794357948608396224652970468062472459880296424811982545951332742248161861507594854114831155818553085130655146195415611877204165404047936261527497967416640034320042470107097161887686354604209343668647869967015394820299208885831731185499722116534661350732567244113640955295285049989870219686790318194239544599451563105257133//1710045024558522147077712925318040863324335720844158150953699472338859250480805706497840549035944636590651925322772241392509045721953834946328464956986918939857303509645058517346927758905551129327891825404013534181813392627174511472243186476068520859982949529412273255989549686977234401171431731307352748128089844902467651170000; + 0 0 -952945855348//12294611323341 -22073573743301541610712262679571236673//102497573414903811687310385575877661106 -1721410795387108339409971682021950898460420032371617016873094528434128927392613849754588339//87137142052407093121638255236903751954999399574863919133666611538546309140586170055319680 16313517457306668499040607903231659478959795579778899115572743239581171711329724761584530868990247993612666499584567250035429887343299192678690030002344049//62416302275874709124142209806047948969179330019834923487644841352294781949250993050168150304800088551996355377414194373918898008246028356490083758823900 
-3522114925398586688095375690918293852014330258883220414302055832172071373253315487145747264610273932349849350380058343538480611104076920529217826992141264292169120822384426292339080424329190906960608882147//40700967645824834858910387658090999284501829591792512374361650786103602925853405179284205837319813016693834232729849885086983845902959918661378045151742047433271863508697358129908893348972361198906999600 -2964615015014179805295172754296611663749294757017026824055697955567711725761450449626912955847752550279125632083736465384517105390439711240732035285328566095702816892096481374832505284217977117233570295284209999214909138342054284123934619995605076214529//1519355101116665488160320486425332596410766015376101261032729360600325388596256808357121832346062378794250401199007985315934407267758119256854060437954663211883904942752463071332521831780921316532009162049372788856501274262524564791159730973728017956000; + 0 0 0 1723805262919//4571918432560 -25876943084012456170678693260966349907000//5680618296291396887153903820876753773137 48844701120434798505257977500359380914524723337647143148599611849985789938064739097674693920//866184382793087165443239722235018033562332770654938221330767289744734883576318512798157591 -10439201328524415451101705838850752098522244583730511249035532393853115936152097267231153987129654894580734726923948255514483695320//517179273612462951325391504144113415822567772911339018044947272294653098377564085323877210889868378806161859809936951216035085879 -31792106067067045818112260837108131508726364490393328197498519784851127422528513551342159965127955715285413010230702025548328167661456142039693351348408082614337262676//66396651076396923064500150569526548836429014778004208000488845377413771691299301838459832995249234003843920958668478089089157501642918765520740904300173384489982847721; + 0 0 0 0 -1428733748635//8843423958496 45167606322154409493844473095934856651372//7158213738657143615497509498481377893535 
-1615099588584428850432566812602172779484150467417635643452543010181911434924813//328599571910176348051406456035570993083111686686639606428802520754094008735795 -4034687914592733378429707314152809369441595752859107808382503309002756873129991192828401438988938141477//34345686420160340136137214349452622351601924975531611734364160909677419579508067395379029901259679245150; + 0 0 0 0 0 11565764226357//8513123442827 -25809210976654570172323689//32261698729236896469921844 31493257163953445767383915813738522959//6188068127939952207906691591618012272168; + 0 0 0 0 0 0 247//2000 -29//2470] + rk_coefs_implicit = mk_float[1 247//2000 989824741407403//8373961392408000 -12030115115653867750152716618470210758833187291724517144940796943//609397470172413665797699364172762331707483442215210917782455180000 19147420183138843105007498522596849881542399740447062535321023604019750121955062857549476278907294114841964587966287560253879//12007584994347792994563601588111658591339510060926761187770418557513264354280247507713480290732437248602134590507625905403895000 -295295570231675996693315167776780747152571140671795256274243785364473032721842828048391983319277155679830435409182467618416827429676745188054309175736589897273331671973890410913992022974171287068414179//499867332324141489065693705797015061555470198594087676411730863263595532332547843750402423821657395601179538355441513058033586857409796246413426575022629481281490691873892436155364434077826968844800000 103607762222490505420853358572242884724507274765889279862093763910463346807234121781702833150902639990159657168588480823754779677586383897844592436246137396292078029780455443152811826226940327856890549206867400186099307876348070477253089682709554337188513182823809//15846395315500828948293599357908804356336729750211600389043278392178552174355500826515963831257047380331363827784152454858099313066343648754036690892717493195241218181604351117843847364556563440810162192678265267613927619728210569124978171156934334521762174000000 
-1756980287380270661909883701400719407931186906833821727133555062583564555413167571820171278734011901786553078780566040820580897865510288379254954688057860520817526060415179421086733406546370539399123710022886030886680529570815674651094279571160030365101231973109840311106887692153830760503248639774555086269823425452980789521983//764963488550517879725510262063916101037233331401188397489604157279695577477734266224122577157658358028576751524525825054206223633871270548345475822148138066329151854365003292044714970534007563767771990433272164479396483915551518580938634921015384398041756998160126222238252494195303993824192549425661544752671543862047544000000 -4497481478618828357902975315180513548695118296897040369787574112992144752316100046645078788641717287509607925626450106980120209292833281864629468608251949921103739384618553805425857887380202308238545540652588351850372157487811335028912567342994696086555165285879240439813676160036107905425561960935961331946051968043421127130927398396885118470213706720569254204159456782778143//85667583282859962523560967955988970758408179807734176212400794868306945215956544912219494461344717671530920637373943592208889153603776199991843466607331897567616281864342294469275103539091139157245713414377377488777874997374657030932201779245537617802264426134698892108090494279600240047261858101034422636741160199870807835675708144120974805421541262250025919709003849520000000; + 0 247//2000 1640162795743102475350151093//1935977423638871147963949261 -669520662365112558836337514177765438740501906951800//3434894046659157185836586522283340462482027689855253 3946917245375300099117028390196313070898965018251827990344948235886761188759982607841249401044014800//8731687803837463597851852204375440308270070845281864536173093106185812052200062608725343371330114879 
-205400344379039456236889403418786715948678188681075725573409873825666119392008156332013686312028931603996403696782121273525274842696005527984325836413553777161305//89363383017861228225303962341868491699836686835444580475507268771312849840752079176917079323598733222511329293960320776917936050196860239819503153329236587045136 30687505472431132535398127411347221742615518129027192717809570222837123760440722268522243759249777774158885828261806787110459137168616203275440545863270163627249417471268646467826870981867030746658885027755639865911983778862//1133170665462309851631677012105217332859851084790774917074074021640837762020794312552555302543238883393727692629571220902396774550231187040084644756043645934216506528391671558626974497474800652364423277959876832348439939747 -6283525646833647454002892632282410246801087091050293278650123969383191268291122461341921080069123066751425002628983740840429970668280064678807240424673718814438405438302314529802406401591711043517178423219353227983220474197585457883457827139444416354631995314601988204663707433738723579//636073193374428113494435816226054457408386696306335042416362660226889698324156499448503155209934236569286275656030656961509515881881917957453625064846593883601537604025882329382852749987796068169486806444822362282067831972375349383462186775277444140348881039842682407991538426680968962 
-5111399490648784623505405906880770861485683214132379586079815455918757228788561547090828993593721487763199937324034708336117675901305459316990878813671055951639549111107993366169146620054742699199566402315746577078472396338683439114520127236419852170565949886333950618992185026169588984547653272289747956394678487224897575893472996553//23744424443337318051143623666623532897092042087880269588186878183995047296072436550183424467565368665201724952138208425897034422691189645477159617431842320555598909677789228593651627069755898900855393242632756026860305068214302197202618797165584108476518318651533651241430264302060599862244313590743654489782747825300717170452010145820; + 0 0 247//2000 -2972678418645402786341338364//12838151569953855298061689287 -14177902844174227269025142717297051203475652491333500//11971105784073717652462253600880425542713752681105087 -100148767553896799794460313449068596317708839495767869203993116264461768408264073121941920171989860225//16896980983343158542755332581698141314998069944341701280946967597382042773067583208093579816862254704 8120587802228218371525098119582199057503340092685175907611832343053507998800789197285606422127482797666378809500665961580494440814493887170697877672294810170691010//77414698954417744943691534221904770957339164270679599444552137006106151217420120158205166736460436330696537591532832275880234436981438762980759441700502891030827 -773431832654955771723705915527504690565625557946561612427689605734312849813109402940104692058813867875775302143309398169386342513814875087912054850684653412041387124455620214615519952957068564604440450567357722605//27692724047854711512168597950120181690998527229673939994235410614905537717037560897782708599532214026735280560758431733216205801827405703523808488247547552157893603286263272072905320930368075417471639053737763218 
-1303404354434948517428356388978807213640989814097796284872851954122942850939247373256252247771849635395296815861618930858082321997548690453745717952318803556064569049629447371161223768908840614968332185016381909654224705552772116843629887797345246826110167236923//1860770177301487311499340868274363903273885901513548551661642580585076571165583837547959561673448191237064888114034470178211592939044370653201543317609208350146901593536799945407827170076630727738853296085239724937440199422317066971204713970176240621839897829164; + 0 0 0 247//2000 538260754703221409274894839//225772174112649072819826640 53717436927136847537872396533404737469775216052354000//7927277355078818621920036006972820247636839819935349 -1772830339659539491048538392985299701647037422231103660763419873677962619701225032539957600128007666810800//15721979495129036484463918711394582943772656542660620350548585102102545643714934169988943899054128147359 23300798642481300915550244810330608079712179111290003575082920574543585477524931007510835130377963055801601626441651388811757948939347006283700//721721356793285693541681937242605189550706405472415761047580279465244653002939927725991664966944137182650520997040379092802938837804294847283 7898475573277855254317959379657908427305225639249224806096111567367259892230833277244398214475361481979103570897581069354516256084225361210253104267762350814757343118967653292590//10295136532244819865134216418581425896312845076778105967882093451812315373150919833368240666957561175670654409595846292949450822030580980616104277250108095432715677497120580742213; + 0 0 0 0 247//2000 109149106916529224225613938235//31652606811075124885796735456 -1471402139169815526549951787477624798736224552941321200//56440086799582050350922834838054057420026494337598863 5412580004078613161687981458637008030233366457113868199883389551037522158772900//455213269145024092215621169612878312797315768577504391565569423807316058315843 
3820395190970937465086284368007694267392689261092114936550556797062450867925288189416331303649552143230//14273858093118406255740963176533941671573423886510381570104226394395061792966349825149357548274393367193; + 0 0 0 0 0 247//2000 3702251939282354375344210899//4399061605898178118260737311 7335745460336671146051037364344432719375//4167718263865310401834285721625989602073 11934751738672605596266458500552561610251142174626625//266467933294598164559149808395021381193202692438810102; + 0 0 0 0 0 0 247//2000 1753//2000 0] + implicit_coefficient_is_zero = Bool[true, false, false, false, false, false, false] + n_rk_stages = 7 + rk_order = 4 + adaptive = true + low_storage = false + if input_CFL_prefactor ≤ 0.0 + CFL_prefactor = 4.0 + else + CFL_prefactor = input_CFL_prefactor + end + elseif type == "KennedyCarpenterARK324" + # 4-stage 3th-order IMEX scheme from Kennedy & Carpenter 2003 + # (https://doi.org/10.1016/S0168-9274(02)00138-1, + # https://ntrs.nasa.gov/api/citations/20010075154/downloads/20010075154.pdf) + rk_coefs = mk_float[-1247523335473//4055673282236 79264835984649679285542915383850087029//197703563163588166433309052703741041388 143374873327169165072944877723054563796993634825533873250938500977412076430225058147123297//48164537938468181360053681019221391245537623484212879143241031703829399581257519267114308 183533876663877280630903815242772717891981696485790726248876227532653907716433834740782318700703430625849345155700369934729507//152774769284679137140932574224840322584566969939460270285400962599369488056766613240926963620623994179069278241372235976473942 -674269476752868162123858074951153786574711903849607589191198931851708688548176820435148552544391865584341865043692699736944296248896158605451212237381612962268906411298755093339964390670721//2546134965897308089555580666805701115486032970876088660128805677838927216400319783331222992194057059091619325472569216389983265703597042520242784079940164596262272590600818697485927109390800; + 
1767732205903//2027836641118 104089650763298376775517283594005205634//126974577376103704518428054341484614367 402649974715726257110381944227129692139788970635058928062257578119864655690737612569045525//59412386975052518972493036127105554419479894359642287241033487141332226598183812678018467 14364246105178652475593341035586171289599000967514280222073651706570666894470602049892875371222508981379602016199819025473080098//7260819418426986996892762999677651302312472577052165412779166595718930645183114458002425346612148056054055237577670620367624747 -592205887440399614308635936680843507009892739451509439144512297133172235516402369571203494065187197645330486644003505743504146737687284998414220824119624619321360424932079759986667400688521//924270765882147767919737706090852676802697317279651422141773866515771469294827079989551846085654584104864692146726487859057882031810152027557213168580198324425357042783589702576114965490700; + 0 788022342437//10882634858940 -1296455210574762780005510449066529142309392831329604//156723304220428565752393692055114250106676689860589 -99673073274892040508947928240745592597410069006658308475742716315333750923451055334466588//36786590455936249721221951824053931878290276425173065614254196079782580911623834181388539 1168350732453084113921674966113208169458618576839040713544125474188024763720251037296265456175894768135827904089264712977423236877161701237//1613695686286088606494807246788767988268387847524889706258074110760741418740305745483456939972126277819165534580240908885870858409423997925; + 0 0 10755448449292//10357097424841 -2001365002799665343288696//31172749426290664269423183 -82906599394091874511049578920146826651351968733949252229257729//3202722135068822020151439455680372156253901385805112449655312300; + 0 0 0 1767732205903//4055673282236 -189157537172543652255956//2412892370833855116699825] + rk_coefs_implicit = mk_float[1 1767732205903//4055673282236 -687399076962262115744047//111840222463218881928454996 
-1336358764442892755444059056266228090581151350208390996588040500539220614055//27246512666133488484210277154496662561525905554882950399821293911874378720636 285200943189932567138799198875705878963855729946875982791861565057705737765121043333532449390223175098959890725739//3197693884094627611809027664829775477382158476704304256365487708404699034948521210742881159797768179105035102983218 349906153740531530807086677680742934068250971081202838415859967794240267241728221444895340256117278121084468557470142822774574506555702945441070832822061777207585555333938753//13983881081032778505142889772097359302066194326597505729957247063227579945322705391794876798087456494699396393914333627777048370428560157989974144628536010494017605716344421200; + 0 1767732205903//4055673282236 -37790740941101883580610910860591527247//131693372088198837795836211504917271540 -1855100908631287514752342850534478233437898769154169242982744248//696821012189129201178283337188250633903779865345141334346955981 -13864927788667680976894941132472814614486628394925655027069496971358505136846849603294768//21721826807945957279896846558408676329589122800041876081057713354971012403885490761907523 3279440794571605320352156805403819083072967867568566199921530274858816688510719259249438840491831638733833253082509394728011722335821499666//12387161495243534653846823539531835409482524701416597408671153384101508978692879955207371459557771903678231566977186207593669409183236242425; + 0 0 1767732205903//4055673282236 252818125219158362337262316052985694732//212256555723208031266376853558764309283 91446741129505469383144040723161877446754697117568//638848912416080950914803563396305008955201041811429 -64974104611006292878697003397814574642940098342186996399489269962607827562729361752864//16409007790583358079608534742015990629506873866141219637841130600794261103005884731491225; + 0 0 0 1767732205903//4055673282236 2287941076333//4055673282236 0] + implicit_coefficient_is_zero = Bool[true, false, false, false] + 
n_rk_stages = 4 + rk_order = 3 + adaptive = true + low_storage = false + if input_CFL_prefactor ≤ 0.0 + CFL_prefactor = 4.0 + else + CFL_prefactor = input_CFL_prefactor + end elseif type == "SSPRK4" n_rk_stages = 4 rk_coefs = allocate_float(3, n_rk_stages) @@ -197,35 +252,71 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat CFL_prefactor = NaN else error("Unsupported RK timestep method, type=$type\n" - * "Valid methods are: SSPRK4, SSPRK3, SSPRK2, SSPRK1, RKF5(4), Fekete10(4)," - * "Fekete6(4), Fekete4(3), Fekete4(2)") + * "Valid methods are: SSPRK4, SSPRK3, SSPRK2, SSPRK1, RKF5(4), Fekete10(4), " + * "Fekete6(4), Fekete4(3), Fekete4(2), KennedyCarpenterARK437, " + * "KennedyCarpenterARK324") end if split_operators && adaptive error("Adaptive timestepping not supported with operator splitting") end - return rk_coefs, n_rk_stages, rk_order, adaptive, low_storage, CFL_prefactor + # Sanity check size of rk_coefs arrays + if low_storage + correct_size = (3, n_rk_stages + adaptive) + if size(rk_coefs) != correct_size + error("Size of rk_coefs, $(size(rk_coefs)) is not " + * "(n_rk_stages+1, n_rk_stages+1)=$correct_size") + end + + correct_size_implicit = (3, n_rk_stages + 1 + adaptive) + if rk_coefs_implicit !== nothing && size(rk_coefs_implicit) != correct_size_implicit + error("Size of rk_coefs_implicit, $(size(rk_coefs_implicit)) is not " + * "(3, n_rk_stages+2)=$correct_size_implicit") + end + else + correct_size = (n_rk_stages + 1, n_rk_stages + adaptive) + if size(rk_coefs) != correct_size + error("Size of rk_coefs, $(size(rk_coefs)) is not " + * "(n_rk_stages+1, n_rk_stages+1)=$correct_size") + end + + correct_size_implicit = (n_rk_stages, n_rk_stages + 1 + adaptive) + if rk_coefs_implicit !== nothing && size(rk_coefs_implicit) != correct_size_implicit + error("Size of rk_coefs_implicit, $(size(rk_coefs_implicit)) is not " + * "(n_rk_stages, n_rk_stages+2)=$correct_size_implicit") + end + end + + correct_size = (n_rk_stages,) + 
if implicit_coefficient_is_zero !== nothing && + size(implicit_coefficient_is_zero) != correct_size + error("Size of implicit_coefficient_is_zero, $(size(implicit_coefficient_is_zero)) " + * "is not (n_rk_stages,)=$correct_size") + end + + return rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages, + rk_order, adaptive, low_storage, CFL_prefactor end """ use Runge Kutta to update any ion velocity moments evolved separately from the pdf """ -function rk_update_evolved_moments!(scratch, moments, t_params, istage) +function rk_update_evolved_moments!(scratch, scratch_implicit, moments, t_params, istage) # if separately evolving the particle density, update using RK if moments.evolve_density - rk_update_variable!(scratch, :density, t_params, istage) + rk_update_variable!(scratch, scratch_implicit, :density, t_params, istage) end # if separately evolving the parallel flow, update using RK if moments.evolve_upar - rk_update_variable!(scratch, :upar, t_params, istage) + rk_update_variable!(scratch, scratch_implicit, :upar, t_params, istage) end # if separately evolving the parallel pressure, update using RK; if moments.evolve_ppar - rk_update_variable!(scratch, :ppar, t_params, istage) + rk_update_variable!(scratch, scratch_implicit, :ppar, t_params, istage) end end @@ -233,30 +324,35 @@ end use Runge Kutta to update any electron velocity moments evolved separately from the pdf """ -function rk_update_evolved_moments_electron!(scratch, moments, t_params, istage) +function rk_update_evolved_moments_electron!(scratch, scratch_implicit, moments, t_params, + istage) # For now, electrons always fully moment kinetic, and ppar is the only evolving moment # (density and upar are calculated from quasineutrality and ambipolarity constraints). 
- rk_update_variable!(scratch, :ppar_electron, t_params, istage) + rk_update_variable!(scratch, scratch_implicit, :ppar_electron, t_params, istage) end """ use Runge Kutta to update any neutral-particle velocity moments evolved separately from the pdf """ -function rk_update_evolved_moments_neutral!(scratch, moments, t_params, istage) +function rk_update_evolved_moments_neutral!(scratch, scratch_implicit, moments, t_params, + istage) # if separately evolving the particle density, update using RK if moments.evolve_density - rk_update_variable!(scratch, :density_neutral, t_params, istage; neutrals=true) + rk_update_variable!(scratch, scratch_implicit, :density_neutral, t_params, istage; + neutrals=true) end # if separately evolving the parallel flow, update using RK if moments.evolve_upar - rk_update_variable!(scratch, :uz_neutral, t_params, istage; neutrals=true) + rk_update_variable!(scratch, scratch_implicit, :uz_neutral, t_params, istage; + neutrals=true) end # if separately evolving the parallel pressure, update using RK; if moments.evolve_ppar - rk_update_variable!(scratch, :pz_neutral, t_params, istage; neutrals=true) + rk_update_variable!(scratch, scratch_implicit, :pz_neutral, t_params, istage; + neutrals=true) end end @@ -264,29 +360,52 @@ end Update the variable named `var_symbol` in `scratch` to the current Runge-Kutta stage `istage`. The current value in `scratch[istage+1]` is the result of the forward-Euler update, which needs to be corrected using values from previous stages with the Runge-Kutta -coefficients. +coefficients. `scratch_implicit` contains the results of backward-Euler updates, which are +needed for IMEX timestepping schemes. 
""" -function rk_update_variable!(scratch, var_symbol::Symbol, t_params, istage; neutrals=false) +function rk_update_variable!(scratch, scratch_implicit, var_symbol::Symbol, t_params, + istage; neutrals=false) if t_params.low_storage var_arrays = (getfield(scratch[istage+1], var_symbol), getfield(scratch[istage], var_symbol), getfield(scratch[1], var_symbol)) + if scratch_implicit === nothing + var_arrays_implicit = (nothing, nothing, nothing) + else + var_arrays_implicit = (getfield(scratch_implicit[istage+1], var_symbol), + getfield(scratch_implicit[istage], var_symbol), + getfield(scratch_implicit[1], var_symbol)) + end else var_arrays = Tuple(getfield(scratch[i], var_symbol) for i ∈ 1:istage+1) + if scratch_implicit === nothing + var_arrays_implicit = nothing + else + var_arrays_implicit = Tuple(getfield(scratch_implicit[i], var_symbol) + for i ∈ 1:istage) + end end rk_coefs = @view t_params.rk_coefs[:,istage] + if t_params.rk_coefs_implicit === nothing + rk_coefs_implicit = nothing + else + rk_coefs_implicit = @view t_params.rk_coefs_implicit[:,istage+1] + end if neutrals if t_params.low_storage - rk_update_loop_neutrals_low_storage!(rk_coefs, var_arrays...) + rk_update_loop_neutrals_low_storage!(rk_coefs, rk_coefs_implicit, + var_arrays..., var_arrays_implicit...) else - rk_update_loop_neutrals!(rk_coefs, var_arrays) + rk_update_loop_neutrals!(rk_coefs, rk_coefs_implicit, var_arrays, + var_arrays_implicit) end else if t_params.low_storage - rk_update_loop_low_storage!(rk_coefs, var_arrays...) + rk_update_loop_low_storage!(rk_coefs, rk_coefs_implicit, var_arrays..., + var_arrays_implicit...) else - rk_update_loop!(rk_coefs, var_arrays) + rk_update_loop!(rk_coefs, rk_coefs_implicit, var_arrays, var_arrays_implicit) end end @@ -300,7 +419,8 @@ timestepping methods. The calculated error is stored in `var_symbol` in `scratch[2]` (as this entry should not be needed again after the error is calculated). 
""" -function rk_error_variable!(scratch, var_symbol::Symbol, t_params; neutrals=false) +function rk_error_variable!(scratch, scratch_implicit, var_symbol::Symbol, t_params; + neutrals=false) if !t_params.adaptive error("rk_error_variable!() should only be called when using adaptive " * "timestepping") @@ -309,11 +429,29 @@ function rk_error_variable!(scratch, var_symbol::Symbol, t_params; neutrals=fals var_arrays = (getfield(scratch[end], var_symbol), getfield(scratch[end-1], var_symbol), getfield(scratch[1], var_symbol)) + if scratch_implicit === nothing + var_arrays_implicit = (nothing, nothing, nothing) + else + var_arrays_implicit = (getfield(scratch_implicit[end], var_symbol), + getfield(scratch_implicit[end-1], var_symbol), + getfield(scratch_implicit[1], var_symbol)) + end else var_arrays = Tuple(getfield(scratch[i], var_symbol) for i ∈ 1:length(scratch)) + if scratch_implicit === nothing + var_arrays_implicit = nothing + else + var_arrays_implicit = Tuple(getfield(scratch_implicit[i], var_symbol) + for i ∈ 1:length(scratch_implicit)) + end end error_coefs = @view t_params.rk_coefs[:,end] + if t_params.rk_coefs_implicit === nothing + error_coefs_implicit = nothing + else + error_coefs_implicit = @view t_params.rk_coefs_implicit[:,end] + end # The second element of `scratch` is not needed any more for the RK update, so we can # overwrite it with the error estimate. 
@@ -321,17 +459,20 @@ function rk_error_variable!(scratch, var_symbol::Symbol, t_params; neutrals=fals if neutrals if t_params.low_storage - rk_update_loop_neutrals_low_storage!(error_coefs, var_arrays...; + rk_update_loop_neutrals_low_storage!(error_coefs, error_coefs_implicit, + var_arrays..., var_arrays_implicit...; output=output) else - rk_update_loop_neutrals!(error_coefs, var_arrays; output=output) + rk_update_loop_neutrals!(error_coefs, error_coefs_implicit, var_arrays, + var_arrays_implicit; output=output) end else if t_params.low_storage - rk_update_loop_low_storage!(error_coefs, var_arrays...; - output=output) + rk_update_loop_low_storage!(error_coefs, error_coefs_implicit, var_arrays..., + var_arrays_implicit...; output=output) else - rk_update_loop!(error_coefs, var_arrays; output=output) + rk_update_loop!(error_coefs, error_coefs_implicit, var_arrays, + var_arrays_implicit; output=output) end end @@ -339,172 +480,295 @@ function rk_error_variable!(scratch, var_symbol::Symbol, t_params; neutrals=fals end # Ion distribution function -function rk_update_loop_low_storage!(rk_coefs, new::AbstractArray{mk_float,5}, +function rk_update_loop_low_storage!(rk_coefs, rk_coefs_implicit, + new::AbstractArray{mk_float,5}, old::AbstractArray{mk_float,5}, - first::AbstractArray{mk_float,5}; output=new) + first::AbstractArray{mk_float,5}, new_implicit, + old_implicit, first_implicit; output=new) @boundscheck length(rk_coefs) == 3 begin_s_r_z_vperp_vpa_region() - @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin - output[ivpa,ivperp,iz,ir,is] = rk_coefs[1]*first[ivpa,ivperp,iz,ir,is] + - rk_coefs[2]*old[ivpa,ivperp,iz,ir,is] + - rk_coefs[3]*new[ivpa,ivperp,iz,ir,is] + if rk_coefs_implicit === nothing + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + output[ivpa,ivperp,iz,ir,is] = rk_coefs[1]*first[ivpa,ivperp,iz,ir,is] + + rk_coefs[2]*old[ivpa,ivperp,iz,ir,is] + + rk_coefs[3]*new[ivpa,ivperp,iz,ir,is] + end + else + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + 
output[ivpa,ivperp,iz,ir,is] = rk_coefs[1]*first[ivpa,ivperp,iz,ir,is] + + rk_coefs[2]*old[ivpa,ivperp,iz,ir,is] + + rk_coefs[3]*new[ivpa,ivperp,iz,ir,is] + + rk_coefs_implicit[1]*first_implicit[ivpa,ivperp,iz,ir,is] + + rk_coefs_implicit[2]*old_implicit[ivpa,ivperp,iz,ir,is] + end end return nothing end -function rk_update_loop!(rk_coefs, - var_arrays::NTuple{N,AbstractArray{mk_float,5}}; - output=var_arrays[N]) where N +function rk_update_loop!(rk_coefs, rk_coefs_implicit, + var_arrays::NTuple{N,AbstractArray{mk_float,5}}, + var_arrays_implicit; output=var_arrays[N]) where N @boundscheck length(rk_coefs) ≥ N begin_s_r_z_vperp_vpa_region() - @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin - output[ivpa,ivperp,iz,ir,is] = - sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir,is] for i ∈ 1:N) + if rk_coefs_implicit === nothing + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + output[ivpa,ivperp,iz,ir,is] = + sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir,is] for i ∈ 1:N) + end + else + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + output[ivpa,ivperp,iz,ir,is] = + sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir,is] for i ∈ 1:N) + + sum(rk_coefs_implicit[i] * var_arrays_implicit[i][ivpa,ivperp,iz,ir,is] for i ∈ 1:N-1) + end end return nothing end # Ion moments -function rk_update_loop_low_storage!(rk_coefs, new::AbstractArray{mk_float,3}, +function rk_update_loop_low_storage!(rk_coefs, rk_coefs_implicit, + new::AbstractArray{mk_float,3}, old::AbstractArray{mk_float,3}, - first::AbstractArray{mk_float,3}; output=new) + first::AbstractArray{mk_float,3}, new_implicit, + old_implicit, first_implicit; output=new) @boundscheck length(rk_coefs) == 3 begin_s_r_z_region() - @loop_s_r_z is ir iz begin - output[iz,ir,is] = rk_coefs[1]*first[iz,ir,is] + - rk_coefs[2]*old[iz,ir,is] + - rk_coefs[3]*new[iz,ir,is] + if rk_coefs_implicit === nothing + @loop_s_r_z is ir iz begin + output[iz,ir,is] = rk_coefs[1]*first[iz,ir,is] + + rk_coefs[2]*old[iz,ir,is] + + 
rk_coefs[3]*new[iz,ir,is] + end + else + @loop_s_r_z is ir iz begin + output[iz,ir,is] = rk_coefs[1]*first[iz,ir,is] + + rk_coefs[2]*old[iz,ir,is] + + rk_coefs[3]*new[iz,ir,is] + + rk_coefs_implicit[1]*first_implicit[iz,ir,is] + + rk_coefs_implicit[2]*old_implicit[iz,ir,is] + end end return nothing end -function rk_update_loop!(rk_coefs, - var_arrays::NTuple{N,AbstractArray{mk_float,3}}; - output=var_arrays[N]) where N +function rk_update_loop!(rk_coefs, rk_coefs_implicit, + var_arrays::NTuple{N,AbstractArray{mk_float,3}}, + var_arrays_implicit; output=var_arrays[N]) where N @boundscheck length(rk_coefs) ≥ N begin_s_r_z_region() - @loop_s_r_z is ir iz begin - output[iz,ir,is] = sum(rk_coefs[i] * var_arrays[i][iz,ir,is] for i ∈ 1:N) + if rk_coefs_implicit === nothing + @loop_s_r_z is ir iz begin + output[iz,ir,is] = sum(rk_coefs[i] * var_arrays[i][iz,ir,is] for i ∈ 1:N) + end + else + @loop_s_r_z is ir iz begin + output[iz,ir,is] = sum(rk_coefs[i] * var_arrays[i][iz,ir,is] for i ∈ 1:N) + + sum(rk_coefs_implicit[i] * var_arrays_implicit[i][iz,ir,is] for i ∈ 1:N-1) + end end return nothing end # Electron distribution function -function rk_update_loop_low_storage!(rk_coefs, new::AbstractArray{mk_float,4}, +function rk_update_loop_low_storage!(rk_coefs, rk_coefs_implicit, + new::AbstractArray{mk_float,4}, old::AbstractArray{mk_float,4}, - first::AbstractArray{mk_float,4}; output=new) + first::AbstractArray{mk_float,4}, new_implicit, + old_implicit, first_implicit; output=new) @boundscheck length(rk_coefs) == 3 begin_r_z_vperp_vpa_region() - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - output[ivpa,ivperp,iz,ir] = rk_coefs[1]*first[ivpa,ivperp,iz,ir] + - rk_coefs[2]*old[ivpa,ivperp,iz,ir] + - rk_coefs[3]*new[ivpa,ivperp,iz,ir] + if rk_coefs_implicit === nothing + @loop_r_z_vperp_vpa ir iz ivperp ivpa begin + output[ivpa,ivperp,iz,ir] = rk_coefs[1]*first[ivpa,ivperp,iz,ir] + + rk_coefs[2]*old[ivpa,ivperp,iz,ir] + + rk_coefs[3]*new[ivpa,ivperp,iz,ir] + end + else + 
@loop_r_z_vperp_vpa ir iz ivperp ivpa begin + output[ivpa,ivperp,iz,ir] = rk_coefs[1]*first[ivpa,ivperp,iz,ir] + + rk_coefs[2]*old[ivpa,ivperp,iz,ir] + + rk_coefs[3]*new[ivpa,ivperp,iz,ir] + + rk_coefs_implicit[1]*first_implicit[ivpa,ivperp,iz,ir] + + rk_coefs_implicit[2]*old_implicit[ivpa,ivperp,iz,ir] + end end return nothing end -function rk_update_loop!(rk_coefs, - var_arrays::NTuple{N,AbstractArray{mk_float,4}}; - output=var_arrays[N]) where N +function rk_update_loop!(rk_coefs, rk_coefs_implicit, + var_arrays::NTuple{N,AbstractArray{mk_float,4}}, + var_arrays_implicit; output=var_arrays[N]) where N @boundscheck length(rk_coefs) ≥ N begin_r_z_vperp_vpa_region() - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - output[ivpa,ivperp,iz,ir] = - sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir] for i ∈ 1:N) + if rk_coefs_implicit === nothing + @loop_r_z_vperp_vpa ir iz ivperp ivpa begin + output[ivpa,ivperp,iz,ir] = + sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir] for i ∈ 1:N) + end + else + @loop_r_z_vperp_vpa ir iz ivperp ivpa begin + output[ivpa,ivperp,iz,ir] = + sum(rk_coefs[i] * var_arrays[i][ivpa,ivperp,iz,ir] for i ∈ 1:N) + + sum(rk_coefs_implicit[i] * var_arrays_implicit[i][ivpa,ivperp,iz,ir] + for i ∈ 1:N-1) + end end return nothing end # Electron moments -function rk_update_loop_low_storage!(rk_coefs, new::AbstractArray{mk_float,2}, +function rk_update_loop_low_storage!(rk_coefs, rk_coefs_implicit, + new::AbstractArray{mk_float,2}, old::AbstractArray{mk_float,2}, - first::AbstractArray{mk_float,2}; output=new) + first::AbstractArray{mk_float,2}, new_implicit, + old_implicit, first_implicit; output=new) @boundscheck length(rk_coefs) == 3 begin_r_z_region() - @loop_r_z ir iz begin - output[iz,ir] = rk_coefs[1]*first[iz,ir] + - rk_coefs[2]*old[iz,ir] + - rk_coefs[3]*new[iz,ir] + if rk_coefs_implicit === nothing + @loop_r_z ir iz begin + output[iz,ir] = rk_coefs[1]*first[iz,ir] + + rk_coefs[2]*old[iz,ir] + + rk_coefs[3]*new[iz,ir] + end + else + @loop_r_z 
ir iz begin + output[iz,ir] = rk_coefs[1]*first[iz,ir] + + rk_coefs[2]*old[iz,ir] + + rk_coefs[3]*new[iz,ir] + + rk_coefs_implicit[1]*first_implicit[iz,ir] + + rk_coefs_implicit[2]*old_implicit[iz,ir] + end end return nothing end -function rk_update_loop!(rk_coefs, - var_arrays::NTuple{N,AbstractArray{mk_float,2}}; +function rk_update_loop!(rk_coefs, rk_coefs_implicit, + var_arrays::NTuple{N,AbstractArray{mk_float,2}}, + var_arrays_implicit; output=var_arrays[N]) where N @boundscheck length(rk_coefs) ≥ N begin_r_z_region() - @loop_r_z ir iz begin - output[iz,ir] = sum(rk_coefs[i] * var_arrays[i][iz,ir] for i ∈ 1:N) + if rk_coefs_implicit === nothing + @loop_r_z ir iz begin + output[iz,ir] = sum(rk_coefs[i] * var_arrays[i][iz,ir] for i ∈ 1:N) + end + else + @loop_r_z ir iz begin + output[iz,ir] = sum(rk_coefs[i] * var_arrays[i][iz,ir] for i ∈ 1:N) + + sum(rk_coefs_implicit[i] * var_arrays_implicit[i][iz,ir] + for i ∈ 1:N-1) + end end return nothing end # Neutral distribution function -function rk_update_loop_neutrals_low_storage!(rk_coefs, new::AbstractArray{mk_float,6}, - old::AbstractArray{mk_float,6}, - first::AbstractArray{mk_float,6}; output=new) +function rk_update_loop_neutrals_low_storage!(rk_coefs, rk_coefs_implicit, + new::AbstractArray{mk_float,6}, + old::AbstractArray{mk_float,6}, + first::AbstractArray{mk_float,6}, + new_implicit, old_implicit, first_implicit; + output=new) @boundscheck length(rk_coefs) == 3 begin_sn_r_z_vzeta_vr_vz_region() - @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin - output[ivz,ivr,ivzeta,iz,ir,isn] = rk_coefs[1]*first[ivz,ivr,ivzeta,iz,ir,isn] + - rk_coefs[2]*old[ivz,ivr,ivzeta,iz,ir,isn] + - rk_coefs[3]*new[ivz,ivr,ivzeta,iz,ir,isn] + if rk_coefs_implicit === nothing + @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin + output[ivz,ivr,ivzeta,iz,ir,isn] = rk_coefs[1]*first[ivz,ivr,ivzeta,iz,ir,isn] + + rk_coefs[2]*old[ivz,ivr,ivzeta,iz,ir,isn] + + rk_coefs[3]*new[ivz,ivr,ivzeta,iz,ir,isn] + end + else + 
@loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin + output[ivz,ivr,ivzeta,iz,ir,isn] = rk_coefs[1]*first[ivz,ivr,ivzeta,iz,ir,isn] + + rk_coefs[2]*old[ivz,ivr,ivzeta,iz,ir,isn] + + rk_coefs[3]*new[ivz,ivr,ivzeta,iz,ir,isn] + + rk_coefs_implicit[1]*first_implicit[ivz,ivr,ivzeta,iz,ir,isn] + + rk_coefs_implicit[2]*old_implicit[ivz,ivr,ivzeta,iz,ir,isn] + end end return nothing end -function rk_update_loop_neutrals!(rk_coefs, - var_arrays::NTuple{N,AbstractArray{mk_float,6}}; - output=var_arrays[N]) where N +function rk_update_loop_neutrals!(rk_coefs, rk_coefs_implicit, + var_arrays::NTuple{N,AbstractArray{mk_float,6}}, + var_arrays_implicit; output=var_arrays[N]) where N @boundscheck length(rk_coefs) ≥ N begin_sn_r_z_vzeta_vr_vz_region() - @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin - output[ivz,ivr,ivzeta,iz,ir,isn] = - sum(rk_coefs[i] * var_arrays[i][ivz,ivr,ivzeta,iz,ir,isn] for i ∈ 1:N) + if rk_coefs_implicit === nothing + @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin + output[ivz,ivr,ivzeta,iz,ir,isn] = + sum(rk_coefs[i] * var_arrays[i][ivz,ivr,ivzeta,iz,ir,isn] for i ∈ 1:N) + end + else + @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin + output[ivz,ivr,ivzeta,iz,ir,isn] = + sum(rk_coefs[i] * var_arrays[i][ivz,ivr,ivzeta,iz,ir,isn] for i ∈ 1:N) + + sum(rk_coefs_implicit[i] * var_arrays_implicit[i][ivz,ivr,ivzeta,iz,ir,isn] + for i ∈ 1:N-1) + end end return nothing end # Neutral moments -function rk_update_loop_neutrals_low_storage!(rk_coefs, new::AbstractArray{mk_float,3}, +function rk_update_loop_neutrals_low_storage!(rk_coefs, rk_coefs_implicit, + new::AbstractArray{mk_float,3}, old::AbstractArray{mk_float,3}, - first::AbstractArray{mk_float,3}; + first::AbstractArray{mk_float,3}, + new_implicit, old_implicit, first_implicit; output=new) @boundscheck length(rk_coefs) == 3 begin_sn_r_z_region() - @loop_sn_r_z isn ir iz begin - output[iz,ir,isn] = rk_coefs[1]*first[iz,ir,isn] + - rk_coefs[2]*old[iz,ir,isn] + - 
rk_coefs[3]*new[iz,ir,isn] + if rk_coefs_implicit === nothing + @loop_sn_r_z isn ir iz begin + output[iz,ir,isn] = rk_coefs[1]*first[iz,ir,isn] + + rk_coefs[2]*old[iz,ir,isn] + + rk_coefs[3]*new[iz,ir,isn] + end + else + @loop_sn_r_z isn ir iz begin + output[iz,ir,isn] = rk_coefs[1]*first[iz,ir,isn] + + rk_coefs[2]*old[iz,ir,isn] + + rk_coefs[3]*new[iz,ir,isn] + + rk_coefs_implicit[1]*first_implicit[iz,ir,isn] + + rk_coefs_implicit[2]*old_implicit[iz,ir,isn] + end end return nothing end -function rk_update_loop_neutrals!(rk_coefs, - var_arrays::NTuple{N,AbstractArray{mk_float,3}}; - output=var_arrays[N]) where N +function rk_update_loop_neutrals!(rk_coefs, rk_coefs_implicit, + var_arrays::NTuple{N,AbstractArray{mk_float,3}}, + var_arrays_implicit; output=var_arrays[N]) where N @boundscheck length(rk_coefs) ≥ N begin_sn_r_z_region() - @loop_sn_r_z isn ir iz begin - output[iz,ir,isn] = sum(rk_coefs[i] * var_arrays[i][iz,ir,isn] for i ∈ 1:N) + if rk_coefs_implicit === nothing + @loop_sn_r_z isn ir iz begin + output[iz,ir,isn] = sum(rk_coefs[i] * var_arrays[i][iz,ir,isn] for i ∈ 1:N) + end + else + @loop_sn_r_z isn ir iz begin + output[iz,ir,isn] = sum(rk_coefs[i] * var_arrays[i][iz,ir,isn] for i ∈ 1:N) + + sum(rk_coefs_implicit[i] * var_arrays_implicit[i][iz,ir,isn] + for i ∈ 1:N-1) + end end return nothing diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 3ad01b14b..dcdc5911b 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -233,7 +233,8 @@ Create a [`input_structs.time_info`](@ref) struct using the settings in `t_input """ function setup_time_info(t_input, n_variables, code_time, dt_reload, dt_before_last_fail_reload, manufactured_solns_input, io_input) - rk_coefs, n_rk_stages, rk_order, adaptive, low_storage, CFL_prefactor = + rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages, rk_order, + adaptive, low_storage, CFL_prefactor = 
setup_runge_kutta_coefficients!(t_input.type, t_input.CFL_prefactor, t_input.split_operators) @@ -296,8 +297,9 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, next_output_time, dt_before_output, dt_before_last_fail, CFL_prefactor, step_to_output, Ref(0), Ref(0), mk_int[], mk_int[], t_input.nwrite, t_input.nwrite_dfns, moments_output_times, - dfns_output_times, t_input.type, rk_coefs, n_rk_stages, rk_order, - adaptive, low_storage, t_input.rtol, t_input.atol, t_input.atol_upar, + dfns_output_times, t_input.type, rk_coefs, rk_coefs_implicit, + implicit_coefficient_is_zero, n_rk_stages, rk_order, adaptive, + low_storage, t_input.rtol, t_input.atol, t_input.atol_upar, t_input.step_update_prefactor, t_input.max_increase_factor, t_input.max_increase_factor_near_last_fail, t_input.last_fail_proximity_factor, t_input.minimum_dt, @@ -422,7 +424,13 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop # create an array of structs containing scratch arrays for the pdf and low-order moments # that may be evolved separately via fluid equations - scratch = setup_scratch_arrays(moments, pdf.ion.norm, pdf.neutral.norm, t_params.n_rk_stages) + n_rk_stages = t_params.n_rk_stages + scratch = setup_scratch_arrays(moments, pdf, n_rk_stages + 1) + if t_params.rk_coefs_implicit !== nothing + scratch_implicit = setup_scratch_arrays(moments, pdf, n_rk_stages) + else + scratch_implicit = nothing + end # setup dummy arrays & buffer arrays for z r MPI n_neutral_species_alloc = max(1,composition.n_neutral_species) # create arrays for Fokker-Planck collisions @@ -644,8 +652,8 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop # Ensure all processes are synchronized at the end of the setup _block_synchronize() - return moments, spectral_objects, scratch, advance, t_params, fp_arrays, gyroavs, - manufactured_source_list + return moments, spectral_objects, scratch, scratch_implicit, advance, t_params, + 
fp_arrays, gyroavs, manufactured_source_list end """ @@ -937,16 +945,19 @@ end create an array of structs containing scratch arrays for the normalised pdf and low-order moments that may be evolved separately via fluid equations """ -function setup_scratch_arrays(moments, pdf_ion_in, pdf_neutral_in, n_rk_stages) - # create n_rk_stages+1 structs, each of which will contain one pdf, - # one density, and one parallel flow array - scratch = Vector{scratch_pdf{5,3,6,3}}(undef, n_rk_stages+1) - pdf_dims = size(pdf_ion_in) +function setup_scratch_arrays(moments, pdf, n) + # will create n_rk_stages+1 structs, each of which will contain one pdf, + # density, parallel flow, parallel pressure, and perpendicular pressure array for ions + # (possibly) the same for electrons, and the same for neutrals. The actual array will + # be created at the end of the first step of the loop below, once we have a + # `scratch_pdf` object of the correct type. + scratch = Vector{scratch_pdf{5,3,6,3}}(undef, n) + pdf_dims = size(pdf.ion.norm) moment_dims = size(moments.ion.dens) - pdf_neutral_dims = size(pdf_neutral_in) + pdf_neutral_dims = size(pdf.neutral.norm) moment_neutral_dims = size(moments.neutral.dens) # populate each of the structs - for istage ∈ 1:n_rk_stages+1 + for istage ∈ 1:n # Allocate arrays in temporary variables so that we can identify them # by source line when using @debug_shared_array pdf_array = allocate_shared_float(pdf_dims...) 
@@ -967,13 +978,13 @@ function setup_scratch_arrays(moments, pdf_ion_in, pdf_neutral_in, n_rk_stages) pdf_neutral_array, density_neutral_array, uz_neutral_array, pz_neutral_array) @serial_region begin - scratch[istage].pdf .= pdf_ion_in + scratch[istage].pdf .= pdf.ion.norm scratch[istage].density .= moments.ion.dens scratch[istage].upar .= moments.ion.upar scratch[istage].ppar .= moments.ion.ppar scratch[istage].pperp .= moments.ion.pperp - scratch[istage].pdf_neutral .= pdf_neutral_in + scratch[istage].pdf_neutral .= pdf.neutral.norm scratch[istage].density_neutral .= moments.neutral.dens scratch[istage].uz_neutral .= moments.neutral.uz scratch[istage].pz_neutral .= moments.neutral.pz @@ -992,7 +1003,7 @@ time integrator can be used without severe CFL condition """ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, moments, fields, spectral_objects, advect_objects, - composition, collisions, geometry, gyroavs, boundary_distributions, + composition, collisions, geometry, gyroavs, boundary_distributions, external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy, manufactured_source_list, ascii_io, io_moments, io_dfns) @@ -1063,16 +1074,21 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr if t_params.split_operators # MRH NOT SUPPORTED - time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, - vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, - composition, collisions, external_source_settings, num_diss_params, - advance, t_params.step_counter[]) + time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_params, + vpa, z, vpa_spectral, z_spectral, moments, + fields, vpa_advect, z_advect, composition, + collisions, external_source_settings, + num_diss_params, advance, + t_params.step_counter[]) else - time_advance_no_splitting!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, - moments, fields, spectral_objects, 
advect_objects, - composition, collisions, geometry, gyroavs, boundary_distributions, - external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy, - manufactured_source_list, diagnostic_checks, t_params.step_counter[]) + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vz, + vr, vzeta, vpa, vperp, gyrophase, z, r, moments, + fields, spectral_objects, advect_objects, + composition, collisions, geometry, gyroavs, + boundary_distributions, external_source_settings, + num_diss_params, advance, fp_arrays, + scratch_dummy, manufactured_source_list, + diagnostic_checks, t_params.step_counter[]) end # update the time t += t_params.previous_dt[] @@ -1302,9 +1318,11 @@ end """ """ -function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, - vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, - composition, collisions, external_source_settings, num_diss_params, advance, istep) +function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_params, vpa, + z, vpa_spectral, z_spectral, moments, fields, + vpa_advect, z_advect, composition, collisions, + external_source_settings, num_diss_params, advance, + istep) # define some abbreviated variables for tidiness n_ion_species = composition.n_ion_species @@ -1317,7 +1335,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, # advance the operator-split 1D advection equation in vpa # vpa-advection only applies for ion species advance.vpa_advection = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, advance, istep) @@ -1325,7 +1343,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, # z_advection! 
advances the operator-split 1D advection equation in z # apply z-advection operation to all species (ion and neutral) advance.z_advection = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, advance, istep) @@ -1334,7 +1352,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, if composition.n_neutral_species > 0 if collisions.charge_exchange > 0.0 advance.cx_collisions = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, advance, istep) @@ -1342,7 +1360,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, end if collisions.ionization > 0.0 advance.ionization_collisions = true - time_advance_no_splitting!(pdf, scratch, t, t_params, z, vpa, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa, z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect, composition, collisions, external_source_settings, num_diss_params, advance, istep) @@ -1351,7 +1369,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, end if collisions.krook_collision_frequency_prefactor > 0.0 advance.krook_collisions_ii = true - time_advance_no_splitting!(pdf, scratch, t, t_params, z, vpa, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa, z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect, z_SL, vpa_SL, composition, collisions, sources, num_diss_params, advance, istep) @@ -1361,7 +1379,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, # to the kinetic 
equation if moments.evolve_density || moments.evolve_upar || moments.evolve_ppar advance.source_terms = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, advance, istep) @@ -1370,7 +1388,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, # use the continuity equation to update the density if moments.evolve_density advance.continuity = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, advance, istep) @@ -1379,7 +1397,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, # use force balance to update the parallel flow if moments.evolve_upar advance.force_balance = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, advance, istep) @@ -1388,7 +1406,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, # use the energy equation to update the parallel pressure if moments.evolve_ppar advance.energy = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, advance, istep) @@ -1398,7 +1416,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, # use the energy 
equation to update the parallel pressure if moments.evolve_ppar advance.energy = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, advance, istep) @@ -1407,7 +1425,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, # use force balance to update the parallel flow if moments.evolve_upar advance.force_balance = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, advance, istep) @@ -1416,7 +1434,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, # use the continuity equation to update the density if moments.evolve_density advance.continuity = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, advance, istep) @@ -1426,7 +1444,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, # to the kinetic equation if moments.evolve_density || moments.evolve_upar || moments.evolve_ppar advance.source_terms = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, advance, istep) @@ -1436,7 +1454,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, if 
composition.n_neutral_species > 0 if collisions.ionization > 0.0 advance.ionization = true - time_advance_no_splitting!(pdf, scratch, t, t_params, z, vpa, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa, z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect, composition, collisions, external_source_settings, num_diss_params, advance, istep) @@ -1444,7 +1462,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, end if collisions.charge_exchange > 0.0 advance.cx_collisions = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, advance, istep) @@ -1454,7 +1472,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, # z_advection! advances the operator-split 1D advection equation in z # apply z-advection operation to all species (ion and neutral) advance.z_advection = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, advance, istep) @@ -1462,7 +1480,7 @@ function time_advance_split_operators!(pdf, scratch, t, t_params, vpa, z, # advance the operator-split 1D advection equation in vpa # vpa-advection only applies for ion species advance.vpa_advection = true - time_advance_no_splitting!(pdf, scratch, t, t_params, vpa, z, + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, advance, istep) @@ -1473,16 +1491,19 @@ end """ """ -function time_advance_no_splitting!(pdf, 
scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, - moments, fields, spectral_objects, advect_objects, - composition, collisions, geometry, gyroavs, boundary_distributions, - external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy, - manufactured_source_list, diagnostic_checks, istep) - - ssp_rk!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, - moments, fields, spectral_objects, advect_objects, composition, collisions, - geometry, gyroavs, boundary_distributions, external_source_settings, num_diss_params, - advance, fp_arrays, scratch_dummy, manufactured_source_list, diagnostic_checks, istep) +function time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, + vzeta, vpa, vperp, gyrophase, z, r, moments, fields, + spectral_objects, advect_objects, composition, + collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, advance, + fp_arrays, scratch_dummy, manufactured_source_list, + diagnostic_checks, istep) + + ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa, vperp, + gyrophase, z, r, moments, fields, spectral_objects, advect_objects, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy, + manufactured_source_list, diagnostic_checks, istep) return nothing end @@ -1491,7 +1512,7 @@ end Use the result of the forward-Euler timestep and the previous Runge-Kutta stages to compute the updated pdfs, and any evolved moments. 
""" -function rk_update!(scratch, moments, t_params, istage, composition) +function rk_update!(scratch, scratch_implicit, moments, t_params, istage, composition) begin_s_r_z_region() new_scratch = scratch[istage+1] @@ -1503,17 +1524,17 @@ function rk_update!(scratch, moments, t_params, istage, composition) ## # here we seem to have duplicate arrays for storing n, u||, p||, etc, but not for vth # 'scratch' is for the multiple stages of time advanced quantities, but 'moments' can be updated directly at each stage - rk_update_variable!(scratch, :pdf, t_params, istage) + rk_update_variable!(scratch, scratch_implicit, :pdf, t_params, istage) # use Runge Kutta to update any velocity moments evolved separately from the pdf - rk_update_evolved_moments!(scratch, moments, t_params, istage) + rk_update_evolved_moments!(scratch, scratch_implicit, moments, t_params, istage) if composition.n_neutral_species > 0 ## # update the neutral particle distribution and moments ## - rk_update_variable!(scratch, :pdf_neutral, t_params, istage; neutrals=true) + rk_update_variable!(scratch, scratch_implicit, :pdf_neutral, t_params, istage; neutrals=true) # use Runge Kutta to update any velocity moments evolved separately from the pdf - rk_update_evolved_moments_neutral!(scratch, moments, t_params, istage) + rk_update_evolved_moments_neutral!(scratch, scratch_implicit, moments, t_params, istage) end end @@ -1639,14 +1660,20 @@ function apply_all_bcs_constraints_update_moments!( end """ - adaptive_timestep_update!(scratch, t_params, rk_coefs, moments, n_neutral_species) + adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, moments, + fields, composition, collisions, geometry, + external_source_settings, spectral_objects, + advect_objects, gyroavs, num_diss_params, advance, + scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz) Check the error estimate for the embedded RK method and adjust the timestep if appropriate. 
""" -function adaptive_timestep_update!(scratch, t, t_params, moments, fields, composition, - collisions, geometry, external_source_settings, - advect_objects, r, z, vperp, vpa, vzeta, vr, vz) +function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, moments, + fields, composition, collisions, geometry, + external_source_settings, spectral_objects, + advect_objects, gyroavs, num_diss_params, advance, + scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz) #error_norm_method = "Linf" error_norm_method = "L2" @@ -1715,10 +1742,11 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos # Calculate error for ion distribution functions # Note rk_error_variable!() stores the calculated error in `scratch[2]`. - rk_error_variable!(scratch, :pdf, t_params) - ion_pdf_error = local_error_norm(scratch[2].pdf, scratch[end].pdf, t_params.rtol, - t_params.atol; method=error_norm_method, - skip_r_inner=skip_r_inner, skip_z_lower=skip_z_lower, + rk_error_variable!(scratch, scratch_implicit, :pdf, t_params) + ion_pdf_error = local_error_norm(scratch[2].pdf, scratch[t_params.n_rk_stages+1].pdf, + t_params.rtol, t_params.atol; + method=error_norm_method, skip_r_inner=skip_r_inner, + skip_z_lower=skip_z_lower, error_sum_zero=t_params.error_sum_zero) push!(error_norms, ion_pdf_error) push!(total_points, @@ -1727,8 +1755,9 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos # Calculate error for ion moments, if necessary if moments.evolve_density begin_s_r_z_region() - rk_error_variable!(scratch, :density, t_params) - ion_n_err = local_error_norm(scratch[2].density, scratch[end].density, + rk_error_variable!(scratch, scratch_implicit, :density, t_params) + ion_n_err = local_error_norm(scratch[2].density, + scratch[t_params.n_rk_stages+1].density, t_params.rtol, t_params.atol; method=error_norm_method, skip_r_inner=skip_r_inner, skip_z_lower=skip_z_lower, @@ -1738,8 +1767,9 @@ function 
adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos end if moments.evolve_upar begin_s_r_z_region() - rk_error_variable!(scratch, :upar, t_params) - ion_u_err = local_error_norm(scratch[2].upar, scratch[end].upar, t_params.rtol, + rk_error_variable!(scratch, scratch_implicit, :upar, t_params) + ion_u_err = local_error_norm(scratch[2].upar, + scratch[t_params.n_rk_stages+1].upar, t_params.rtol, t_params.atol; method=error_norm_method, skip_r_inner=skip_r_inner, skip_z_lower=skip_z_lower, error_sum_zero=t_params.error_sum_zero) @@ -1748,8 +1778,9 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos end if moments.evolve_ppar begin_s_r_z_region() - rk_error_variable!(scratch, :ppar, t_params) - ion_p_err = local_error_norm(scratch[2].ppar, scratch[end].ppar, t_params.rtol, + rk_error_variable!(scratch, scratch_implicit, :ppar, t_params) + ion_p_err = local_error_norm(scratch[2].ppar, + scratch[t_params.n_rk_stages+1].ppar, t_params.rtol, t_params.atol; method=error_norm_method, skip_r_inner=skip_r_inner, skip_z_lower=skip_z_lower, error_sum_zero=t_params.error_sum_zero) @@ -1790,7 +1821,7 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos push!(CFL_limits, t_params.CFL_prefactor * neutral_vz_CFL) # Calculate error for neutral distribution functions - rk_error_variable!(scratch, :pdf_neutral, t_params; neutrals=true) + rk_error_variable!(scratch, scratch_implicit, :pdf_neutral, t_params; neutrals=true) neut_pdf_error = local_error_norm(scratch[2].pdf_neutral, scratch[end].pdf_neutral, t_params.rtol, t_params.atol; method=error_norm_method, @@ -1805,7 +1836,7 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos # Calculate error for neutral moments, if necessary if moments.evolve_density begin_sn_r_z_region() - rk_error_variable!(scratch, :density_neutral, t_params; neutrals=true) + rk_error_variable!(scratch, scratch_implicit, :density_neutral, t_params; 
neutrals=true) neut_n_err = local_error_norm(scratch[2].density_neutral, scratch[end].density_neutral, t_params.rtol, t_params.atol, true; method=error_norm_method, @@ -1817,8 +1848,9 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos end if moments.evolve_upar begin_sn_r_z_region() - rk_error_variable!(scratch, :uz_neutral, t_params; neutrals=true) - neut_u_err = local_error_norm(scratch[2].uz_neutral, scratch[end].uz_neutral, + rk_error_variable!(scratch, scratch_implicit, :uz_neutral, t_params; neutrals=true) + neut_u_err = local_error_norm(scratch[2].uz_neutral, + scratch[t_params.n_rk_stages+1].uz_neutral, t_params.rtol, t_params.atol, true; method=error_norm_method, skip_r_inner=skip_r_inner, @@ -1829,8 +1861,9 @@ function adaptive_timestep_update!(scratch, t, t_params, moments, fields, compos end if moments.evolve_ppar begin_sn_r_z_region() - rk_error_variable!(scratch, :pz_neutral, t_params; neutrals=true) - neut_p_err = local_error_norm(scratch[2].pz_neutral, scratch[end].pz_neutral, + rk_error_variable!(scratch, scratch_implicit, :pz_neutral, t_params; neutrals=true) + neut_p_err = local_error_norm(scratch[2].pz_neutral, + scratch[t_params.n_rk_stages+1].pz_neutral, t_params.rtol, t_params.atol, true; method=error_norm_method, skip_r_inner=skip_r_inner, @@ -1943,10 +1976,11 @@ end """ """ -function ssp_rk!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, - moments, fields, spectral_objects, advect_objects, composition, collisions, - geometry, gyroavs, boundary_distributions, external_source_settings, num_diss_params, - advance, fp_arrays, scratch_dummy, manufactured_source_list,diagnostic_checks, istep) +function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa, vperp, + gyrophase, z, r, moments, fields, spectral_objects, advect_objects, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, advance, fp_arrays, + 
scratch_dummy, manufactured_source_list,diagnostic_checks, istep) begin_s_r_z_region() @@ -1982,16 +2016,20 @@ function ssp_rk!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase end for istage ∈ 1:n_rk_stages + if global_rank[] == 0 + println("ion step ", t_params.step_counter[], ".", istage, " ", t) + end # do an Euler time advance, with scratch[2] containing the advanced quantities # and scratch[1] containing quantities at time level n update_solution_vector!(scratch, moments, istage, composition, vpa, vperp, z, r) # calculate f^{(1)} = fⁿ + Δt*G[fⁿ] = scratch[2].pdf - euler_time_advance!(scratch[istage+1], scratch[istage], - pdf, fields, moments, - advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, t, - t_params.dt[], spectral_objects, composition, - collisions, geometry, scratch_dummy, manufactured_source_list, - external_source_settings, num_diss_params, advance, fp_arrays, istage) + euler_time_advance!(scratch[istage+1], old_scratch, pdf, fields, moments, + advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z, + r, t, t_params.dt[], spectral_objects, composition, + collisions, geometry, scratch_dummy, + manufactured_source_list, external_source_settings, + num_diss_params, advance, fp_arrays, istage) + diagnostic_moments = diagnostic_checks && istage == n_rk_stages rk_update!(scratch, scratch_implicit, moments, t_params, istage, composition) apply_all_bcs_constraints_update_moments!( @@ -2000,6 +2038,14 @@ function ssp_rk!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase gyroavs, num_diss_params, advance, scratch_dummy, diagnostic_moments) end + if t_params.adaptive + adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, moments, fields, + composition, collisions, geometry, + external_source_settings, spectral_objects, + advect_objects, gyroavs, num_diss_params, advance, + scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz) + end + istage = n_rk_stages+1 # update the pdf.norm and moments arrays as needed @@ 
-2292,13 +2338,22 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments, return nothing end +function backward_euler!(fvec_out, fvec_in, pdf, fields, moments, advect_objects, vz, vr, + vzeta, vpa, vperp, gyrophase, z, r, t, dt, spectral_objects, + composition, collisions, geometry, scratch_dummy, + manufactured_source_list, external_source_settings, + num_diss_params, advance, fp_arrays, istage) + + # No terms supported here yet + + return nothing +end + """ update the vector containing the pdf and any evolved moments of the pdf for use in the Runge-Kutta time advance """ -function update_solution_vector!(evolved, moments, istage, composition, vpa, vperp, z, r) - new_evolved = evolved[istage+1] - old_evolved = evolved[istage] +function update_solution_vector!(new_evolved, old_evolved, moments, composition, vpa, vperp, z, r) begin_s_r_z_region() @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin new_evolved.pdf[ivpa,ivperp,iz,ir,is] = old_evolved.pdf[ivpa,ivperp,iz,ir,is] From 3af052f32af62593420a7926215fa09c118a2632 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 2 May 2024 17:49:50 +0100 Subject: [PATCH 28/75] Jacobian-free Newton-Krylov (GMRES) nonlinear solver --- moment_kinetics/Project.toml | 1 + moment_kinetics/src/file_io.jl | 100 +++- moment_kinetics/src/moment_kinetics.jl | 18 +- moment_kinetics/src/nonlinear_solvers.jl | 669 +++++++++++++++++++++++ moment_kinetics/src/time_advance.jl | 221 ++++++-- 5 files changed, 933 insertions(+), 76 deletions(-) create mode 100644 moment_kinetics/src/nonlinear_solvers.jl diff --git a/moment_kinetics/Project.toml b/moment_kinetics/Project.toml index 45d1af96d..8de860c52 100644 --- a/moment_kinetics/Project.toml +++ b/moment_kinetics/Project.toml @@ -17,6 +17,7 @@ LegendrePolynomials = "3db4a2ba-fc88-11e8-3e01-49c72059a882" LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LsqFit = "2fda8390-95c7-5789-9bda-21331edee243" +MINPACK = 
"4854310b-de5a-5eb6-a2a5-c1dee2bd17f9" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e" diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl index 16c93099a..6609d275c 100644 --- a/moment_kinetics/src/file_io.jl +++ b/moment_kinetics/src/file_io.jl @@ -55,7 +55,7 @@ moments & fields only struct io_moments_info{Tfile, Ttime, Tphi, Tmomi, Tmomn, Tchodura_lower, Tchodura_upper, Texti1, Texti2, Texti3, Texti4, Texti5, Textn1, Textn2, Textn3, Textn4, Textn5, Tconstri, Tconstrn, - Tint, Tfailcause} + Tint, Tfailcause, Tnldiagnostics} # file identifier for the binary file to which data is written fid::Tfile # handle for the time variable @@ -126,6 +126,10 @@ struct io_moments_info{Tfile, Ttime, Tphi, Tmomi, Tmomn, Tchodura_lower, # Last successful timestep before most recent timestep failure, used by adaptve # timestepping algorithm dt_before_last_fail::Ttime + # Variables recording diagnostic information about non-linear solvers (used for + # implicit parts of timestep). These are stored in nested NamedTuples so that we can + # write diagnostics generically for as many nonlinear solvers as are created. + nl_solver_diagnostics::Tnldiagnostics # Use parallel I/O? 
parallel_io::Bool @@ -194,7 +198,7 @@ open the necessary output files function setup_file_io(io_input, boundary_distributions, vz, vr, vzeta, vpa, vperp, z, r, composition, collisions, evolve_density, evolve_upar, evolve_ppar, external_source_settings, input_dict, restart_time_index, - previous_runs_info, time_for_setup) + previous_runs_info, time_for_setup, nl_solver_params) begin_serial_region() @serial_region begin # Only read/write from first process in each 'block' @@ -222,13 +226,14 @@ function setup_file_io(io_input, boundary_distributions, vz, vr, vzeta, vpa, vpe external_source_settings, input_dict, io_input.parallel_io, comm_inter_block[], run_id, restart_time_index, previous_runs_info, - time_for_setup) + time_for_setup, nl_solver_params) io_dfns = setup_dfns_io(out_prefix, io_input.binary_format, boundary_distributions, r, z, vperp, vpa, vzeta, vr, vz, composition, collisions, evolve_density, evolve_upar, evolve_ppar, external_source_settings, input_dict, io_input.parallel_io, comm_inter_block[], run_id, - restart_time_index, previous_runs_info, time_for_setup) + restart_time_index, previous_runs_info, time_for_setup, + nl_solver_params) return ascii, io_moments, io_dfns end @@ -644,7 +649,8 @@ define dynamic (time-evolving) moment variables for writing to the hdf5 file function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species, r::coordinate, z::coordinate, parallel_io, external_source_settings, evolve_density, - evolve_upar, evolve_ppar) + evolve_upar, evolve_ppar, + nl_solver_params) @serial_region begin dynamic = create_io_group(fid, "dynamic_data", description="time evolving variables") @@ -718,6 +724,21 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species, description="Last successful timestep before most recent timestep failure, " * "used by adaptve timestepping algorithm") + io_nl_solver_diagnostics = NamedTuple( + term=>(n_solves=create_dynamic_variable!( + dynamic, "$(term)_n_solves", mk_int; 
parallel_io=parallel_io, + description="Number of nonlinear solves for $term"), + nonlinear_iterations=create_dynamic_variable!( + dynamic, "$(term)_nonlinear_iterations", mk_int; + parallel_io=parallel_io, + description="Number of nonlinear iterations for $term"), + linear_iterations=create_dynamic_variable!( + dynamic, "$(term)_linear_iterations", mk_int; + parallel_io=parallel_io, + description="Number of linear iterations for $term"), + ) + for term ∈ keys(nl_solver_params) if term !== nothing) + return io_moments_info(fid, io_time, io_phi, io_Er, io_Ez, io_density, io_upar, io_ppar, io_pperp, io_qpar, io_vth, io_dSdt, io_chodura_lower, io_chodura_upper, io_density_neutral, io_uz_neutral, @@ -740,7 +761,8 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species, neutral_constraints_C_coefficient, io_time_for_run, io_step_counter, io_dt, io_failure_counter, io_failure_caused_by, - io_limit_caused_by, io_dt_before_last_fail, parallel_io) + io_limit_caused_by, io_dt_before_last_fail, io_nl_solver_diagnostics, + parallel_io) end # For processes other than the root process of each shared-memory group... 
@@ -1073,7 +1095,8 @@ file """ function define_dynamic_dfn_variables!(fid, r, z, vperp, vpa, vzeta, vr, vz, composition, parallel_io, external_source_settings, - evolve_density, evolve_upar, evolve_ppar) + evolve_density, evolve_upar, evolve_ppar, + nl_solver_params) @serial_region begin io_moments = define_dynamic_moment_variables!(fid, composition.n_ion_species, @@ -1081,7 +1104,8 @@ function define_dynamic_dfn_variables!(fid, r, z, vperp, vpa, vzeta, vr, vz, com parallel_io, external_source_settings, evolve_density, evolve_upar, - evolve_ppar) + evolve_ppar, + nl_solver_params) dynamic = get_group(fid, "dynamic_data") @@ -1152,7 +1176,7 @@ function setup_moments_io(prefix, binary_format, vz, vr, vzeta, vpa, vperp, r, z composition, collisions, evolve_density, evolve_upar, evolve_ppar, external_source_settings, input_dict, parallel_io, io_comm, run_id, restart_time_index, previous_runs_info, - time_for_setup) + time_for_setup, nl_solver_params) @serial_region begin moments_prefix = string(prefix, ".moments") if !parallel_io @@ -1182,7 +1206,7 @@ function setup_moments_io(prefix, binary_format, vz, vr, vzeta, vpa, vperp, r, z io_moments = define_dynamic_moment_variables!( fid, composition.n_ion_species, composition.n_neutral_species, r, z, parallel_io, external_source_settings, evolve_density, evolve_upar, - evolve_ppar) + evolve_ppar, nl_solver_params) close(fid) @@ -1206,6 +1230,15 @@ function reopen_moments_io(file_info) function getvar(name) if name ∈ variable_list return dyn[name] + elseif name == "nl_solver_diagnostics" + nl_names = (name for name ∈ variable_list + if occursin("_nonlinear_iterations", name)) + nl_prefixes = (split(name, "_nonlinear_iterations")[1] + for name ∈ nl_names) + return NamedTuple(Symbol(term)=>(n_solves=dyn["$(term)_n_solves"], + nonlinear_iterations=dyn["$(term)_nonlinear_iterations"], + linear_iterations=dyn["$(term)_linear_iterations"]) + for term ∈ nl_prefixes) else return nothing end @@ -1238,7 +1271,8 @@ function 
reopen_moments_io(file_info) getvar("time_for_run"), getvar("step_counter"), getvar("dt"), getvar("failure_counter"), getvar("failure_caused_by"), getvar("limit_caused_by"), - getvar("dt_before_last_fail"), parallel_io) + getvar("dt_before_last_fail"), + getvar("nl_solver_diagnostics"), parallel_io) end # For processes other than the root process of each shared-memory group... @@ -1252,7 +1286,7 @@ function setup_dfns_io(prefix, binary_format, boundary_distributions, r, z, vper vzeta, vr, vz, composition, collisions, evolve_density, evolve_upar, evolve_ppar, external_source_settings, input_dict, parallel_io, io_comm, run_id, restart_time_index, - previous_runs_info, time_for_setup) + previous_runs_info, time_for_setup, nl_solver_params) @serial_region begin dfns_prefix = string(prefix, ".dfns") @@ -1288,7 +1322,8 @@ function setup_dfns_io(prefix, binary_format, boundary_distributions, r, z, vper ### in a struct for later access ### io_dfns = define_dynamic_dfn_variables!( fid, r, z, vperp, vpa, vzeta, vr, vz, composition, parallel_io, - external_source_settings, evolve_density, evolve_upar, evolve_ppar) + external_source_settings, evolve_density, evolve_upar, evolve_ppar, + nl_solver_params) close(fid) @@ -1312,6 +1347,15 @@ function reopen_dfns_io(file_info) function getvar(name) if name ∈ variable_list return dyn[name] + elseif name == "nl_solver_diagnostics" + nl_names = (name for name ∈ variable_list + if occursin("_nonlinear_iterations", name)) + nl_prefixes = (split(name, "_nonlinear_iterations")[1] + for name ∈ nl_names) + return NamedTuple(Symbol(term)=>(n_solves=dyn["$(term)_n_solves"], + nonlinear_iterations=dyn["$(term)_nonlinear_iterations"], + linear_iterations=dyn["$(term)_linear_iterations"]) + for term ∈ nl_prefixes) else return nothing end @@ -1346,7 +1390,8 @@ function reopen_dfns_io(file_info) getvar("dt"), getvar("failure_counter"), getvar("failure_caused_by"), getvar("limit_caused_by"), - getvar("dt_before_last_fail"), parallel_io) + 
getvar("dt_before_last_fail"), + getvar("nl_solver_diagnostics"), parallel_io) return io_dfns_info(fid, getvar("f"), getvar("f_neutral"), parallel_io, io_moments) @@ -1382,7 +1427,8 @@ write time-dependent moments data for ions and neutrals to the binary output fil """ function write_all_moments_data_to_binary(moments, fields, t, n_ion_species, n_neutral_species, io_or_file_info_moments, - t_idx, time_for_run, t_params, r, z) + t_idx, time_for_run, t_params, nl_solver_params, + r, z) @serial_region begin # Only read/write from first process in each 'block' @@ -1419,6 +1465,17 @@ function write_all_moments_data_to_binary(moments, fields, t, n_ion_species, only_root=true) append_to_dynamic_var(io_moments.dt_before_last_fail, t_params.dt_before_last_fail[], t_idx, parallel_io) + for (k,v) ∈ pairs(nl_solver_params) + if v === nothing + continue + end + append_to_dynamic_var(io_moments.nl_solver_diagnostics[k].n_solves, + v.n_solves[], t_idx, parallel_io) + append_to_dynamic_var(io_moments.nl_solver_diagnostics[k].nonlinear_iterations, + v.nonlinear_iterations[], t_idx, parallel_io) + append_to_dynamic_var(io_moments.nl_solver_diagnostics[k].linear_iterations, + v.linear_iterations[], t_idx, parallel_io) + end closefile && close(io_moments.fid) end @@ -1619,8 +1676,8 @@ binary output file """ function write_all_dfns_data_to_binary(pdf, moments, fields, t, n_ion_species, n_neutral_species, io_or_file_info_dfns, t_idx, - time_for_run, t_params, r, z, vperp, vpa, vzeta, vr, - vz) + time_for_run, t_params, nl_solver_params, r, z, + vperp, vpa, vzeta, vr, vz) @serial_region begin # Only read/write from first process in each 'block' @@ -1636,7 +1693,7 @@ function write_all_dfns_data_to_binary(pdf, moments, fields, t, n_ion_species, # This also updates the time. 
write_all_moments_data_to_binary(moments, fields, t, n_ion_species, n_neutral_species, io_dfns.io_moments, t_idx, - time_for_run, t_params, r, z) + time_for_run, t_params, nl_solver_params, r, z) # add the distribution function data at this time slice to the output file write_ion_dfns_data_to_binary(pdf.ion.norm, n_ion_species, io_dfns, t_idx, r, z, @@ -1901,7 +1958,7 @@ function debug_dump(vz::coordinate, vr::coordinate, vzeta::coordinate, vpa::coor #qr_neutral=nothing, qzeta_neutral=nothing, vth_neutral=nothing, phi=nothing, Er=nothing, Ez=nothing, - istage=0, label="") + istage=0, label="", nl_solver_params=()) global debug_output_file # Only read/write from first process in each 'block' @@ -1933,11 +1990,12 @@ function debug_dump(vz::coordinate, vr::coordinate, vzeta::coordinate, vpa::coor r, z, false, external_source_settings, evolve_density, evolve_upar, - evolve_ppar) + evolve_ppar, + nl_solver_params) io_dfns = define_dynamic_dfn_variables!( fid, r, z, vperp, vpa, vzeta, vr, vz, composition.n_ion_species, composition.n_neutral_species, false, external_source_settings, - evolve_density, evolve_upar, evolve_ppar) + evolve_density, evolve_upar, evolve_ppar, nl_solver_params) # create the "istage" variable, used to identify the rk stage where # `debug_dump()` was called diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl index 65f0cd93f..e14d414d2 100644 --- a/moment_kinetics/src/moment_kinetics.jl +++ b/moment_kinetics/src/moment_kinetics.jl @@ -30,6 +30,7 @@ include("quadrature.jl") include("hermite_spline_interpolation.jl") include("derivatives.jl") include("input_structs.jl") +include("nonlinear_solvers.jl") include("runge_kutta.jl") include("reference_parameters.jl") include("coordinates.jl") @@ -318,8 +319,8 @@ function setup_moment_kinetics(input_dict::AbstractDict; # create arrays and do other work needed to setup # the main time advance loop -- including normalisation of f by density if requested - moments, 
spectral_objects, scratch, scratch_implicit, advance, t_params, fp_arrays, - gyroavs, manufactured_source_list = + moments, spectral_objects, scratch, scratch_implicit, advance, advance_implicit, + t_params, fp_arrays, gyroavs, manufactured_source_list, nl_solver_params = setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrophase, vz_spectral, vr_spectral, vzeta_spectral, vpa_spectral, vperp_spectral, z_spectral, r_spectral, composition, moments, t_input, code_time, dt, @@ -335,26 +336,27 @@ function setup_moment_kinetics(input_dict::AbstractDict; ascii_io, io_moments, io_dfns = setup_file_io(io_input, boundary_distributions, vz, vr, vzeta, vpa, vperp, z, r, composition, collisions, moments.evolve_density, moments.evolve_upar, moments.evolve_ppar, external_source_settings, input_dict, - restart_time_index, previous_runs_info, time_for_setup) + restart_time_index, previous_runs_info, time_for_setup, nl_solver_params) # write initial data to ascii files write_data_to_ascii(pdf, moments, fields, vpa, vperp, z, r, code_time, composition.n_ion_species, composition.n_neutral_species, ascii_io) # write initial data to binary files write_all_moments_data_to_binary(moments, fields, code_time, - composition.n_ion_species, composition.n_neutral_species, io_moments, 1, 0.0, t_params, r, - z) + composition.n_ion_species, composition.n_neutral_species, io_moments, 1, 0.0, + t_params, nl_solver_params, r, z) write_all_dfns_data_to_binary(pdf, moments, fields, code_time, composition.n_ion_species, composition.n_neutral_species, io_dfns, 1, 0.0, - t_params, r, z, vperp, vpa, vzeta, vr, vz) + t_params, nl_solver_params, r, z, vperp, vpa, vzeta, vr, vz) begin_s_r_z_vperp_region() return pdf, scratch, scratch_implicit, code_time, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, composition, collisions, geometry, gyroavs, boundary_distributions, - external_source_settings, num_diss_params, advance, fp_arrays, 
scratch_dummy, - manufactured_source_list, ascii_io, io_moments, io_dfns + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, ascii_io, + io_moments, io_dfns end """ diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl new file mode 100644 index 000000000..963ab17ad --- /dev/null +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -0,0 +1,669 @@ +""" +Nonlinear solvers, using Jacobian-free Newton-Krylov methods. + +These solvers use an outer Newton iteration. Each step of the Newton iteration requires a +linear solve of the Jacobian. An 'inexact Jacobian' method is used, and the GMRES method +(GMRES is a type of Krylov solver) is used to (approximately) solve the (approximate) +linear system. + +!!! warning "parallelisation" + This module uses shared- and distributed-memory parallelism, so the functions in it + should not be called inside any kind of parallelised loop. This restriction should be + lifted somehow in future... + +`parallel_map()` is used to apply elementwise functions to arbitrary numbers of arguments +using shared-memory parallelism. We do this rather than writing the loops out explicitly +so that `newton_solve!()` and `linear_solve!()` can work for arrays with any combination +of dimensions. + +Useful references: +[1] V.A. Mousseau and D.A. Knoll, "Fully Implicit Kinetic Solution of Collisional Plasmas", Journal of Computational Physics 136, 308–323 (1997), https://doi.org/10.1006/jcph.1997.5736. +[2] V.A. Mousseau, "Fully Implicit Kinetic Modelling of Collisional Plasmas", PhD thesis, Idaho National Engineering Laboratory (1996), https://inis.iaea.org/collection/NCLCollectionStore/_Public/27/067/27067141.pdf. +[3] https://en.wikipedia.org/wiki/Generalized_minimal_residual_method +[4] https://www.rikvoorhaar.com/blog/gmres +[5] E. Carson , J. Liesen, Z. 
Strakoš, "Towards understanding CG and GMRES through examples", Linear Algebra and its Applications 692, 241–291 (2024), https://doi.org/10.1016/j.laa.2024.04.003. +""" +module nonlinear_solvers + +export setup_nonlinear_solve, newton_solve! + +using ..array_allocation: allocate_float, allocate_shared_float +using ..communication +using ..input_structs +using ..looping +using ..type_definitions: mk_float, mk_int + +using LinearAlgebra +using MINPACK +using MPI + +""" +Tolerances, GMRES settings, work arrays and iteration counters used by `newton_solve!()`. + +Instances are created by `setup_nonlinear_solve()`. `H` and `V` are the buffers passed to +`linear_solve!()`. `n_solves`, `nonlinear_iterations` and `linear_iterations` are +incremented at the end of each call to `newton_solve!()`. +""" +struct nl_solver_info{TH,TV,Tlig} + rtol::mk_float + atol::mk_float + linear_rtol::mk_float + linear_atol::mk_float + linear_restart::mk_int + linear_max_restarts::mk_int + H::TH + V::TV + linear_initial_guess::Tlig + n_solves::Ref{mk_int} + nonlinear_iterations::Ref{mk_int} + linear_iterations::Ref{mk_int} + serial_solve::Bool +end + +""" + setup_nonlinear_solve(input_dict, coords; default_rtol=1.0e-5, + default_atol=1.0e-12, serial_solve=false) + +Read the "nonlinear_solver" section of `input_dict` using +`set_defaults_and_check_section!()`, allocate the buffers `H`, `V` and +`linear_initial_guess`, and return them together with the solver options in an +`nl_solver_info` struct. + +If `serial_solve=true`, `H` and `V` are allocated with `allocate_float()`, otherwise they +are allocated with `allocate_shared_float()`. `V` has one dimension per entry of `coords` +plus a final dimension of size `linear_restart+1`. + +`coords` is a NamedTuple of coordinates corresponding to the dimensions of the variable +that will be solved. The entries in `coords` should be ordered the same as the memory +layout of the variable to be solved (i.e. fastest-varying first). +""" +function setup_nonlinear_solve(input_dict, coords; default_rtol=1.0e-5, + default_atol=1.0e-12, serial_solve=false) + nl_solver_section = set_defaults_and_check_section!( + input_dict, "nonlinear_solver"; + rtol=default_rtol, + atol=default_atol, + linear_rtol=1.0e-3, + linear_atol=1.0e-15, + linear_restart=10, + linear_max_restarts=0, + ) + nl_solver_input = Dict_to_NamedTuple(nl_solver_section) + + linear_restart = nl_solver_input.linear_restart + + # NOTE(review): `coordinate` is not explicitly imported in this module - confirm that + # one of the `using` statements above brings it into scope. + if serial_solve + H = allocate_float(linear_restart + 1, linear_restart) + V = allocate_float((isa(c, coordinate) ? c.n : c for c ∈ values(coords))..., linear_restart+1) + H .= 0.0 + V .= 0.0 + else + H = allocate_shared_float(linear_restart + 1, linear_restart) + V = allocate_shared_float((isa(c, coordinate) ? 
c.n : c for c ∈ values(coords))..., linear_restart+1) + + begin_serial_region() + @serial_region begin + H .= 0.0 + V .= 0.0 + end + end + + linear_initial_guess = zeros(linear_restart) + + return nl_solver_info(nl_solver_input.rtol, nl_solver_input.atol, + nl_solver_input.linear_rtol, nl_solver_input.linear_atol, + linear_restart, nl_solver_input.linear_max_restarts, H, V, + linear_initial_guess, Ref(0), Ref(0), Ref(0), serial_solve) +end + +""" + newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params; + left_preconditioner=nothing, right_preconditioner=nothing, coords) + +`x` is the initial guess at the solution, and is overwritten by the result of the Newton +solve. + +`residual_func!(residual, x)` is the function we are trying to find a solution of. It calculates +```math +\\mathtt{residual} = F(\\mathtt{x}) +``` +where we are trying to solve \$F(x)=0\$. + +`residual`, `delta_x`, `rhs_delta`, `v` and `w` are buffer arrays, with the same size as `x`, +used internally. + +`left_preconditioner` or `right_preconditioner` apply preconditioning. They should be +passed a function that solves \$P.x = b\$ where \$P\$ is the preconditioner matrix, \$b\$ +is given by the values passed to the function as the argument, and the result \$x\$ is +returned by overwriting the argument. + +`coords` is a NamedTuple containing the `coordinate` structs corresponding to each +dimension in `x`. + + +Tolerances +---------- + +Note that the meaning of the relative tolerance `rtol` and absolute tolerance `atol` is +very different for the outer Newton iteration and the inner GMRES iteration. + +For the outer Newton iteration the residual \$R(x^n)\$ measures the departure of the +system from the solution (at each grid point). 
Its size can be compared to the size of the +solution `x`, so it makes sense to define an 'error norm' for \$R(x^n)\$ as +```math +E(x^n) = \\left\\lVert \\frac{R(x^n)}{\\mathtt{rtol} \\left| x^n \\right| + \\mathtt{atol}} \\right\\rVert_2 +``` +where \$\\left\\lVert \\cdot \\right\\rVert_2\$ is the 'L2 norm' (square-root of sum of +squares). We can further try to define a grid-size independent error norm by dividing out +the number of grid points to get a root-mean-square (RMS) error rather than an L2 norm. +```math +E_{\\mathrm{RMS}}(x^n) = \\sqrt{ \\frac{1}{N} \\sum_i \\left( \\frac{R(x^n)_i}{\\mathtt{rtol} \\left| x^n_i \\right| + \\mathtt{atol}} \\right)^2 } +``` +where \$N\$ is the total number of grid points. + +In contrast, GMRES is constructed to minimise the L2 norm of \$r_k = b - A\\cdot x_k\$ +where GMRES is solving the linear system \$A\\cdot x = b\$, \$x_k\$ is the approximation +to the solution \$x\$ at the \$k\$'th iteration and \$r_k\$ is the residual at the +\$k\$'th iteration. There is no flexibility to measure error relative to \$x\$ in any +sense. For GMRES, a 'relative tolerance' is relative to the initial residual \$r_0\$, +which is equal to the right-hand-side \$b\$ when the first iterate \$x_0\$ is zero (i.e. +when no initial guess is given). [Where a non-zero initial guess is given it might be +better to use a different stopping criterion, see Carson et al. section 3.8.] The +stopping criterion for the GMRES iteration is therefore +```math +\\left\\lVert r_k \\right\\rVert < \\max(\\mathtt{linear\\_rtol} \\left\\lVert r_0 \\right\\rVert, \\mathtt{linear\\_atol}) = \\max(\\mathtt{linear\\_rtol} \\left\\lVert b \\right\\rVert, \\mathtt{linear\\_atol}) +``` +As the GMRES solve is only used to get the right 'direction' for the next Newton step, it +is not necessary to have a very tight `linear_rtol` for the GMRES solve. 
+""" +function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, + nl_solver_params; left_preconditioner=nothing, + right_preconditioner=nothing, coords) + + rtol = nl_solver_params.rtol + atol = nl_solver_params.atol + + distributed_error_norm = get_distributed_error_norm(coords, rtol, atol, x) + distributed_linear_norm = get_distributed_linear_norm(coords) + distributed_dot = get_distributed_dot(coords) + parallel_map = get_parallel_map(coords) + + residual_func!(residual, x) + residual_norm = distributed_error_norm(residual, coords) + counter = 0 + linear_counter = 0 + + parallel_map(()->0.0, delta_x) + + close_counter = -1 + close_linear_counter = -1 + previous_residual_norm = residual_norm + # The error norm is scaled by rtol and atol, so a value of 1.0 means that the residual + # is at the requested tolerance. + while residual_norm > 1.0 + counter += 1 + #println("\nNewton ", counter) + + if left_preconditioner === nothing + left_preconditioner = identity + end + if right_preconditioner === nothing + right_preconditioner = identity + end + + # Solve (approximately?): + # J δx = -RHS(x) + parallel_map(()->0.0, delta_x) + linear_its = linear_solve!(x, residual_func!, residual, delta_x, v, w; + coords=coords, rtol=nl_solver_params.linear_rtol, + atol=nl_solver_params.linear_atol, + restart=nl_solver_params.linear_restart, + max_restarts=nl_solver_params.linear_max_restarts, + left_preconditioner=left_preconditioner, + right_preconditioner=right_preconditioner, + H=nl_solver_params.H, V=nl_solver_params.V, + rhs_delta=rhs_delta, + initial_guess=nl_solver_params.linear_initial_guess, + distributed_norm=distributed_linear_norm, + distributed_dot=distributed_dot, + parallel_map=parallel_map, + serial_solve=nl_solver_params.serial_solve) + linear_counter += linear_its + + # If the residual does not decrease, we will do a line search to find an update + # that does decrease the residual. 
The value of `x` is used to define the + # normalisation value with rtol that is used to calculate the residual, so do not + # want to update it until the line search is completed (otherwise the norm changes + # during the line search, which might make it fail to converge). So calculate the + # updated value in the buffer `w` until the line search is completed, and only + # then copy it into `x`. + parallel_map((x) -> x, w, x) + parallel_map((x,delta_x) -> x + delta_x, w, x, delta_x) + residual_func!(residual, w) + + # For the Newton iteration, we want the norm divided by the (sqrt of the) number + # of grid points, so we can use a tolerance that is independent of the size of the + # grid. This is unlike the norms needed in `linear_solve!()`. + residual_norm = distributed_error_norm(residual, coords) + if residual_norm > previous_residual_norm + # Do a line search between x and x+delta_x to try to find an update that does + # decrease residual_norm. The residual has to be evaluated at the trial point + # stored in `w`, because `x` is not updated until the step has been chosen. + s = 0.5 + while s > 1.0e-5 + parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) + residual_func!(residual, w) + residual_norm = distributed_error_norm(residual, coords) + if residual_norm ≤ previous_residual_norm + break + end + s *= 0.5 + end + + if residual_norm > previous_residual_norm + # Failed to find a point that decreases the residual, so try a negative + # step + s = -1.0e-5 + parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) + residual_func!(residual, w) + residual_norm = distributed_error_norm(residual, coords) + if residual_norm > previous_residual_norm + # That didn't work either, so just take the full step and hope for + # convergence later + parallel_map((x,delta_x) -> x + delta_x, w, x, delta_x) + residual_func!(residual, w) + residual_norm = distributed_error_norm(residual, coords) + end + end + end + # The step has been chosen, so copy the updated value into `x`. 
+ parallel_map((w) -> w, x, w) + previous_residual_norm = residual_norm + + #println("Newton residual ", residual_norm, " ", linear_its, " $rtol $atol") + + if residual_norm < 
0.1/rtol && close_counter < 0 && close_linear_counter < 0 + close_counter = counter + close_linear_counter = linear_counter + end + + if counter > 100000 + error("maximum iteration limit reached") + end + end + nl_solver_params.n_solves[] += 1 + nl_solver_params.nonlinear_iterations[] += counter + nl_solver_params.linear_iterations[] += linear_counter +# println("Newton iterations: ", counter) +# println("Final residual: ", residual_norm) +# println("Total linear iterations: ", linear_counter) +# println("Linear iterations per Newton: ", linear_counter / counter) +# +# println("Newton iterations after close: ", counter - close_counter) +# println("Total linear iterations after close: ", linear_counter - close_linear_counter) +# println("Linear iterations per Newton after close: ", (linear_counter - close_linear_counter) / (counter - close_counter)) +# println() +end + +""" + get_distributed_error_norm(coords, rtol, atol, x) + +Get a 'distributed_error_norm' function that acts on arrays with dimensions given by the +entries in `coords`. + +The returned function is called as `distributed_error_norm(residual, coords)`; `rtol`, +`atol` and `x` are passed on to the underlying norm function as keyword arguments. +""" +function get_distributed_error_norm(coords, rtol, atol, x) + dims = keys(coords) + if dims == (:z,) + this_norm = distributed_error_norm_z + elseif dims == (:vpa,) + this_norm = distributed_error_norm_vpa + else + error("dims=$dims is not supported yet. Need to write another " + * "`distributed_error_norm_*()` function in nonlinear_solvers.jl") + end + + wrapped_norm = (args...; kwargs...) -> this_norm(args...; rtol=rtol, atol=atol, x, + kwargs...) 
+ + return wrapped_norm +end + +""" +Error norm of `residual` for a variable distributed over the z-dimension. + +Each entry is divided by `rtol * abs(x[iz]) + atol`, the squares are summed over all +processes, and the sum is divided by `coords.z.n_global` before taking the square root. +The result is broadcast so that the same value is returned on every process in +`comm_block[]`. +""" +function distributed_error_norm_z(residual::AbstractArray{mk_float, 1}, coords; rtol, + atol, x) + z = coords.z + + begin_z_region() + + local_norm = 0.0 + if z.irank < z.nrank - 1 + # On every rank except the last one in z, the final local point (iz == z.n) is + # left out of the sum - presumably because it is duplicated on the next rank, + # TODO confirm. + zend = z.n + @loop_z iz begin + if iz == zend + continue + end + local_norm += (residual[iz] / (rtol * abs(x[iz]) + atol))^2 + end + else + @loop_z iz begin + local_norm += (residual[iz] / (rtol * abs(x[iz]) + atol))^2 + end + end + + _block_synchronize() + block_norm = MPI.Reduce(local_norm, +, comm_block[]) + + if block_rank[] == 0 + global_norm = MPI.Allreduce(block_norm, +, comm_inter_block[]) + global_norm = sqrt(global_norm / z.n_global) + else + global_norm = nothing + end + global_norm = MPI.bcast(global_norm, comm_block[]; root=0) + + return global_norm +end + +""" +Error norm of `residual` for a variable that depends only on vpa. + +Returns the root-mean-square of `residual[i] / (rtol * abs(x[i]) + atol)`. `coords` is not +used. +""" +function distributed_error_norm_vpa(residual::AbstractArray{mk_float, 1}, coords; rtol, + atol, x) + # No parallelism needed when the implicit solve is over vpa - assume that this will be + # called inside a parallelised s_r_z_vperp loop. + residual_norm = 0.0 + for i ∈ eachindex(residual, x) + residual_norm += (residual[i] / (rtol * abs(x[i]) + atol))^2 + end + + residual_norm = sqrt(residual_norm / length(residual)) + + return residual_norm +end + +""" + get_distributed_linear_norm(coords) + +Get a 'distributed_linear_norm' function that acts on arrays with dimensions given by the +entries in `coords`. +""" +function get_distributed_linear_norm(coords) + dims = keys(coords) + if dims == (:z,) + return distributed_linear_norm_z + elseif dims == (:vpa,) + return distributed_linear_norm_vpa + else + error("dims=$dims is not supported yet. 
Need to write another " + * "`distributed_linear_norm_*()` function in nonlinear_solvers.jl") + end +end + +""" +L2 norm (square-root of sum of squares) of `residual` for a variable distributed over the +z-dimension. The same value is returned on every process in `comm_block[]`. +""" +function distributed_linear_norm_z(residual::AbstractArray{mk_float, 1}, coords) + z = coords.z + + begin_z_region() + + local_norm = 0.0 + if z.irank < z.nrank - 1 + zend = z.n + @loop_z iz begin + if iz == zend + continue + end + local_norm += residual[iz]^2 + end + else + @loop_z iz begin + local_norm += residual[iz]^2 + end + end + + _block_synchronize() + block_norm = MPI.Reduce(local_norm, +, comm_block[]) + + if block_rank[] == 0 + global_norm = MPI.Allreduce(block_norm, +, comm_inter_block[]) + global_norm = sqrt(global_norm) + else + global_norm = nothing + end + global_norm = MPI.bcast(global_norm, comm_block[]; root=0) + + return global_norm +end + +function distributed_linear_norm_vpa(residual::AbstractArray{mk_float, 1}, coords) + # No parallelism needed when the implicit solve is over vpa - assume that this will be + # called inside a parallelised s_r_z_vperp loop. + return norm(residual) +end + +""" + get_distributed_dot(coords) + +Get a 'distributed_dot' function that acts on arrays with dimensions given by the entries +in `coords`. The returned function is called as `distributed_dot(x, y)`. +""" +function get_distributed_dot(coords) + dims = keys(coords) + if dims == (:z,) + # distributed_dot_z() reads `coords.z`, so capture `coords` in a closure to keep + # the two-argument call signature that is used in `linear_solve!()`. + return (x, y) -> distributed_dot_z(x, y, coords) + elseif dims == (:vpa,) + return distributed_dot_vpa + else + error("dims=$dims is not supported yet. 
Need to write another " + * "`distributed_dot_*()` function in nonlinear_solvers.jl") + end +end + +""" +Dot product of `x` and `y` for variables distributed over the z-dimension. + +The result is only returned on the process with `block_rank[] == 0`; `nothing` is returned +on the other processes in the block. +""" +function distributed_dot_z(x::AbstractArray{mk_float, 1}, y::AbstractArray{mk_float, 1}, + coords) + + begin_z_region() + + z = coords.z + + local_dot = 0.0 + if z.irank < z.nrank - 1 + zend = z.n + @loop_z iz begin + if iz == zend + continue + end + local_dot += x[iz] * y[iz] + end + else + @loop_z iz begin + local_dot += x[iz] * y[iz] + end + end + + _block_synchronize() + block_dot = MPI.Reduce(local_dot, +, comm_block[]) + + if block_rank[] == 0 + global_dot = MPI.Allreduce(block_dot, +, comm_inter_block[]) + else + global_dot = nothing + end + + return global_dot +end + +function distributed_dot_vpa(x::AbstractArray{mk_float, 1}, y::AbstractArray{mk_float, 1}) + # No parallelism needed when the implicit solve is over vpa - assume that this will be + # called inside a parallelised s_r_z_vperp loop. + return dot(x, y) +end + +""" + get_parallel_map(coords) + +Get a 'parallel_map' function that acts on arrays with dimensions given by the entries in +`coords`. +""" +function get_parallel_map(coords) + dims = keys(coords) + if dims == (:z,) + return parallel_map_z + elseif dims == (:vpa,) + return parallel_map_vpa + else + error("dims=$dims is not supported yet. Need to write another " + * "`parallel_map_*()` function in nonlinear_solvers.jl") + end +end + +""" +Set `result[iz] = func(args[1][iz], args[2][iz], ...)` for each `iz` in the `@loop_z` range. +""" +function parallel_map_z(func, result::AbstractArray{mk_float, 1}, + args::AbstractArray{mk_float, 1}...) + + begin_z_region() + + @loop_z iz begin + result[iz] = func((x[iz] for x ∈ args)...) 
+ end + + return nothing +end + +""" +Set `result[i] = func(args[1][i], args[2][i], ...)` for every element of `result`. +""" +function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, + args::AbstractArray{mk_float, 1}...) + # No parallelism needed when the implicit solve is over vpa - assume that this will be + # called inside a parallelised s_r_z_vperp loop. + if length(args) == 0 + # map!() needs at least one input collection, so fill `result` element by element. + for i ∈ eachindex(result) + result[i] = func() + end + else + map!(func, result, args...) 
+ end + return nothing +end + +""" + linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol, atol, + restart, max_restarts, left_preconditioner, right_preconditioner, + H, V, rhs_delta, initial_guess, distributed_norm, distributed_dot, + parallel_map, serial_solve) + +Apply the GMRES algorithm to solve the 'linear problem' J.δx^n = -R(x^n), which is needed +at each step of the outer Newton iteration (in `newton_solve!()`). + +The Jacobian-vector product is approximated by a finite difference of `residual_func!`, so +the Jacobian matrix is never formed. `residual0` is the residual at `x`; +`left_preconditioner` is applied to it in place. `delta_x` provides the initial guess and +is updated in place with the result. `v`, `w` and `rhs_delta` are work buffers. + +Returns the number of GMRES iterations that were taken. +""" +function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol, atol, + restart, max_restarts, left_preconditioner, right_preconditioner, + H, V, rhs_delta, initial_guess, distributed_norm, distributed_dot, + parallel_map, serial_solve) + # Solve (approximately?): + # J δx = -residual0 + + epsilon = 1.0e-8 + inv_epsilon = 1.0 / epsilon + + # Overwrites `v` with the finite-difference approximation + # (F(x + epsilon * v) - residual0) / epsilon ≈ J.v + # NOTE(review): when this is called `residual0` has already had `left_preconditioner` + # applied in place, while `rhs_delta` has not - confirm that this is intended when + # `left_preconditioner` is not the identity. + function approximate_Jacobian_vector_product!(v) + right_preconditioner(v) + + parallel_map((x,v) -> x + epsilon * v, v, x, v) + residual_func!(rhs_delta, v) + parallel_map((rhs_delta, residual0) -> (rhs_delta - residual0) * inv_epsilon, + v, rhs_delta, residual0) + left_preconditioner(v) + return v + end + + # To start with, copy the initial guess `delta_x` into the buffer 'v', and apply the + # left-preconditioner (in place) to residual0. + parallel_map((delta_x) -> delta_x, v, delta_x) + left_preconditioner(residual0) + # This function transforms the data stored in 'v' from δx to ≈J.δx + approximate_Jacobian_vector_product!(v) + # Now we actually set 'w' as the first Krylov vector, and normalise it. + parallel_map((residual0, v) -> -residual0 - v, w, residual0, v) + beta = distributed_norm(w, coords) + parallel_map((w) -> w/beta, @view(V[:,1]), w) + + # Set tolerance for GMRES iteration to rtol times the initial residual, unless this is + # so small that it is smaller than atol, in which case use atol instead. 
+ tol = max(rtol * beta, atol) + + lsq_result = nothing + residual = Inf + counter = 0 + restart_counter = 1 + while true + for i ∈ 1:restart + counter += 1 + #println("Linear ", counter) + + # Compute next Krylov vector + parallel_map((V) -> V, w, @view(V[:,i])) + approximate_Jacobian_vector_product!(w) + + # Gram-Schmidt orthogonalization + for j ∈ 1:i + parallel_map((V) -> V, v, @view(V[:,j])) + w_dot_Vj = distributed_dot(w, v) + if serial_solve + H[j,i] = w_dot_Vj + else + begin_serial_region() + @serial_region begin + H[j,i] = w_dot_Vj + end + end + parallel_map((w, V) -> w - H[j,i] * V, w, w, @view(V[:,j])) + end + norm_w = distributed_norm(w, coords) + if serial_solve + H[i+1,i] = norm_w + else + begin_serial_region() + @serial_region begin + H[i+1,i] = norm_w + end + end + parallel_map((w) -> w / H[i+1,i], @view(V[:,i+1]), w) + + # Residual of the small least-squares problem H[1:i+1,1:i] * guess = beta * e_1 + function temporary_residual!(result, guess) + #println("temporary residual ", size(result), " ", size(@view(H[1:i+1,1:i])), " ", size(guess)) + result .= @view(H[1:i+1,1:i]) * guess + result[1] -= beta + end + + # Second argument to fsolve needs to be a Vector{Float64} + if serial_solve + resize!(initial_guess, i) + initial_guess[1] = beta + initial_guess[2:i] .= 0.0 + lsq_result = fsolve(temporary_residual!, initial_guess, i+1; method=:lm) + residual = norm(lsq_result.f) + else + begin_serial_region() + if global_rank[] == 0 + resize!(initial_guess, i) + initial_guess[1] = beta + initial_guess[2:i] .= 0.0 + lsq_result = fsolve(temporary_residual!, initial_guess, i+1; method=:lm) + residual = norm(lsq_result.f) + else + residual = nothing + end + residual = MPI.bcast(residual, comm_world; root=0) + end + if residual < tol + break + end + end + + # Update initial guess for restart + if serial_solve + y = lsq_result.x + else + if global_rank[] == 0 + y = lsq_result.x + else + y = nothing + end + y = MPI.bcast(y, comm_world; root=0) + end + + # The following is the `parallel_map()` version of + # delta_x .= delta_x .+ sum(y[i] .* 
V[:,i] for i ∈ 1:length(y)) + # slightly abusing splatting to get the sum into a lambda-function. + parallel_map((delta_x, V...) -> delta_x + sum(this_y * this_V for (this_y, this_V) ∈ zip(y, V)), + delta_x, delta_x, (@view(V[:,i]) for i ∈ 1:length(y))...) + right_preconditioner(delta_x) + + if residual < tol || restart_counter > max_restarts + break + end + + restart_counter += 1 + + # Store J.delta_x in the buffer `v`, to use it to calculate the new first + # Krylov vector v/beta. + parallel_map((delta_x) -> delta_x, v, delta_x) + approximate_Jacobian_vector_product!(v) + + # Note residual0 has already had the left_preconditioner!() applied to it. + parallel_map((residual0, v) -> -residual0 - v, v, residual0, v) + beta = distributed_norm(v, coords) + for i ∈ 2:length(y) + parallel_map(() -> 0.0, @view(V[:,i])) + end + parallel_map((v) -> v/beta, @view(V[:,1]), v) + end + + return counter +end + +end diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index dcdc5911b..9539be94c 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -40,6 +40,7 @@ using ..charge_exchange: charge_exchange_collisions_1V!, charge_exchange_collisi using ..ionization: ionization_collisions_1V!, ionization_collisions_3V!, constant_ionization_source! using ..krook_collisions: krook_collisions! 
using ..external_sources +using ..nonlinear_solvers using ..numerical_dissipation: vpa_boundary_buffer_decay!, vpa_boundary_buffer_diffusion!, vpa_dissipation!, z_dissipation!, r_dissipation!, vperp_dissipation!, @@ -419,6 +420,15 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop external_source_settings, num_diss_params, manufactured_solns_input, r, z, vperp, vpa, vzeta, vr, vz) + advance_implicit = + setup_implicit_advance_flags(moments, composition, t_params, collisions, + external_source_settings, num_diss_params, + manufactured_solns_input, r, z, vperp, vpa, vzeta, + vr, vz) + + # Set up parameters for Jacobian-free Newton-Krylov solver used for implicit part of + # timesteps. + nl_solver_params = () begin_serial_region() @@ -652,8 +662,9 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop # Ensure all processes are synchronized at the end of the setup _block_synchronize() - return moments, spectral_objects, scratch, scratch_implicit, advance, t_params, - fp_arrays, gyroavs, manufactured_source_list + return moments, spectral_objects, scratch, scratch_implicit, advance, + advance_implicit, t_params, fp_arrays, gyroavs, manufactured_source_list, + nl_solver_params end """ @@ -817,6 +828,66 @@ function setup_advance_flags(moments, composition, t_params, collisions, manufactured_solns_test, r_diffusion, vpa_diffusion, vperp_diffusion, vz_diffusion) end +""" +create the 'advance_info' struct to be used in the time advance to +indicate which parts of the equations are to be advanced implicitly (using +`backward_euler!()`). 
+""" +function setup_implicit_advance_flags(moments, composition, t_params, collisions, + external_source_settings, num_diss_params, + manufactured_solns_input, r, z, vperp, vpa, vzeta, + vr, vz) + # default is not to concurrently advance different operators + advance_vpa_advection = false + advance_vperp_advection = false + advance_z_advection = false + advance_r_advection = false + advance_cx_1V = false + advance_cx = false + advance_ionization = false + advance_ionization_1V = false + advance_ionization_source = false + advance_krook_collisions_ii = false + advance_external_source = false + advance_numerical_dissipation = false + advance_sources = false + advance_continuity = false + advance_force_balance = false + advance_energy = false + advance_neutral_z_advection = false + advance_neutral_r_advection = false + advance_neutral_vz_advection = false + advance_neutral_external_source = false + advance_neutral_sources = false + advance_neutral_continuity = false + advance_neutral_force_balance = false + advance_neutral_energy = false + r_diffusion = false + vpa_diffusion = false + vperp_diffusion = false + vz_diffusion = false + explicit_weakform_fp_collisions = false + if t_params.split_operators + error("Implicit timesteps do not support `t_params.split_operators=true`") + end + + manufactured_solns_test = manufactured_solns_input.use_for_advance + + return advance_info(advance_vpa_advection, advance_vperp_advection, advance_z_advection, advance_r_advection, + advance_neutral_z_advection, advance_neutral_r_advection, + advance_neutral_vz_advection, advance_cx, advance_cx_1V, + advance_ionization, advance_ionization_1V, + advance_ionization_source, advance_krook_collisions_ii, + explicit_weakform_fp_collisions, + advance_external_source, advance_numerical_dissipation, + advance_sources, advance_continuity, advance_force_balance, + advance_energy, advance_neutral_external_source, + advance_neutral_sources, advance_neutral_continuity, + 
advance_neutral_force_balance, advance_neutral_energy, + manufactured_solns_test, r_diffusion, vpa_diffusion, + vperp_diffusion, vz_diffusion) +end + function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies_ion,nspecies_neutral) dummy_s = allocate_float(nspecies_ion) @@ -1001,11 +1072,13 @@ df/dt + δv⋅∂f/∂z = 0, with δv(z,t)=v(z,t)-v₀(z) for prudent choice of v₀, expect δv≪v so that explicit time integrator can be used without severe CFL condition """ -function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, - moments, fields, spectral_objects, advect_objects, - composition, collisions, geometry, gyroavs, boundary_distributions, - external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy, - manufactured_source_list, ascii_io, io_moments, io_dfns) +function time_advance!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, + advect_objects, composition, collisions, geometry, gyroavs, + boundary_distributions, external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, fp_arrays, + scratch_dummy, manufactured_source_list, ascii_io, io_moments, + io_dfns) @debug_detect_redundant_block_synchronize begin # Only want to check for redundant _block_synchronize() calls during the @@ -1078,17 +1151,18 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, - num_diss_params, advance, - t_params.step_counter[]) + num_diss_params, nl_solver_params, advance, + advance_implicit, t_params.step_counter[]) else time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, moments, fields, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, boundary_distributions, 
external_source_settings, - num_diss_params, advance, fp_arrays, - scratch_dummy, manufactured_source_list, - diagnostic_checks, t_params.step_counter[]) + num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, + manufactured_source_list, diagnostic_checks, + t_params.step_counter[]) end # update the time t += t_params.previous_dt[] @@ -1205,7 +1279,8 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr write_all_moments_data_to_binary(moments, fields, t, composition.n_ion_species, composition.n_neutral_species, io_moments, - iwrite_moments, time_for_run, t_params, r, z) + iwrite_moments, time_for_run, t_params, + nl_solver_params, r, z) if t_params.steady_state_residual # Calculate some residuals to see how close simulation is to steady state @@ -1288,8 +1363,9 @@ function time_advance!(pdf, scratch, t, t_params, vz, vr, vzeta, vpa, vperp, gyr write_all_dfns_data_to_binary(pdf, moments, fields, t, composition.n_ion_species, composition.n_neutral_species, io_dfns, - iwrite_dfns, time_for_run, t_params, r, z, - vperp, vpa, vzeta, vr, vz) + iwrite_dfns, time_for_run, t_params, + nl_solver_params, r, z, vperp, vpa, vzeta, vr, + vz) iwrite_dfns += 1 begin_s_r_z_vperp_region() @debug_detect_redundant_block_synchronize begin @@ -1321,8 +1397,8 @@ end function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, - external_source_settings, num_diss_params, advance, - istep) + external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) # define some abbreviated variables for tidiness n_ion_species = composition.n_ion_species @@ -1337,16 +1413,16 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para advance.vpa_advection = true time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, 
z_spectral, moments, fields, vpa_advect, z_advect, - composition, collisions, external_source_settings, num_diss_params, advance, - istep) + composition, collisions, external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) advance.vpa_advection = false # z_advection! advances the operator-split 1D advection equation in z # apply z-advection operation to all species (ion and neutral) advance.z_advection = true time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, - composition, collisions, external_source_settings, num_diss_params, advance, - istep) + composition, collisions, external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) advance.z_advection = false # account for charge exchange collisions between ions and neutrals if composition.n_neutral_species > 0 @@ -1355,7 +1431,7 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.cx_collisions = false end if collisions.ionization > 0.0 @@ -1363,7 +1439,7 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa, z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.ionization_collisions = false end end @@ -1372,7 +1448,7 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, 
t_params, z, vpa, z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect, z_SL, vpa_SL, composition, collisions, sources, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.krook_collisions_ii = false end # and add the source terms associated with redefining g = pdf/density or pdf*vth/density @@ -1382,7 +1458,7 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.source_terms = false end # use the continuity equation to update the density @@ -1391,7 +1467,7 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.continuity = false end # use force balance to update the parallel flow @@ -1400,7 +1476,7 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.force_balance = false end # use the energy equation to update the parallel pressure @@ -1409,7 +1485,7 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, 
vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.energy = false end else @@ -1419,7 +1495,7 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.energy = false end # use force balance to update the parallel flow @@ -1428,7 +1504,7 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.force_balance = false end # use the continuity equation to update the density @@ -1437,7 +1513,7 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.continuity = false end # and add the source terms associated with redefining g = pdf/density or pdf*vth/density @@ -1447,7 +1523,7 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, 
advance, advance_implicit, istep) advance.source_terms = false end # account for charge exchange collisions between ions and neutrals @@ -1457,7 +1533,7 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa, z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.ionization = false end if collisions.charge_exchange > 0.0 @@ -1465,7 +1541,7 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, - advance, istep) + nl_solver_params, advance, advance_implicit, istep) advance.cx_collisions = false end end @@ -1474,16 +1550,16 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para advance.z_advection = true time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, - composition, collisions, external_source_settings, num_diss_params, advance, - istep) + composition, collisions, external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) advance.z_advection = false # advance the operator-split 1D advection equation in vpa # vpa-advection only applies for ion species advance.vpa_advection = true time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, - composition, collisions, external_source_settings, num_diss_params, advance, - istep) + composition, collisions, external_source_settings, num_diss_params, + nl_solver_params, advance, 
advance_implicit, istep) advance.vpa_advection = false end return nothing @@ -1495,15 +1571,17 @@ function time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vzeta, vpa, vperp, gyrophase, z, r, moments, fields, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, boundary_distributions, - external_source_settings, num_diss_params, advance, + external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, diagnostic_checks, istep) ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, moments, fields, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, boundary_distributions, - external_source_settings, num_diss_params, advance, fp_arrays, scratch_dummy, - manufactured_source_list, diagnostic_checks, istep) + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + diagnostic_checks, istep) return nothing end @@ -1979,8 +2057,9 @@ end function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, moments, fields, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, boundary_distributions, - external_source_settings, num_diss_params, advance, fp_arrays, - scratch_dummy, manufactured_source_list,diagnostic_checks, istep) + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, + manufactured_source_list,diagnostic_checks, istep) begin_s_r_z_region() @@ -2019,9 +2098,57 @@ function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa if global_rank[] == 0 println("ion step ", t_params.step_counter[], ".", istage, " ", t) end - # do an Euler time advance, with scratch[2] containing the advanced quantities - # and scratch[1] containing quantities at 
time level n - update_solution_vector!(scratch, moments, istage, composition, vpa, vperp, z, r) + if t_params.rk_coefs_implicit !== nothing + update_solution_vector!(scratch_implicit[istage], scratch[istage], moments, + composition, vpa, vperp, z, r) + if t_params.implicit_coefficient_is_zero[istage] + # No implicit solve needed at this stage. Do an explicit step of the + # implicitly-evolved terms so we can store their time-derivative at this + # stage. + euler_time_advance!(scratch_implicit[istage], scratch[istage], + pdf, fields, moments, advect_objects, vz, vr, vzeta, + vpa, vperp, gyrophase, z, r, t, t_params.dt[], + spectral_objects, composition, collisions, geometry, + scratch_dummy, manufactured_source_list, + external_source_settings, num_diss_params, + advance_implicit, fp_arrays, istage) + # The result of the forward-Euler step is just a hack to store the + # (explicit) time-derivative of the implicitly advanced terms. The result + # is not used as input to the explicit part of the IMEX advance. + update_solution_vector!(scratch[istage+1], scratch[istage], moments, + composition, vpa, vperp, z, r) + else + # Backward-Euler step for implicitly-evolved terms. + # Note the timestep for this solve is rk_coefs_implict[istage,istage]*dt. + # The diagonal elements are equal to the Butcher 'a' coefficients + # rk_coefs_implicit[istage,istage]=a[istage,istage]. + backward_euler!(scratch_implicit[istage], scratch[istage], pdf, + fields, moments, advect_objects, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, t, + t_params.dt[] * t_params.rk_coefs_implicit[istage,istage], + spectral_objects, composition, collisions, geometry, + scratch_dummy, manufactured_source_list, + external_source_settings, num_diss_params, + nl_solver_params, advance_implicit, fp_arrays, istage) + # The result of the implicit solve gives the state vector at 'istage' + # which is used as input to the explicit part of the IMEX time step. 
+ # Note that boundary conditions and constraints should already have been + # applied to the solution in `scratch_implicit[istage]`, as part of the + # `backward_euler!()` solve. + update_solution_vector!(scratch[istage+1], scratch_implicit[istage], moments, + composition, vpa, vperp, z, r) + end + old_scratch = scratch_implicit[istage] + else + # Fully explicit method starts the forward-Euler step with the result from the + # previous stage. + update_solution_vector!(scratch[istage+1], scratch[istage], moments, + composition, vpa, vperp, z, r) + old_scratch = scratch[istage] + end + # do an Euler time advance, with scratch[istage+1] containing the advanced + # quantities and scratch[istage] containing quantities at time level n, RK stage + # istage # calculate f^{(1)} = fⁿ + Δt*G[fⁿ] = scratch[2].pdf euler_time_advance!(scratch[istage+1], old_scratch, pdf, fields, moments, advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z, @@ -2342,7 +2469,7 @@ function backward_euler!(fvec_out, fvec_in, pdf, fields, moments, advect_objects vzeta, vpa, vperp, gyrophase, z, r, t, dt, spectral_objects, composition, collisions, geometry, scratch_dummy, manufactured_source_list, external_source_settings, - num_diss_params, advance, fp_arrays, istage) + num_diss_params, nl_solver_params, advance, fp_arrays, istage) # No terms supported here yet From 8dd9f292ce6770698320f2621db149ce816e35e2 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 7 May 2024 22:37:05 +0100 Subject: [PATCH 29/75] Tests for nonlinear_solvers --- .../test/nonlinear_solver_tests.jl | 219 ++++++++++++++++++ moment_kinetics/test/runtests.jl | 1 + 2 files changed, 220 insertions(+) create mode 100644 moment_kinetics/test/nonlinear_solver_tests.jl diff --git a/moment_kinetics/test/nonlinear_solver_tests.jl b/moment_kinetics/test/nonlinear_solver_tests.jl new file mode 100644 index 000000000..dfca94b83 --- /dev/null +++ b/moment_kinetics/test/nonlinear_solver_tests.jl @@ -0,0 +1,219 @@ +module 
NonlinearSolverTests + +include("setup.jl") + +using moment_kinetics.array_allocation: allocate_shared_float +using moment_kinetics.communication +using moment_kinetics.coordinates: coordinate +using moment_kinetics.input_structs: advection_input +using moment_kinetics.looping +using moment_kinetics.looping: setup_loop_ranges! +using moment_kinetics.nonlinear_solvers +using moment_kinetics.type_definitions: mk_float, mk_int + +using MPI + +function linear_test() + @testset "linear test" begin + println(" - linear test") + # Test represents constant-coefficient diffusion, in 1D steady state, with a + # central finite-difference discretisation of the second derivative. + # + # Note, need to use newton_solve!() here even though it is a linear problem, + # because the inexact Jacobian-vector product we use in linear_solve!() means + # linear_solve!() on its own does not converge to the correct answer. + + n = 16 + restart = 8 + max_restarts = 1 + atol = 1.0e-10 + + irank_z, nrank_z, comm_sub_z, irank_r, nrank_r, comm_sub_r = + setup_distributed_memory_MPI(1, 1, 1, 1) + + setup_loop_ranges!(block_rank[], block_size[]; s=1, sn=0, r=1, z=n, vperp=1, vpa=1, + vzeta=1, vr=1, vz=1) + + A = zeros(n,n) + i = 1 + A[i,i] = -2.0 + A[i,i+1] = 1.0 + for i ∈ 2:n-1 + A[i,i-1] = 1.0 + A[i,i] = -2.0 + A[i,i+1] = 1.0 + end + i = n + A[i,i-1] = 1.0 + A[i,i] = -2.0 + + z = collect(0:n-1) ./ (n-1) + b = @. 
- z * (1.0 - z) + + the_coord = coordinate("foo", n, n, n, 1, 1, 1, 0, 1.0, zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_int, 0), zeros(mk_int, 0), + zeros(mk_int, 0), zeros(mk_int, 0), zeros(mk_int, 0, 0), + "", "", "", "", zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0, 0), zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0, 0), zeros(mk_float, 0, 0), + advection_input("", 0.0, 0.0, 0.0), zeros(mk_float, 0), + zeros(mk_float, 0), MPI.COMM_NULL, 1:n, 1:n, + zeros(mk_float, 0), zeros(mk_float, 0), "", + zeros(mk_float, 0), false, zeros(mk_float, 0, 0, 0), + zeros(mk_float, 0, 0)) + coords = (z=the_coord,) + + function rhs_func!(residual, x) + begin_serial_region() + @serial_region begin + residual .= A * x - b + end + return nothing + end + + x = allocate_shared_float(n) + residual = allocate_shared_float(n) + delta_x = allocate_shared_float(n) + rhs_delta = allocate_shared_float(n) + v = allocate_shared_float(n) + w = allocate_shared_float(n) + + begin_serial_region() + @serial_region begin + x .= 0.0 + residual .= 0.0 + delta_x .= 0.0 + rhs_delta .= 0.0 + v .= 0.0 + w .= 0.0 + end + + nl_solver_params = setup_nonlinear_solve( + Dict{String,Any}("nonlinear_solver" => + Dict{String,Any}("rtol" => 0.0, + "atol" => atol, + "linear_restart" => restart, + "linear_max_restarts" => max_restarts)), + coords) + + newton_solve!(x, rhs_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params; + coords) + + begin_serial_region() + @serial_region begin + x_direct = A \ b + + @test isapprox(x, x_direct; atol=100.0*atol) + end + end +end + +function nonlinear_test() + @testset "non-linear test" begin + println(" - non-linear test") + # Test represents constant-coefficient diffusion, in 1D steady state, with a + # central finite-difference discretisation of the second derivative. 
+ # + # Note, here the discretised operator is multiplied by D = |x|^2.5 (see + # rhs_func!() below), which makes the problem non-linear, so it is solved + # with newton_solve!(). + + n = 16 + restart = 10 + max_restarts = 0 + atol = 1.0e-10 + + irank_z, nrank_z, comm_sub_z, irank_r, nrank_r, comm_sub_r = + setup_distributed_memory_MPI(1, 1, 1, 1) + + setup_loop_ranges!(block_rank[], block_size[]; s=1, sn=0, r=1, z=n, vperp=1, vpa=1, + vzeta=1, vr=1, vz=1) + + z = collect(0:n-1) ./ (n-1) + b = @. - z * (1.0 - z) + + the_coord = coordinate("foo", n, n, n, 1, 1, 1, 0, 1.0, zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_int, 0), zeros(mk_int, 0), + zeros(mk_int, 0), zeros(mk_int, 0), zeros(mk_int, 0, 0), + "", "", "", "", zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0, 0), zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0, 0), zeros(mk_float, 0, 0), + advection_input("", 0.0, 0.0, 0.0), zeros(mk_float, 0), + zeros(mk_float, 0), MPI.COMM_NULL, 1:n, 1:n, + zeros(mk_float, 0), zeros(mk_float, 0), "", + zeros(mk_float, 0), false, zeros(mk_float, 0, 0, 0), + zeros(mk_float, 0, 0)) + coords = (z=the_coord,) + + function rhs_func!(residual, x) + begin_serial_region() + @serial_region begin + i = 1 + D = abs(x[i])^2.5 + residual[i] = D * (- 2.0 * x[i] + x[i+1]) - b[i] + for i ∈ 2:n-1 + D = abs(x[i])^2.5 + residual[i] = D * (x[i-1] - 2.0 * x[i] + x[i+1]) - b[i] + end + i = n + D = abs(x[i])^2.5 + residual[i] = D * (x[i-1] - 2.0 * x[i]) - b[i] + end + return nothing + end + + x = allocate_shared_float(n) + residual = allocate_shared_float(n) + delta_x = allocate_shared_float(n) + rhs_delta = allocate_shared_float(n) + v = allocate_shared_float(n) + w = allocate_shared_float(n) + + begin_serial_region() + @serial_region begin + x .= 1.0 + residual .= 0.0 +
delta_x .= 0.0 + rhs_delta .= 0.0 + v .= 0.0 + w .= 0.0 + end + + nl_solver_params = setup_nonlinear_solve( + Dict{String,Any}("nonlinear_solver" => + Dict{String,Any}("rtol" => 0.0, + "atol" => atol, + "linear_restart" => restart, + "linear_max_restarts" => max_restarts)), + coords) + + newton_solve!(x, rhs_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params; + coords) + + rhs_func!(residual, x) + + begin_serial_region() + @serial_region begin + @test isapprox(residual, zeros(n); atol=4.0*atol) + end + end +end + +function runtests() + @testset "non-linear solvers" begin + println("non-linear solver tests") + linear_test() + nonlinear_test() + end +end + +end # NonlinearSolverTests + +using .NonlinearSolverTests +NonlinearSolverTests.runtests() diff --git a/moment_kinetics/test/runtests.jl b/moment_kinetics/test/runtests.jl index 1b78fca36..fa0f5d64f 100644 --- a/moment_kinetics/test/runtests.jl +++ b/moment_kinetics/test/runtests.jl @@ -7,6 +7,7 @@ function runtests() include(joinpath(@__DIR__, "calculus_tests.jl")) include(joinpath(@__DIR__, "interpolation_tests.jl")) include(joinpath(@__DIR__, "loop_setup_tests.jl")) + include(joinpath(@__DIR__, "nonlinear_solver_tests.jl")) include(joinpath(@__DIR__, "velocity_integral_tests.jl")) include(joinpath(@__DIR__, "sound_wave_tests.jl")) include(joinpath(@__DIR__, "nonlinear_sound_wave_tests.jl")) From d96409492bc771a68cceb2be730fe3bef58c7495 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 8 May 2024 21:02:20 +0100 Subject: [PATCH 30/75] Add tuple of variable names that exist in output file to run_info --- moment_kinetics/src/load_data.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index 8c498c1c6..522d77b19 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -2722,6 +2722,9 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin vz_chunk_size = 
1 end + # Get variable names just from the first restart, for simplicity + variable_names = get_variable_keys(get_group(fids0[1], "dynamic_data")) + if parallel_io files = fids0 else @@ -2749,7 +2752,8 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin vz_spectral=vz_spectral, r_chunk_size=r_chunk_size, z_chunk_size=z_chunk_size, vperp_chunk_size=vperp_chunk_size, vpa_chunk_size=vpa_chunk_size, vzeta_chunk_size=vzeta_chunk_size, - vr_chunk_size=vr_chunk_size, vz_chunk_size=vz_chunk_size, dfns=dfns) + vr_chunk_size=vr_chunk_size, vz_chunk_size=vz_chunk_size, + variable_names=variable_names, dfns=dfns) return run_info end From 251bbb73106a85d65cd2836feb8a5a7eefc85c7b Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 8 May 2024 20:39:12 +0100 Subject: [PATCH 31/75] Load and plot nonlinear solver diagnostics --- .../src/makie_post_processing.jl | 43 +++++++++++++++++++ moment_kinetics/src/load_data.jl | 14 ++++++ 2 files changed, 57 insertions(+) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index 7faf260ce..95e24d969 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -7438,6 +7438,41 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) put_legend_right(limits_fig, ax) + # Plot nonlinear solver diagnostics (if any) + nl_solvers_fig, ax = get_1d_ax(; xlabel="time", ylabel="iterations per solve/nonlinear-iteration") + has_nl_solver = false + + for ri ∈ run_info + if length(run_info) == 1 + prefix = "" + else + prefix = ri.run_name * " " + end + if it !== nothing + time = ri.time[it] + else + time = ri.time + end + + nl_nonlinear_iterations_names = Tuple(v for v ∈ ri.variable_names + if occursin("_nonlinear_iterations", v)) + if nl_nonlinear_iterations_names != () + has_nl_solver = 
true + nl_prefixes = (split(v, "_nonlinear_iterations")[1] + for v ∈ nl_nonlinear_iterations_names) + for p ∈ nl_prefixes + nonlinear_iterations = get_variable(ri, "$(p)_nonlinear_iterations_per_solve") + linear_iterations = get_variable(ri, "$(p)_linear_iterations_per_nonlinear_iteration") + plot_1d(time, nonlinear_iterations, label=prefix * " " * p * " NL per solve", ax=ax) + plot_1d(time, linear_iterations, label=prefix * " " * p * " L per NL", ax=ax) + end + end + end + + if has_nl_solver + put_legend_right(nl_solvers_fig, ax) + end + if plot_prefix !== nothing outfile = plot_prefix * "timestep_diagnostics.pdf" @@ -7448,11 +7483,19 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) outfile = plot_prefix * "timestep_limits.pdf" save(outfile, limits_fig) + + if has_nl_solver + outfile = plot_prefix * "nonlinear_solver_iterations.pdf" + save(outfile, nl_solvers_fig) + end else display(steps_fig) display(dt_fig) display(CFL_fig) display(limits_fig) + if has_nl_solver + display(nl_solvers_fig) + end end end diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index 522d77b19..b8b07f821 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -3717,6 +3717,20 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t variable[it] = min_CFL end variable = select_slice_of_variable(variable; kwargs...) + elseif occursin("_nonlinear_iterations_per_solve", variable_name) + prefix = split(variable_name, "_nonlinear_iterations_per_solve")[1] + nl_nsolves = get_per_step_from_cumulative_variable( + run_info, "$(prefix)_n_solves"; kwargs...) + nl_iterations = get_per_step_from_cumulative_variable( + run_info, "$(prefix)_nonlinear_iterations"; kwargs...) 
+ variable = nl_iterations ./ nl_nsolves + elseif occursin("_linear_iterations_per_nonlinear_iteration", variable_name) + prefix = split(variable_name, "_linear_iterations_per_nonlinear_iteration")[1] + nl_iterations = get_per_step_from_cumulative_variable( + run_info, "$(prefix)_nonlinear_iterations"; kwargs...) + nl_linear_iterations = get_per_step_from_cumulative_variable( + run_info, "$(prefix)_linear_iterations"; kwargs...) + variable = nl_linear_iterations ./ nl_iterations else variable = postproc_load_variable(run_info, variable_name; kwargs...) end From 94ad3dfbb1251fea1602895dc8d1c3d221309fd5 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 12 May 2024 14:31:00 +0100 Subject: [PATCH 32/75] Leave t_input as a Dict so it can be modified by setup_time_advance() --- moment_kinetics/src/load_data.jl | 6 +-- moment_kinetics/src/moment_kinetics_input.jl | 39 +++++++------- moment_kinetics/src/time_advance.jl | 53 ++++++++++---------- 3 files changed, 49 insertions(+), 49 deletions(-) diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index b8b07f821..430edb5a9 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -2736,9 +2736,9 @@ function get_run_info_no_setup(run_dir::Union{AbstractString,Tuple{AbstractStrin run_info = (run_name=run_name, run_prefix=base_prefix, parallel_io=parallel_io, ext=ext, nblocks=nblocks, files=files, input=input, n_ion_species=n_ion_species, n_neutral_species=n_neutral_species, - evolve_moments=evolve_moments, composition=composition, species=species, - collisions=collisions, geometry=geometry, drive_input=drive_input, - num_diss_params=num_diss_params, + evolve_moments=evolve_moments, t_input=t_input, composition=composition, + species=species, collisions=collisions, geometry=geometry, + drive_input=drive_input, num_diss_params=num_diss_params, external_source_settings=external_source_settings, evolve_density=evolve_density, evolve_upar=evolve_upar, 
evolve_ppar=evolve_ppar, diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl index 4aa85b666..09301a919 100644 --- a/moment_kinetics/src/moment_kinetics_input.jl +++ b/moment_kinetics/src/moment_kinetics_input.jl @@ -225,38 +225,37 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true) if timestepping_section["atol_upar"] === nothing timestepping_section["atol_upar"] = 1.0e-2 * timestepping_section["rtol"] end - timestepping_input = Dict_to_NamedTuple(timestepping_section) - if !(0.0 < timestepping_input.step_update_prefactor < 1.0) - error("step_update_prefactor=$(timestepping_input.step_update_prefactor) must " + if !(0.0 < timestepping_section["step_update_prefactor"] < 1.0) + error("step_update_prefactor=$(timestepping_section["step_update_prefactor"]) must " * "be between 0.0 and 1.0.") end - if timestepping_input.max_increase_factor ≤ 1.0 - error("max_increase_factor=$(timestepping_input.max_increase_factor) must " + if timestepping_section["max_increase_factor"] ≤ 1.0 + error("max_increase_factor=$(timestepping_section["max_increase_factor"]) must " * "be greater than 1.0.") end - if timestepping_input.max_increase_factor_near_last_fail ≤ 1.0 + if timestepping_section["max_increase_factor_near_last_fail"] ≤ 1.0 error("max_increase_factor_near_last_fail=" - * "$(timestepping_input.max_increase_factor_near_last_fail) must be " + * "$(timestepping_section["max_increase_factor_near_last_fail"]) must be " * "greater than 1.0.") end - if !isinf(timestepping_input.max_increase_factor_near_last_fail) && - timestepping_input.max_increase_factor_near_last_fail > timestepping_input.max_increase_factor + if !isinf(timestepping_section["max_increase_factor_near_last_fail"]) && + timestepping_section["max_increase_factor_near_last_fail"] > timestepping_section["max_increase_factor"] error("max_increase_factor_near_last_fail=" - * "$(timestepping_input.max_increase_factor_near_last_fail) 
should be " + * "$(timestepping_section["max_increase_factor_near_last_fail"]) should be " * "less than max_increase_factor=" - * "$(timestepping_input.max_increase_factor).") + * "$(timestepping_section["max_increase_factor"]).") end - if timestepping_input.last_fail_proximity_factor ≤ 1.0 + if timestepping_section["last_fail_proximity_factor"] ≤ 1.0 error("last_fail_proximity_factor=" - * "$(timestepping_input.last_fail_proximity_factor) must be " + * "$(timestepping_section["last_fail_proximity_factor"]) must be " * "greater than 1.0.") end - if timestepping_input.minimum_dt > timestepping_input.maximum_dt - error("minimum_dt=$(timestepping_input.minimum_dt) must be less than " - * "maximum_dt=$(timestepping_input.maximum_dt)") + if timestepping_section["minimum_dt"] > timestepping_section["maximum_dt"] + error("minimum_dt=$(timestepping_section["minimum_dt"]) must be less than " + * "maximum_dt=$(timestepping_section["maximum_dt"])") end - if timestepping_input.maximum_dt ≤ 0.0 - error("maximum_dt=$(timestepping_input.maximum_dt) must be positive") + if timestepping_section["maximum_dt"] ≤ 0.0 + error("maximum_dt=$(timestepping_section["maximum_dt"]) must be positive") end use_for_init_is_default = !(("manufactured_solns" ∈ keys(scan_input)) && @@ -663,12 +662,12 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true) end # check input (and initialized coordinate structs) to catch errors/unsupported options - check_input(io, output_dir, timestepping_input.nstep, timestepping_input.dt, r, z, + check_input(io, output_dir, timestepping_section["nstep"], timestepping_section["dt"], r, z, vpa, vperp, composition, species_immutable, evolve_moments, num_diss_params, save_inputs_to_txt, collisions) # return immutable structs for z, vpa, species and composition - all_inputs = (io_immutable, evolve_moments, timestepping_input, z, z_spectral, r, + all_inputs = (io_immutable, evolve_moments, timestepping_section, z, z_spectral, r, r_spectral, 
vpa, vpa_spectral, vperp, vperp_spectral, gyrophase, gyrophase_spectral, vz, vz_spectral, vr, vr_spectral, vzeta, vzeta_spectral, composition, species_immutable, collisions, geometry, diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 9539be94c..d1a39d5de 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -236,9 +236,9 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, dt_before_last_fail_reload, manufactured_solns_input, io_input) rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages, rk_order, adaptive, low_storage, CFL_prefactor = - setup_runge_kutta_coefficients!(t_input.type, - t_input.CFL_prefactor, - t_input.split_operators) + setup_runge_kutta_coefficients!(t_input["type"], + t_input["CFL_prefactor"], + t_input["split_operators"]) if !adaptive # No adaptive timestep, want to use the value from the input file even when we are @@ -253,32 +253,32 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, dt_before_last_fail = allocate_shared_float(1) step_to_output = allocate_shared_bool(1) if block_rank[] == 0 - dt_shared[] = dt_reload === nothing ? t_input.dt : dt_reload - previous_dt_shared[] = dt_reload === nothing ? t_input.dt : dt_reload + dt_shared[] = dt_reload === nothing ? t_input["dt"] : dt_reload + previous_dt_shared[] = dt_reload === nothing ? t_input["dt"] : dt_reload next_output_time[] = 0.0 - dt_before_output[] = dt_reload === nothing ? t_input.dt : dt_reload + dt_before_output[] = dt_reload === nothing ? t_input["dt"] : dt_reload dt_before_last_fail[] = dt_before_last_fail_reload === nothing ? 
Inf : dt_before_last_fail_reload step_to_output[] = false end _block_synchronize() - end_time = code_time + t_input.dt * t_input.nstep + end_time = code_time + t_input["dt"] * t_input["nstep"] epsilon = 1.e-11 - if adaptive || t_input.write_after_fixed_step_count - if t_input.nwrite == 0 + if adaptive || t_input["write_after_fixed_step_count"] + if t_input["nwrite"] == 0 moments_output_times = [end_time] else - moments_output_times = [code_time + i*t_input.dt - for i ∈ t_input.nwrite:t_input.nwrite:t_input.nstep] + moments_output_times = [code_time + i*t_input["dt"] + for i ∈ t_input["nwrite"]:t_input["nwrite"]:t_input["nstep"]] end if moments_output_times[end] < end_time - epsilon push!(moments_output_times, end_time) end - if t_input.nwrite_dfns == 0 + if t_input["nwrite_dfns"] == 0 dfns_output_times = [end_time] else - dfns_output_times = [code_time + i*t_input.dt - for i ∈ t_input.nwrite_dfns:t_input.nwrite_dfns:t_input.nstep] + dfns_output_times = [code_time + i*t_input["dt"] + for i ∈ t_input["nwrite_dfns"]:t_input["nwrite_dfns"]:t_input["nstep"]] end if dfns_output_times[end] < end_time - epsilon push!(dfns_output_times, end_time) @@ -289,25 +289,26 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, dfns_output_times = mk_float[] end - if t_input.high_precision_error_sum + if t_input["high_precision_error_sum"] error_sum_zero = Float128(0.0) else error_sum_zero = 0.0 end - return time_info(n_variables, t_input.nstep, end_time, dt_shared, previous_dt_shared, + return time_info(n_variables, t_input["nstep"], end_time, dt_shared, previous_dt_shared, next_output_time, dt_before_output, dt_before_last_fail, CFL_prefactor, step_to_output, Ref(0), Ref(0), mk_int[], mk_int[], - t_input.nwrite, t_input.nwrite_dfns, moments_output_times, - dfns_output_times, t_input.type, rk_coefs, rk_coefs_implicit, + t_input["nwrite"], t_input["nwrite_dfns"], moments_output_times, + dfns_output_times, t_input["type"], rk_coefs, rk_coefs_implicit, 
implicit_coefficient_is_zero, n_rk_stages, rk_order, adaptive, - low_storage, t_input.rtol, t_input.atol, t_input.atol_upar, - t_input.step_update_prefactor, t_input.max_increase_factor, - t_input.max_increase_factor_near_last_fail, - t_input.last_fail_proximity_factor, t_input.minimum_dt, - t_input.maximum_dt, t_input.write_after_fixed_step_count, - error_sum_zero, t_input.split_operators, - t_input.steady_state_residual, t_input.converged_residual_value, - manufactured_solns_input.use_for_advance, t_input.stopfile_name) + low_storage, t_input["rtol"], t_input["atol"], t_input["atol_upar"], + t_input["step_update_prefactor"], t_input["max_increase_factor"], + t_input["max_increase_factor_near_last_fail"], + t_input["last_fail_proximity_factor"], t_input["minimum_dt"], + t_input["maximum_dt"], + t_input["write_after_fixed_step_count"], + t_input["split_operators"], t_input["steady_state_residual"], + t_input["converged_residual_value"], + manufactured_solns_input.use_for_advance, t_input["stopfile_name"]) end """ From 345f9fa74fb5861c8f1123a813bbd2eddf325874 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 12 May 2024 13:44:02 +0100 Subject: [PATCH 33/75] Implicit solve for vpa_advection term --- .../src/makie_post_processing.jl | 15 +- moment_kinetics/src/coordinates.jl | 16 ++- moment_kinetics/src/file_io.jl | 2 +- moment_kinetics/src/input_structs.jl | 1 + moment_kinetics/src/moment_kinetics.jl | 4 +- moment_kinetics/src/moment_kinetics_input.jl | 1 + moment_kinetics/src/nonlinear_solvers.jl | 5 +- moment_kinetics/src/time_advance.jl | 75 +++++++--- moment_kinetics/src/vpa_advection.jl | 132 ++++++++++++++++++ 9 files changed, 219 insertions(+), 32 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index 95e24d969..24fd3ae2f 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ 
b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -7298,7 +7298,10 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) else time = ri.time end - CFL_vars = ["minimum_CFL_ion_z", "minimum_CFL_ion_vpa"] + CFL_vars = ["minimum_CFL_ion_z"] + if !ri.t_input["implicit_vpa_advection"] + push!(CFL_vars, "minimum_CFL_ion_vpa") + end if ri.n_neutral_species > 0 push!(CFL_vars, "minimum_CFL_neutral_z", "minimum_CFL_neutral_vz") end @@ -7408,10 +7411,12 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) plot_1d(time, @view limit_caused_by_per_output[counter,:]; label=prefix * "ion z advect", ax=ax, linestyle=:dot) - # Ion vpa advection - counter += 1 - plot_1d(time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "ion vpa advect", ax=ax, linestyle=:dot) + if !ri.t_input["implicit_vpa_advection"] + # Ion vpa advection + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "ion vpa advect", ax=ax, linestyle=:dot) + end if ri.n_neutral_species > 0 # Ion z advection diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl index 746d3143d..743b33545 100644 --- a/moment_kinetics/src/coordinates.jl +++ b/moment_kinetics/src/coordinates.jl @@ -77,6 +77,12 @@ struct coordinate{T <: AbstractVector{mk_float}} scratch2::Array{mk_float,1} # scratch3 is an array used for intermediate calculations requiring n entries scratch3::Array{mk_float,1} + # scratch4 is an array used for intermediate calculations requiring n entries + scratch4::Array{mk_float,1} + # scratch5 is an array used for intermediate calculations requiring n entries + scratch5::Array{mk_float,1} + # scratch6 is an array used for intermediate calculations requiring n entries + scratch6::Array{mk_float,1} # scratch_shared is a shared-memory array used for intermediate calculations requiring # n entries scratch_shared::T @@ -221,10 +227,12 @@ function define_coordinate(input, 
parallel_io::Bool=false; run_directory=nothing coord = coordinate(input.name, n_global, n_local, input.ngrid, input.nelement_global, input.nelement_local, input.nrank, input.irank, input.L, grid, cell_width, igrid, ielement, imin, imax, igrid_full, input.discretization, input.fd_option, input.cheb_option, - input.bc, wgts, uniform_grid, duniform_dgrid, scratch, copy(scratch), copy(scratch), scratch_shared, scratch_shared2, - scratch_2d, copy(scratch_2d), advection, send_buffer, receive_buffer, input.comm, - local_io_range, global_io_range, element_scale, element_shift, input.element_spacing_option, - element_boundaries, radau_first_element, other_nodes, one_over_denominator) + input.bc, wgts, uniform_grid, duniform_dgrid, scratch, copy(scratch), + copy(scratch), copy(scratch), copy(scratch), copy(scratch), scratch_shared, + scratch_shared2, scratch_2d, copy(scratch_2d), advection, send_buffer, + receive_buffer, input.comm, local_io_range, global_io_range, element_scale, + element_shift, input.element_spacing_option, element_boundaries, + radau_first_element, other_nodes, one_over_denominator) if coord.n == 1 && occursin("v", coord.name) spectral = null_velocity_dimension_info() diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl index 6609d275c..a47f64c3d 100644 --- a/moment_kinetics/src/file_io.jl +++ b/moment_kinetics/src/file_io.jl @@ -709,7 +709,7 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species, parallel_io=parallel_io, description="cumulative count of how many times each variable caused a " * "timestep failure for the run") - n_limit_vars = 4 + 1 + evolve_density + evolve_upar + evolve_ppar + 2 + n_limit_vars = 4 + 1 + evolve_density + evolve_upar + evolve_ppar + 1 + (nl_solver_params.vpa_advection === nothing) if n_neutral_species > 0 n_limit_vars += 1 + evolve_density + evolve_upar + evolve_ppar + 2 end diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index 
db193e24d..399654ef1 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -74,6 +74,7 @@ struct time_info{Terrorsum <: Real, Trkimp, Timpzero} last_fail_proximity_factor::mk_float minimum_dt::mk_float maximum_dt::mk_float + implicit_vpa_advection::Bool write_after_fixed_step_count::Bool error_sum_zero::Terrorsum split_operators::Bool diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl index e14d414d2..443891f2f 100644 --- a/moment_kinetics/src/moment_kinetics.jl +++ b/moment_kinetics/src/moment_kinetics.jl @@ -47,6 +47,7 @@ include("moment_constraints.jl") include("fokker_planck_test.jl") include("fokker_planck_calculus.jl") include("fokker_planck.jl") +include("boundary_conditions.jl") include("advection.jl") include("vpa_advection.jl") include("z_advection.jl") @@ -55,7 +56,6 @@ include("vperp_advection.jl") include("neutral_r_advection.jl") include("neutral_z_advection.jl") include("neutral_vz_advection.jl") -include("boundary_conditions.jl") include("charge_exchange.jl") include("ionization.jl") include("krook_collisions.jl") @@ -326,7 +326,7 @@ function setup_moment_kinetics(input_dict::AbstractDict; z_spectral, r_spectral, composition, moments, t_input, code_time, dt, dt_before_last_fail, collisions, species, geometry, boundary_distributions, external_source_settings, num_diss_params, manufactured_solns_input, - advection_structs, scratch_dummy, restarting) + advection_structs, scratch_dummy, restarting, input_dict) # This is the closest we can get to the end time of the setup before writing it to the # output file diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl index 09301a919..dc3c8e9c6 100644 --- a/moment_kinetics/src/moment_kinetics_input.jl +++ b/moment_kinetics/src/moment_kinetics_input.jl @@ -211,6 +211,7 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true) 
last_fail_proximity_factor=1.05, minimum_dt=0.0, maximum_dt=Inf, + implicit_vpa_advection=true, write_after_fixed_step_count=false, high_precision_error_sum=false, ) diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 963ab17ad..fe82a5e34 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -225,6 +225,9 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, # of grid points, so we can use a tolerance that is independent of the size of the # grid. This is unlike the norms needed in `linear_solve!()`. residual_norm = distributed_error_norm(residual, coords) + if isnan(residual_norm) + error("NaN in Newton iteration at iteration $counter") + end if residual_norm > previous_residual_norm # Do a line search between x and x+delta_x to try to find an update that does # decrease residual_norm @@ -507,7 +510,7 @@ function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, # called inside a parallelised s_r_z_vperp loop. if length(args) == 0 for i ∈ eachindex(result) - result = func() + result[i] = func() end else map!(func, result, args...) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index d1a39d5de..de047c2ca 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -35,7 +35,7 @@ using ..neutral_r_advection: update_speed_neutral_r!, neutral_advection_r! using ..neutral_z_advection: update_speed_neutral_z!, neutral_advection_z! using ..neutral_vz_advection: update_speed_neutral_vz!, neutral_advection_vz! using ..vperp_advection: update_speed_vperp!, vperp_advection! -using ..vpa_advection: update_speed_vpa!, vpa_advection! +using ..vpa_advection: update_speed_vpa!, vpa_advection!, implicit_vpa_advection! using ..charge_exchange: charge_exchange_collisions_1V!, charge_exchange_collisions_3V! 
using ..ionization: ionization_collisions_1V!, ionization_collisions_3V!, constant_ionization_source! using ..krook_collisions: krook_collisions! @@ -289,6 +289,11 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, dfns_output_times = mk_float[] end + if rk_coefs_implicit === nothing + # Not an IMEX scheme, so cannot have any implicit terms + t_input["implicit_vpa_advection"] = false + end + if t_input["high_precision_error_sum"] error_sum_zero = Float128(0.0) else @@ -304,8 +309,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, t_input["step_update_prefactor"], t_input["max_increase_factor"], t_input["max_increase_factor_near_last_fail"], t_input["last_fail_proximity_factor"], t_input["minimum_dt"], - t_input["maximum_dt"], - t_input["write_after_fixed_step_count"], + t_input["maximum_dt"], t_input["implicit_vpa_advection"], + t_input["write_after_fixed_step_count"], error_sum_zero, t_input["split_operators"], t_input["steady_state_residual"], t_input["converged_residual_value"], manufactured_solns_input.use_for_advance, t_input["stopfile_name"]) @@ -325,7 +330,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop dt_before_last_fail_reload, collisions, species, geometry, boundary_distributions, external_source_settings, num_diss_params, manufactured_solns_input, advection_structs, - scratch_dummy, restarting) + scratch_dummy, restarting, input_dict) # define some local variables for convenience/tidiness n_ion_species = composition.n_ion_species n_neutral_species = composition.n_neutral_species @@ -375,7 +380,11 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop push!(t_params.limit_caused_by, 0, 0, 0, 0) # ion pdf - push!(t_params.limit_caused_by, 0, 0, 0) # RK accuracy plus 2 CFL limits + push!(t_params.limit_caused_by, 0) # RK accuracy + push!(t_params.limit_caused_by, 0) # z-advection CFL limit + if !t_params.implicit_vpa_advection + 
push!(t_params.limit_caused_by, 0) # vpa-advection CFL limit + end push!(t_params.failure_caused_by, 0) if moments.evolve_density # ion density @@ -429,7 +438,17 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop # Set up parameters for Jacobian-free Newton-Krylov solver used for implicit part of # timesteps. - nl_solver_params = () + if t_params.implicit_vpa_advection + # Implicit solve for vpa_advection term should be done in serial, as it will be + # called within a parallelised s_r_z_vperp loop. + nl_solver_vpa_advection_params = setup_nonlinear_solve(input_dict, (vpa=vpa,); + default_rtol=t_params.rtol, + default_atol=t_params.atol, + serial_solve=true) + else + nl_solver_vpa_advection_params = nothing + end + nl_solver_params = (vpa_advection=nl_solver_vpa_advection_params,) begin_serial_region() @@ -711,7 +730,9 @@ function setup_advance_flags(moments, composition, t_params, collisions, # otherwise, check to see if the flags need to be set to true if !t_params.split_operators # default for non-split operators is to include both vpa and z advection together - advance_vpa_advection = vpa.n > 1 && z.n > 1 + # If using an IMEX scheme and implicit vpa advection has been requested, then vpa + # advection is not included in the explicit part of the timestep. 
+ advance_vpa_advection = vpa.n > 1 && z.n > 1 && !t_params.implicit_vpa_advection advance_vperp_advection = vperp.n > 1 && z.n > 1 advance_z_advection = z.n > 1 advance_r_advection = r.n > 1 @@ -871,6 +892,9 @@ function setup_implicit_advance_flags(moments, composition, t_params, collisions if t_params.split_operators error("Implicit timesteps do not support `t_params.split_operators=true`") end + if t_params.implicit_vpa_advection + advance_vpa_advection = true + end manufactured_solns_test = manufactured_solns_input.use_for_advance @@ -1799,19 +1823,21 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen end push!(CFL_limits, t_params.CFL_prefactor * ion_z_CFL) - # ion vpa-advection - begin_r_z_vperp_region() - ion_vpa_CFL = Inf - update_speed_vpa!(vpa_advect, fields, scratch[end], moments, vpa, vperp, z, r, - composition, collisions, external_source_settings.ion, t, - geometry) - @loop_s is begin - this_minimum = get_minimum_CFL_vpa(vpa_advect[is].speed, vpa) - @serial_region begin - ion_vpa_CFL = min(ion_vpa_CFL, this_minimum) + if !t_params.implicit_vpa_advection + # ion vpa-advection + begin_r_z_vperp_region() + ion_vpa_CFL = Inf + update_speed_vpa!(vpa_advect, fields, scratch[end], moments, vpa, vperp, z, r, + composition, collisions, external_source_settings.ion, t, + geometry) + @loop_s is begin + this_minimum = get_minimum_CFL_vpa(vpa_advect[is].speed, vpa) + @serial_region begin + ion_vpa_CFL = min(ion_vpa_CFL, this_minimum) + end end + push!(CFL_limits, t_params.CFL_prefactor * ion_vpa_CFL) end - push!(CFL_limits, t_params.CFL_prefactor * ion_vpa_CFL) # To avoid double counting points when we use distributed-memory MPI, skip the # inner/lower point in r and z if this process is not the first block in that @@ -2472,7 +2498,18 @@ function backward_euler!(fvec_out, fvec_in, pdf, fields, moments, advect_objects manufactured_source_list, external_source_settings, num_diss_params, nl_solver_params, advance, fp_arrays, istage) - 
# No terms supported here yet + vpa_spectral, vperp_spectral, r_spectral, z_spectral = spectral_objects.vpa_spectral, spectral_objects.vperp_spectral, spectral_objects.r_spectral, spectral_objects.z_spectral + vz_spectral, vr_spectral, vzeta_spectral = spectral_objects.vz_spectral, spectral_objects.vr_spectral, spectral_objects.vzeta_spectral + vpa_advect, vperp_advect, r_advect, z_advect = advect_objects.vpa_advect, advect_objects.vperp_advect, advect_objects.r_advect, advect_objects.z_advect + neutral_z_advect, neutral_r_advect, neutral_vz_advect = advect_objects.neutral_z_advect, advect_objects.neutral_r_advect, advect_objects.neutral_vz_advect + + if advance.vpa_advection + implicit_vpa_advection!(fvec_out.pdf, fvec_in, fields, moments, vpa_advect, vpa, + vperp, z, r, dt, t, vpa_spectral, composition, collisions, + external_source_settings.ion, geometry, nl_solver_params, + num_diss_params.ion.vpa_dissipation_coefficient > 0.0, + num_diss_params.ion.force_minimum_pdf_value) + end return nothing end diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index 9d4881b0b..155eb5597 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -6,8 +6,13 @@ export vpa_advection! export update_speed_vpa! using ..advection: advance_f_local! +using ..boundary_conditions: enforce_v_boundary_condition_local! using ..communication using ..looping +using ..moment_constraints: hard_force_moment_constraints! +using ..nonlinear_solvers: newton_solve! 
+ +using ..boundary_conditions: vpagrid_to_dzdt """ """ @@ -30,6 +35,133 @@ function vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, vperp, z, end end +""" +""" +function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, vperp, z, + r, dt, t, vpa_spectral, composition, collisions, + ion_source_settings, geometry, nl_solver_params, + vpa_diffusion, minval) + + if vperp.n > 1 && (moments.evolve_density || moments.evolve_upar || moments.evolve_ppar) + error("Moment constraints in implicit_vpa_advection!() do not support 2V runs yet") + end + + # calculate the advection speed corresponding to current f + update_speed_vpa!(advect, fields, fvec_in, moments, vpa, vperp, z, r, composition, + collisions, ion_source_settings, t, geometry) + + + begin_s_r_z_vperp_region() + coords = (vpa=vpa,) + vpa_bc = vpa.bc + @loop_s is begin + @loop_r_z_vperp ir iz ivperp begin + f_old = @view fvec_in.pdf[:,ivperp,iz,ir,is] + this_f_out = @view f_out[:,ivperp,iz,ir,is] + speed = @view advect[is].speed[:,ivperp,iz,ir] + + # Define a function whose input is `f_new`, so that when it's output + # `residual` is zero, f_new is the result of a backward-Euler timestep: + # (f_new - f_old) / dt = RHS(f_new) + # ⇒ f_new - f_old - dt*RHS(f_new) = 0 + function residual_func!(residual, f_new) + # Boundary condition + enforce_v_boundary_condition_local!(f_new, vpa_bc, speed, vpa_diffusion, + vpa, vpa_spectral) + + # Moment constraints + # When we implement 2V moment kinetics, the constraints will couple vpa + # and vperp dimensions, so this will no longer be a 1V operation. + #hard_force_moment_constraints!(f_new, moments, vpa) + + # Minimum value constraint + #if minval !== nothing + # @. f_new = max(f_new, minval) + #end + + residual .= f_old + advance_f_local!(residual, f_new, advect[is], ivperp, iz, ir, vpa, dt, + vpa_spectral) + + if z.bc == "wall" + # Wall boundary conditions. 
Note that as density, upar, ppar do not + # change in this implicit step, f_new, f_old, and residual will all be + # zero at exactly the same set of grid points, so it is reasonable to + # zero-out `residual` to impose the boundary condition. + zero = 1.0e-14 + if z.irank == 0 && iz == 1 + @. vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is], + fvec_in.upar[iz,ir,is], + moments.evolve_ppar, + moments.evolve_upar) + @loop_vpa ivpa begin + # for left boundary in zed (z = -Lz/2), want + # f(z=-Lz/2, v_parallel > 0) = 0 + if vpa.scratch[ivpa] > zero + residual[ivpa] = 0.0 + end + end + end + # absolute velocity at right boundary + if z.irank == z.nrank - 1 && iz == z.n + @. vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is], + fvec_in.upar[iz,ir,is], + moments.evolve_ppar, + moments.evolve_upar) + @loop_vpa ivpa begin + # for right boundary in zed (z = Lz/2), want + # f(z=Lz/2, v_parallel < 0) = 0 + if vpa.scratch[ivpa] < -zero + residual[ivpa] = 0.0 + end + end + end + end + + # Now + # residual = f_old + dt*RHS(f_new) + # so update to desired residual + @. residual = f_new - residual + end + + # Buffers + # Note vpa,scratch is used by advance_f!, so we cannot use it here. 
+ residual = vpa.scratch2 + delta_x = vpa.scratch3 + rhs_delta = vpa.scratch4 + v = vpa.scratch5 + w = vpa.scratch6 + + # Use forward-Euler step for initial guess + # By passing this_f_out, which is equal to f_old at this point, the 'residual' + # is + # f_new - f_old - dt*RHS(f_old) = -dt*RHS(f_old) + # so to get a forward-Euler step we have to subtract this 'residual' + residual_func!(residual, this_f_out) + this_f_out .-= residual + + newton_solve!(this_f_out, residual_func!, residual, delta_x, rhs_delta, v, w, + nl_solver_params.vpa_advection, coords=coords) + + # Boundary condition on final result + enforce_v_boundary_condition_local!(this_f_out, vpa_bc, speed, vpa_diffusion, + vpa, vpa_spectral) + + # Moment constraints on final result + # When we implement 2V moment kinetics, the constraints will couple vpa + # and vperp dimensions, so this will no longer be a 1V operation. + hard_force_moment_constraints!(this_f_out, moments, vpa) + + # Minimum value constraint on final result + if minval !== nothing + @. this_f_out = max(this_f_out, minval) + end + end + end + + return nothing +end + """ calculate the advection speed in the vpa-direction at each grid point """ From 6c4467521691a3927cfdcc725b0f64a976a245ac Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 12 May 2024 20:36:08 +0100 Subject: [PATCH 34/75] Start working on preconditioner for implicit_vpa_advection!() --- moment_kinetics/src/vpa_advection.jl | 131 ++++++++++++++++++++++----- 1 file changed, 108 insertions(+), 23 deletions(-) diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index 155eb5597..a78fb9321 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -13,6 +13,8 @@ using ..moment_constraints: hard_force_moment_constraints! using ..nonlinear_solvers: newton_solve! 
using ..boundary_conditions: vpagrid_to_dzdt +using LinearAlgebra +using SparseArrays """ """ @@ -60,6 +62,107 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v this_f_out = @view f_out[:,ivperp,iz,ir,is] speed = @view advect[is].speed[:,ivperp,iz,ir] + if z.irank == 0 && iz == 1 + @. vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is], + fvec_in.upar[iz,ir,is], + moments.evolve_ppar, + moments.evolve_upar) + icut_lower_z = vpa.n + for ivpa ∈ vpa.n:-1:1 + # for left boundary in zed (z = -Lz/2), want + # f(z=-Lz/2, v_parallel > 0) = 0 + if vpa.scratch[ivpa] ≤ zero + icut_lower_z = ivpa + 1 + break + end + end + end + if z.irank == z.nrank - 1 && iz == z.n + @. vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is], + fvec_in.upar[iz,ir,is], + moments.evolve_ppar, + moments.evolve_upar) + icut_upper_z = 0 + for ivpa ∈ 1:vpa.n + # for right boundary in zed (z = Lz/2), want + # f(z=Lz/2, v_parallel < 0) = 0 + if vpa.scratch[ivpa] ≥ -zero + icut_upper_z = ivpa - 1 + break + end + end + end + + advection_matrix = allocate_float(vpa.n, vpa.n) + advection_matrix .= 0.0 + for i ∈ 1:vpa.nelement_local + imin = vpa.imin[i] - (i != 1) + imax = vpa.imax[i] + if i == 1 + advection_matrix[imin,imin:imax] .+= vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] + else + if speed[imin] < 0.0 + advection_matrix[imin,imin:imax] .+= vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] + elseif speed[imin] > 0.0 + # Do nothing + else + advection_matrix[imin,imin:imax] .+= 0.5 .* vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] + end + end + advection_matrix[imin+1:imax-1,imin:imax] .+= vpa_spectral.lobatto.Dmat[2:end-1,:] ./ vpa.element_scale[i] + if i == vpa.nelement_local + advection_matrix[imax,imin:imax] .+= vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i] + else + if speed[imax] < 0.0 + # Do nothing + elseif speed[imax] > 0.0 + advection_matrix[imax,imin:imax] .+= vpa_spectral.lobatto.Dmat[end,:] ./ 
vpa.element_scale[i] + else + advection_matrix[imax,imin:imax] .+= 0.5 .* vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i] + end + end + end + # Multiply by advection speed + for i ∈ 1:vpa.n + advection_matrix[i,:] .*= dt * speed[i] + end + for i ∈ 1:vpa.n + advection_matrix[i,i] += 1.0 + end + # hacky (?) Dirichlet boundary conditions + this_f_out[1] = 0.0 + this_f_out[end] = 0.0 + advection_matrix[1,:] .= 0.0 + advection_matrix[1,1] = 1.0 + advection_matrix[end,:] .= 0.0 + advection_matrix[end,end] = 1.0 + + if z.bc == "wall" + if z.irank == 0 && iz == 1 + # Set equal df/dt equal to f on points that should be set to zero for + # boundary condition. The vector that the inverse of the advection matrix + # acts on should have zeros there already. + # I comes from LinearAlgebra and represents identity matrix + advection_matrix[icut_lower_z:end,icut_lower_z:end] .= I + end + if z.irank == z.nrank - 1 && iz == z.n + # Set equal df/dt equal to f on points that should be set to zero for + # boundary condition. The vector that the inverse of the advection matrix + # acts on should have zeros there already. + # I comes from LinearAlgebra and represents identity matrix + advection_matrix[1:icut_upper_z,1:icut_upper_z] .= I + end + end + + advection_matrix = sparse(advection_matrix) + preconditioner_lu = lu(advection_matrix) + preconditioner = (x) -> ldiv!(preconditioner_lu, x) + + #left_preconditioner = preconditioner + right_preconditioner = identity + left_preconditioner = identity + #right_preconditioner = preconditioner + # Define a function whose input is `f_new`, so that when it's output # `residual` is zero, f_new is the result of a backward-Euler timestep: # (f_new - f_old) / dt = RHS(f_new) @@ -90,31 +193,11 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v # zero-out `residual` to impose the boundary condition. zero = 1.0e-14 if z.irank == 0 && iz == 1 - @. 
vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is], - fvec_in.upar[iz,ir,is], - moments.evolve_ppar, - moments.evolve_upar) - @loop_vpa ivpa begin - # for left boundary in zed (z = -Lz/2), want - # f(z=-Lz/2, v_parallel > 0) = 0 - if vpa.scratch[ivpa] > zero - residual[ivpa] = 0.0 - end - end + residual[icut_lower_z:end] .= 0.0 end # absolute velocity at right boundary if z.irank == z.nrank - 1 && iz == z.n - @. vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is], - fvec_in.upar[iz,ir,is], - moments.evolve_ppar, - moments.evolve_upar) - @loop_vpa ivpa begin - # for right boundary in zed (z = Lz/2), want - # f(z=Lz/2, v_parallel < 0) = 0 - if vpa.scratch[ivpa] < -zero - residual[ivpa] = 0.0 - end - end + residual[1:icut_upper_z] .= 0.0 end end @@ -141,7 +224,9 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v this_f_out .-= residual newton_solve!(this_f_out, residual_func!, residual, delta_x, rhs_delta, v, w, - nl_solver_params.vpa_advection, coords=coords) + nl_solver_params.vpa_advection, coords=coords, + left_preconditioner=left_preconditioner, + right_preconditioner=right_preconditioner) # Boundary condition on final result enforce_v_boundary_condition_local!(this_f_out, vpa_bc, speed, vpa_diffusion, From aa1a8ac07e130d35a14e23b6ad53d794ac3bc4d4 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 14 May 2024 18:13:14 +0100 Subject: [PATCH 35/75] Test both z and vpa coords in nonlinear_solver_tests.jl --- moment_kinetics/test/nonlinear_solver_tests.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/moment_kinetics/test/nonlinear_solver_tests.jl b/moment_kinetics/test/nonlinear_solver_tests.jl index dfca94b83..c592082ff 100644 --- a/moment_kinetics/test/nonlinear_solver_tests.jl +++ b/moment_kinetics/test/nonlinear_solver_tests.jl @@ -14,8 +14,8 @@ using moment_kinetics.type_definitions: mk_float, mk_int using MPI function linear_test() - @testset "linear test" begin - 
println(" - linear test") + println(" - linear test") + @testset "linear test $coord_names" for coord_names ∈ ((:z,), (:vpa,)) # Test represents constant-coefficient diffusion, in 1D steady state, with a # central finite-difference discretisation of the second derivative. # @@ -64,7 +64,7 @@ function linear_test() zeros(mk_float, 0), zeros(mk_float, 0), "", zeros(mk_float, 0), false, zeros(mk_float, 0, 0, 0), zeros(mk_float, 0, 0)) - coords = (z=the_coord,) + coords = NamedTuple(c => the_coord for c ∈ coord_names) function rhs_func!(residual, x) begin_serial_region() @@ -112,8 +112,8 @@ function linear_test() end function nonlinear_test() - @testset "non-linear test" begin - println(" - non-linear test") + println(" - non-linear test") + @testset "non-linear test" for coord_names ∈ ((:z,), (:vpa,)) # Test represents constant-coefficient diffusion, in 1D steady state, with a # central finite-difference discretisation of the second derivative. # @@ -149,7 +149,7 @@ function nonlinear_test() zeros(mk_float, 0), zeros(mk_float, 0), "", zeros(mk_float, 0), false, zeros(mk_float, 0, 0, 0), zeros(mk_float, 0, 0)) - coords = (z=the_coord) + coords = NamedTuple(c => the_coord for c ∈ coord_names) function rhs_func!(residual, x) begin_serial_region() From 403c509a83999ec043bce9d9609ea2f70e6cc407 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 14 May 2024 18:42:53 +0100 Subject: [PATCH 36/75] Option to re-factorise preconditioner only after certain interval --- moment_kinetics/src/moment_kinetics.jl | 2 +- moment_kinetics/src/nonlinear_solvers.jl | 32 +++++- moment_kinetics/src/time_advance.jl | 12 ++- moment_kinetics/src/vpa_advection.jl | 122 ++++++++++++----------- 4 files changed, 102 insertions(+), 66 deletions(-) diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl index 443891f2f..08c9f25ec 100644 --- a/moment_kinetics/src/moment_kinetics.jl +++ b/moment_kinetics/src/moment_kinetics.jl @@ -30,10 +30,10 @@ 
include("quadrature.jl") include("hermite_spline_interpolation.jl") include("derivatives.jl") include("input_structs.jl") -include("nonlinear_solvers.jl") include("runge_kutta.jl") include("reference_parameters.jl") include("coordinates.jl") +include("nonlinear_solvers.jl") include("file_io.jl") include("geo.jl") include("gyroaverages.jl") diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index fe82a5e34..ccca7f63e 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -29,6 +29,7 @@ export setup_nonlinear_solve, newton_solve! using ..array_allocation: allocate_float, allocate_shared_float using ..communication +using ..coordinates: coordinate using ..input_structs using ..looping using ..type_definitions: mk_float, mk_int @@ -36,8 +37,9 @@ using ..type_definitions: mk_float, mk_int using LinearAlgebra using MINPACK using MPI +using SparseArrays -struct nl_solver_info{TH,TV,Tlig} +struct nl_solver_info{TH,TV,Tlig,Tprecon} rtol::mk_float atol::mk_float linear_rtol::mk_float @@ -50,7 +52,10 @@ struct nl_solver_info{TH,TV,Tlig} n_solves::Ref{mk_int} nonlinear_iterations::Ref{mk_int} linear_iterations::Ref{mk_int} + stage_counter::Ref{mk_int} serial_solve::Bool + preconditioner_update_interval::mk_int + preconditioners::Tprecon end """ @@ -58,9 +63,13 @@ end `coords` is a NamedTuple of coordinates corresponding to the dimensions of the variable that will be solved. The entries in `coords` should be ordered the same as the memory layout of the variable to be solved (i.e. fastest-varying first). + +The nonlinear solver will be called inside a loop over `outer_coords`, so we might need +for example a preconditioner object for each point in that outer loop. 
""" -function setup_nonlinear_solve(input_dict, coords; default_rtol=1.0e-5, - default_atol=1.0e-12, serial_solve=false) +function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol=1.0e-5, + default_atol=1.0e-12, serial_solve=false, + preconditioner_type=nothing) nl_solver_section = set_defaults_and_check_section!( input_dict, "nonlinear_solver"; rtol=default_rtol, @@ -69,9 +78,13 @@ function setup_nonlinear_solve(input_dict, coords; default_rtol=1.0e-5, linear_atol=1.0e-15, linear_restart=10, linear_max_restarts=0, + preconditioner_update_interval=300, ) nl_solver_input = Dict_to_NamedTuple(nl_solver_section) + total_size_coords = prod(isa(c, coordinate) ? c.n : c for c ∈ values(coords)) + outer_coord_sizes = Tuple(isa(c, coordinate) ? c.n : c for c ∈ outer_coords) + linear_restart = nl_solver_input.linear_restart if serial_solve @@ -90,12 +103,23 @@ function setup_nonlinear_solve(input_dict, coords; default_rtol=1.0e-5, end end + if preconditioner_type == "lu" + # Create dummy LU solver objects so we can create an array for preconditioners. + # These will be calculated properly within the time loop. 
+ preconditioners = fill(lu(sparse(1.0*I, total_size_coords, total_size_coords)), + reverse(outer_coord_sizes)) + else + preconditioners = nothing + end + linear_initial_guess = zeros(linear_restart) return nl_solver_info(nl_solver_input.rtol, nl_solver_input.atol, nl_solver_input.linear_rtol, nl_solver_input.linear_atol, linear_restart, nl_solver_input.linear_max_restarts, H, V, - linear_initial_guess, Ref(0), Ref(0), Ref(0), serial_solve) + linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), + serial_solve, nl_solver_input.preconditioner_update_interval, + preconditioners) end """ diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index de047c2ca..cd014d734 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -441,10 +441,11 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop if t_params.implicit_vpa_advection # Implicit solve for vpa_advection term should be done in serial, as it will be # called within a parallelised s_r_z_vperp loop. 
- nl_solver_vpa_advection_params = setup_nonlinear_solve(input_dict, (vpa=vpa,); - default_rtol=t_params.rtol, - default_atol=t_params.atol, - serial_solve=true) + nl_solver_vpa_advection_params = + setup_nonlinear_solve(input_dict, (vpa=vpa,), + (composition.n_ion_species, r, z, vperp); + default_rtol=t_params.rtol, default_atol=t_params.atol, + serial_solve=true, preconditioner_type="lu") else nl_solver_vpa_advection_params = nothing end @@ -2506,7 +2507,8 @@ function backward_euler!(fvec_out, fvec_in, pdf, fields, moments, advect_objects if advance.vpa_advection implicit_vpa_advection!(fvec_out.pdf, fvec_in, fields, moments, vpa_advect, vpa, vperp, z, r, dt, t, vpa_spectral, composition, collisions, - external_source_settings.ion, geometry, nl_solver_params, + external_source_settings.ion, geometry, + nl_solver_params.vpa_advection, num_diss_params.ion.vpa_dissipation_coefficient > 0.0, num_diss_params.ion.force_minimum_pdf_value) end diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index a78fb9321..b6f6d9c76 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -12,6 +12,7 @@ using ..looping using ..moment_constraints: hard_force_moment_constraints! using ..nonlinear_solvers: newton_solve! 
+using ..array_allocation: allocate_float using ..boundary_conditions: vpagrid_to_dzdt using LinearAlgebra using SparseArrays @@ -56,6 +57,7 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v begin_s_r_z_vperp_region() coords = (vpa=vpa,) vpa_bc = vpa.bc + zero = 1.0e-14 @loop_s is begin @loop_r_z_vperp ir iz ivperp begin f_old = @view fvec_in.pdf[:,ivperp,iz,ir,is] @@ -93,71 +95,78 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v end end - advection_matrix = allocate_float(vpa.n, vpa.n) - advection_matrix .= 0.0 - for i ∈ 1:vpa.nelement_local - imin = vpa.imin[i] - (i != 1) - imax = vpa.imax[i] - if i == 1 - advection_matrix[imin,imin:imax] .+= vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] - else - if speed[imin] < 0.0 + if nl_solver_params.stage_counter[] % nl_solver_params.preconditioner_update_interval == 0 + advection_matrix = allocate_float(vpa.n, vpa.n) + advection_matrix .= 0.0 + for i ∈ 1:vpa.nelement_local + imin = vpa.imin[i] - (i != 1) + imax = vpa.imax[i] + if i == 1 advection_matrix[imin,imin:imax] .+= vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] - elseif speed[imin] > 0.0 - # Do nothing else - advection_matrix[imin,imin:imax] .+= 0.5 .* vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] + if speed[imin] < 0.0 + advection_matrix[imin,imin:imax] .+= vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] + elseif speed[imin] > 0.0 + # Do nothing + else + advection_matrix[imin,imin:imax] .+= 0.5 .* vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] + end end - end - advection_matrix[imin+1:imax-1,imin:imax] .+= vpa_spectral.lobatto.Dmat[2:end-1,:] ./ vpa.element_scale[i] - if i == vpa.nelement_local - advection_matrix[imax,imin:imax] .+= vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i] - else - if speed[imax] < 0.0 - # Do nothing - elseif speed[imax] > 0.0 + advection_matrix[imin+1:imax-1,imin:imax] .+= vpa_spectral.lobatto.Dmat[2:end-1,:] ./ 
vpa.element_scale[i] + if i == vpa.nelement_local advection_matrix[imax,imin:imax] .+= vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i] else - advection_matrix[imax,imin:imax] .+= 0.5 .* vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i] + if speed[imax] < 0.0 + # Do nothing + elseif speed[imax] > 0.0 + advection_matrix[imax,imin:imax] .+= vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i] + else + advection_matrix[imax,imin:imax] .+= 0.5 .* vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i] + end end end - end - # Multiply by advection speed - for i ∈ 1:vpa.n - advection_matrix[i,:] .*= dt * speed[i] - end - for i ∈ 1:vpa.n - advection_matrix[i,i] += 1.0 - end - # hacky (?) Dirichlet boundary conditions - this_f_out[1] = 0.0 - this_f_out[end] = 0.0 - advection_matrix[1,:] .= 0.0 - advection_matrix[1,1] = 1.0 - advection_matrix[end,:] .= 0.0 - advection_matrix[end,end] = 1.0 - - if z.bc == "wall" - if z.irank == 0 && iz == 1 - # Set equal df/dt equal to f on points that should be set to zero for - # boundary condition. The vector that the inverse of the advection matrix - # acts on should have zeros there already. - # I comes from LinearAlgebra and represents identity matrix - advection_matrix[icut_lower_z:end,icut_lower_z:end] .= I + # Multiply by advection speed + for i ∈ 1:vpa.n + advection_matrix[i,:] .*= dt * speed[i] end - if z.irank == z.nrank - 1 && iz == z.n - # Set equal df/dt equal to f on points that should be set to zero for - # boundary condition. The vector that the inverse of the advection matrix - # acts on should have zeros there already. - # I comes from LinearAlgebra and represents identity matrix - advection_matrix[1:icut_upper_z,1:icut_upper_z] .= I + for i ∈ 1:vpa.n + advection_matrix[i,i] += 1.0 + end + # hacky (?) 
Dirichlet boundary conditions + this_f_out[1] = 0.0 + this_f_out[end] = 0.0 + advection_matrix[1,:] .= 0.0 + advection_matrix[1,1] = 1.0 + advection_matrix[end,:] .= 0.0 + advection_matrix[end,end] = 1.0 + + if z.bc == "wall" + if z.irank == 0 && iz == 1 + # Set equal df/dt equal to f on points that should be set to zero for + # boundary condition. The vector that the inverse of the advection matrix + # acts on should have zeros there already. + advection_matrix[icut_lower_z:end,icut_lower_z:end] .= 0.0 + for i ∈ icut_lower_z:vpa.n + advection_matrix[i,i] = 1.0 + end + end + if z.irank == z.nrank - 1 && iz == z.n + # Set equal df/dt equal to f on points that should be set to zero for + # boundary condition. The vector that the inverse of the advection matrix + # acts on should have zeros there already. + # I comes from LinearAlgebra and represents identity matrix + advection_matrix[1:icut_upper_z,1:icut_upper_z] .= 0.0 + for i ∈ 1:icut_upper_z + advection_matrix[i,i] = 1.0 + end + end end - end - advection_matrix = sparse(advection_matrix) - preconditioner_lu = lu(advection_matrix) - preconditioner = (x) -> ldiv!(preconditioner_lu, x) + advection_matrix = sparse(advection_matrix) + nl_solver_params.preconditioners[ivperp,iz,ir,is] = lu(advection_matrix) + end + preconditioner = (x) -> ldiv!(nl_solver_params.preconditioners[ivperp,iz,ir,is], x) #left_preconditioner = preconditioner right_preconditioner = identity left_preconditioner = identity @@ -191,7 +200,6 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v # change in this implicit step, f_new, f_old, and residual will all be # zero at exactly the same set of grid points, so it is reasonable to # zero-out `residual` to impose the boundary condition. 
- zero = 1.0e-14 if z.irank == 0 && iz == 1 residual[icut_lower_z:end] .= 0.0 end @@ -224,7 +232,7 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v this_f_out .-= residual newton_solve!(this_f_out, residual_func!, residual, delta_x, rhs_delta, v, w, - nl_solver_params.vpa_advection, coords=coords, + nl_solver_params, coords=coords, left_preconditioner=left_preconditioner, right_preconditioner=right_preconditioner) @@ -244,6 +252,8 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v end end + nl_solver_params.stage_counter[] += 1 + return nothing end From 1cdcdefd398cec3702343e552efbc9f6bcda61ce Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 15 May 2024 13:21:54 +0100 Subject: [PATCH 37/75] Fix application of boundary conditions in residual_func!() Need to be applied to the result, not the input f_new, to ensure that each calculated f_new obeys the boundary conditions. --- moment_kinetics/src/vpa_advection.jl | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index b6f6d9c76..ff6404129 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -177,10 +177,6 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v # (f_new - f_old) / dt = RHS(f_new) # ⇒ f_new - f_old - dt*RHS(f_new) = 0 function residual_func!(residual, f_new) - # Boundary condition - enforce_v_boundary_condition_local!(f_new, vpa_bc, speed, vpa_diffusion, - vpa, vpa_spectral) - # Moment constraints # When we implement 2V moment kinetics, the constraints will couple vpa # and vperp dimensions, so this will no longer be a 1V operation. 
@@ -195,11 +191,23 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v advance_f_local!(residual, f_new, advect[is], ivperp, iz, ir, vpa, dt, vpa_spectral) + # Now + # residual = f_old + dt*RHS(f_new) + # so update to desired residual + @. residual = f_new - residual + + # Boundary condition + enforce_v_boundary_condition_local!(residual, vpa_bc, speed, vpa_diffusion, + vpa, vpa_spectral) + if z.bc == "wall" # Wall boundary conditions. Note that as density, upar, ppar do not - # change in this implicit step, f_new, f_old, and residual will all be - # zero at exactly the same set of grid points, so it is reasonable to - # zero-out `residual` to impose the boundary condition. + # change in this implicit step, f_new, f_old, and residual should all + # be zero at exactly the same set of grid points, so it is reasonable + # to zero-out `residual` to impose the boundary condition. We impose + # this after subtracting f_old in case rounding errors, etc. mean that + # at some point f_old had a different boundary condition cut-off + # index. if z.irank == 0 && iz == 1 residual[icut_lower_z:end] .= 0.0 end @@ -208,11 +216,6 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v residual[1:icut_upper_z] .= 0.0 end end - - # Now - # residual = f_old + dt*RHS(f_new) - # so update to desired residual - @. residual = f_new - residual end # Buffers From e08511438e18046034b7f10a4a76f60f590e0528 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 15 May 2024 13:34:56 +0100 Subject: [PATCH 38/75] Include vpa diffusion when doing implicit vpa advection Might help by making the 1d part of the PDE that is being advanced explicitly well-posed on its own? 
--- moment_kinetics/src/time_advance.jl | 10 ++++++---- moment_kinetics/src/vpa_advection.jl | 11 +++++++++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index cd014d734..246f45cb6 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -830,7 +830,9 @@ function setup_advance_flags(moments, composition, t_params, collisions, # flag to determine if a d^2/dr^2 operator is present r_diffusion = (advance_numerical_dissipation && num_diss_params.ion.r_dissipation_coefficient > 0.0) # flag to determine if a d^2/dvpa^2 operator is present - vpa_diffusion = ((advance_numerical_dissipation && num_diss_params.ion.vpa_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions) + # When using implicit_vpa_advection, the vpa diffusion is included in the implicit + # step + vpa_diffusion = !t_params.implicit_vpa_advection && ((advance_numerical_dissipation && num_diss_params.ion.vpa_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions) vperp_diffusion = ((advance_numerical_dissipation && num_diss_params.ion.vperp_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions) vz_diffusion = (advance_numerical_dissipation && num_diss_params.neutral.vz_dissipation_coefficient > 0.0) end @@ -895,6 +897,7 @@ function setup_implicit_advance_flags(moments, composition, t_params, collisions end if t_params.implicit_vpa_advection advance_vpa_advection = true + vpa_diffusion = true end manufactured_solns_test = manufactured_solns_input.use_for_advance @@ -2508,9 +2511,8 @@ function backward_euler!(fvec_out, fvec_in, pdf, fields, moments, advect_objects implicit_vpa_advection!(fvec_out.pdf, fvec_in, fields, moments, vpa_advect, vpa, vperp, z, r, dt, t, vpa_spectral, composition, collisions, external_source_settings.ion, geometry, - nl_solver_params.vpa_advection, - num_diss_params.ion.vpa_dissipation_coefficient > 0.0, - 
num_diss_params.ion.force_minimum_pdf_value) + nl_solver_params.vpa_advection, advance.vpa_diffusion, + num_diss_params) end return nothing diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index ff6404129..037502465 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -14,6 +14,7 @@ using ..nonlinear_solvers: newton_solve! using ..array_allocation: allocate_float using ..boundary_conditions: vpagrid_to_dzdt +using ..calculus: second_derivative! using LinearAlgebra using SparseArrays @@ -43,8 +44,7 @@ end function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, vperp, z, r, dt, t, vpa_spectral, composition, collisions, ion_source_settings, geometry, nl_solver_params, - vpa_diffusion, minval) - + vpa_diffusion, num_diss_params) if vperp.n > 1 && (moments.evolve_density || moments.evolve_upar || moments.evolve_ppar) error("Moment constraints in implicit_vpa_advection!() do not support 2V runs yet") end @@ -57,6 +57,8 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v begin_s_r_z_vperp_region() coords = (vpa=vpa,) vpa_bc = vpa.bc + minval = num_diss_params.ion.force_minimum_pdf_value + vpa_dissipation_coefficient = num_diss_params.ion.vpa_dissipation_coefficient zero = 1.0e-14 @loop_s is begin @loop_r_z_vperp ir iz ivperp begin @@ -191,6 +193,11 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v advance_f_local!(residual, f_new, advect[is], ivperp, iz, ir, vpa, dt, vpa_spectral) + if vpa_diffusion + second_derivative!(vpa.scratch, f_new, vpa, vpa_spectral) + @. 
residual += dt * vpa_dissipation_coefficient * vpa.scratch + end + # Now # residual = f_old + dt*RHS(f_new) # so update to desired residual From d339a7eea321f77017181dfd799d3c059a9f16a8 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 15 May 2024 14:53:41 +0100 Subject: [PATCH 39/75] Include diffusion in preconditioner, assuming weak-form implementation --- moment_kinetics/src/vpa_advection.jl | 35 +++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index 037502465..9b7dd2d1d 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -134,6 +134,10 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v for i ∈ 1:vpa.n advection_matrix[i,i] += 1.0 end + # This allocates a new matrix - to avoid this would need to pre-allocate a + # suitable buffer somewhere and use `mul!()`. + advection_matrix = vpa_spectral.mass_matrix * advection_matrix + @. advection_matrix -= dt * vpa_dissipation_coefficient * vpa_spectral.K_matrix # hacky (?) Dirichlet boundary conditions this_f_out[1] = 0.0 this_f_out[end] = 0.0 @@ -168,7 +172,36 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v nl_solver_params.preconditioners[ivperp,iz,ir,is] = lu(advection_matrix) end - preconditioner = (x) -> ldiv!(nl_solver_params.preconditioners[ivperp,iz,ir,is], x) + function preconditioner(x) + # Multiply by mass matrix, storing result in vpa.scratch + mul!(vpa.scratch, vpa_spectral.mass_matrix, x) + + # Handle boundary conditions + enforce_v_boundary_condition_local!(vpa.scratch, vpa_bc, speed, vpa_diffusion, + vpa, vpa_spectral) + + if z.bc == "wall" + # Wall boundary conditions. 
Note that as density, upar, ppar do not + # change in this implicit step, f_new, f_old, and residual should all + # be zero at exactly the same set of grid points, so it is reasonable + # to zero-out `residual` to impose the boundary condition. We impose + # this after subtracting f_old in case rounding errors, etc. mean that + # at some point f_old had a different boundary condition cut-off + # index. + if z.irank == 0 && iz == 1 + vpa.scratch[icut_lower_z:end] .= 0.0 +# println("at icut_lower_z ", f_new[icut_lower_z], " ", f_old[icut_lower_z]) + end + # absolute velocity at right boundary + if z.irank == z.nrank - 1 && iz == z.n + vpa.scratch[1:icut_upper_z] .= 0.0 + end + end + + # Do LU application on vpa.scratch, storing result in x + ldiv!(x, nl_solver_params.preconditioners[ivperp,iz,ir,is], vpa.scratch) + return nothing + end #left_preconditioner = preconditioner right_preconditioner = identity left_preconditioner = identity From cb8cc6ba81875886f018ec341743292a83b73148 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 16 May 2024 21:24:55 +0100 Subject: [PATCH 40/75] Function to apply moment constraints to a 'residual' --- moment_kinetics/src/moment_constraints.jl | 64 +++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/moment_kinetics/src/moment_constraints.jl b/moment_kinetics/src/moment_constraints.jl index 5b78063b2..8beb1a279 100644 --- a/moment_kinetics/src/moment_constraints.jl +++ b/moment_kinetics/src/moment_constraints.jl @@ -140,4 +140,68 @@ function hard_force_moment_constraints_neutral!(f, moments, vz) return A, B, C end +""" + moment_constraints_on_residual!(residual, f, moments, vpa) + +A 'residual' (used in implicit timestepping) is an update to the distribution function +\$f_\\mathrm{new} = f_\\mathrm{old} + \\mathtt{residual}\$. 
\$f_\\mathrm{new}\$ should +obey the moment constraints ([Constraints on normalized distribution function](@ref)), and +\$f_\\mathrm{old}\$ already obeys the constraints, which means that the first 3 moments of +`residual` should be zero. We impose this constraint by adding corrections proportional to +`f`. +```math +r = \\hat{r} + (A + B w_{\\|} + C w_{\\|}^2) f +``` + +Note this function assumes the input is given at a single spatial position. +""" +function moment_constraints_on_residual!(residual::AbstractArray{T,N}, + f::AbstractArray{T,N}, moments, vpa) where {T,N} + if N == 2 + f1d = @view f[:,1] + r1d = @view residual[:,1] + end + if moments.evolve_ppar + I0 = integrate_over_vspace(f1d, vpa.wgts) + I1 = integrate_over_vspace(f1d, vpa.grid, vpa.wgts) + I2 = integrate_over_vspace(f1d, vpa.grid, 2, vpa.wgts) + I3 = integrate_over_vspace(f1d, vpa.grid, 3, vpa.wgts) + I4 = integrate_over_vspace(f1d, vpa.grid, 4, vpa.wgts) + J0 = integrate_over_vspace(r1d, vpa.wgts) + J1 = integrate_over_vspace(r1d, vpa.grid, vpa.wgts) + J2 = integrate_over_vspace(r1d, vpa.grid, 2, vpa.wgts) + + A = ((I2*J2 - J0*I4)*(I2*I4 - I3^2) + (I2*I3 - I1*I4)*(J2*I3 - J1*I4)) / + ((I0*I4 - I2^2)*(I2*I4 - I3^2) - (I2*I3 - I1*I4)^2) + B = (J2*I3 - J1*I4 + (I2*I3 - I1*I4)*A) / (I2*I4 - I3^2) + C = -(J2 + I2*A + I3*B) / I4 + + @. r1d = r1d + (A + B*vpa.grid + C*vpa.grid*vpa.grid) * f1d + elseif moments.evolve_upar + I0 = integrate_over_vspace(f1d, vpa.wgts) + I1 = integrate_over_vspace(f1d, vpa.grid, vpa.wgts) + I2 = integrate_over_vspace(f1d, vpa.grid, 2, vpa.wgts) + J0 = integrate_over_vspace(r1d, vpa.wgts) + J1 = integrate_over_vspace(r1d, vpa.grid, vpa.wgts) + + A = (I1*J1 - J0*I2) / (I0*I2 - I1^2) + B = -(J1 + I1*A) / I2 + + @. r1d = r1d + (A + B*vpa.grid) * f1d + + C = NaN + elseif moments.evolve_density + I0 = integrate_over_vspace(f1d, vpa.wgts) + J0 = integrate_over_vspace(r1d, vpa.wgts) + A = -J0 / I0 + @. f1d = A * f1d + @. 
r1d = r1d + A * f1d + + B = NaN + C = NaN + end + + return A, B, C +end + end From b952af2d2e5b38ab3aaa81169ac05dca9c9ca7d3 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 16 May 2024 21:29:19 +0100 Subject: [PATCH 41/75] Impose moment constraints on implicit-solve residual --- moment_kinetics/src/moment_constraints.jl | 46 +++++++++-------- moment_kinetics/src/vpa_advection.jl | 62 ++++++++++++----------- 2 files changed, 58 insertions(+), 50 deletions(-) diff --git a/moment_kinetics/src/moment_constraints.jl b/moment_kinetics/src/moment_constraints.jl index 8beb1a279..ae7c8b156 100644 --- a/moment_kinetics/src/moment_constraints.jl +++ b/moment_kinetics/src/moment_constraints.jl @@ -158,45 +158,49 @@ Note this function assumes the input is given at a single spatial position. function moment_constraints_on_residual!(residual::AbstractArray{T,N}, f::AbstractArray{T,N}, moments, vpa) where {T,N} if N == 2 - f1d = @view f[:,1] - r1d = @view residual[:,1] + f = @view f[:,1] + residual = @view residual[:,1] end if moments.evolve_ppar - I0 = integrate_over_vspace(f1d, vpa.wgts) - I1 = integrate_over_vspace(f1d, vpa.grid, vpa.wgts) - I2 = integrate_over_vspace(f1d, vpa.grid, 2, vpa.wgts) - I3 = integrate_over_vspace(f1d, vpa.grid, 3, vpa.wgts) - I4 = integrate_over_vspace(f1d, vpa.grid, 4, vpa.wgts) - J0 = integrate_over_vspace(r1d, vpa.wgts) - J1 = integrate_over_vspace(r1d, vpa.grid, vpa.wgts) - J2 = integrate_over_vspace(r1d, vpa.grid, 2, vpa.wgts) + I0 = integrate_over_vspace(f, vpa.wgts) + I1 = integrate_over_vspace(f, vpa.grid, vpa.wgts) + I2 = integrate_over_vspace(f, vpa.grid, 2, vpa.wgts) + I3 = integrate_over_vspace(f, vpa.grid, 3, vpa.wgts) + I4 = integrate_over_vspace(f, vpa.grid, 4, vpa.wgts) + J0 = integrate_over_vspace(residual, vpa.wgts) + J1 = integrate_over_vspace(residual, vpa.grid, vpa.wgts) + J2 = integrate_over_vspace(residual, vpa.grid, 2, vpa.wgts) A = ((I2*J2 - J0*I4)*(I2*I4 - I3^2) + (I2*I3 - I1*I4)*(J2*I3 - J1*I4)) / ((I0*I4 - 
I2^2)*(I2*I4 - I3^2) - (I2*I3 - I1*I4)^2) B = (J2*I3 - J1*I4 + (I2*I3 - I1*I4)*A) / (I2*I4 - I3^2) C = -(J2 + I2*A + I3*B) / I4 - @. r1d = r1d + (A + B*vpa.grid + C*vpa.grid*vpa.grid) * f1d + @. residual = residual + (A + B*vpa.grid + C*vpa.grid*vpa.grid) * f elseif moments.evolve_upar - I0 = integrate_over_vspace(f1d, vpa.wgts) - I1 = integrate_over_vspace(f1d, vpa.grid, vpa.wgts) - I2 = integrate_over_vspace(f1d, vpa.grid, 2, vpa.wgts) - J0 = integrate_over_vspace(r1d, vpa.wgts) - J1 = integrate_over_vspace(r1d, vpa.grid, vpa.wgts) + I0 = integrate_over_vspace(f, vpa.wgts) + I1 = integrate_over_vspace(f, vpa.grid, vpa.wgts) + I2 = integrate_over_vspace(f, vpa.grid, 2, vpa.wgts) + J0 = integrate_over_vspace(residual, vpa.wgts) + J1 = integrate_over_vspace(residual, vpa.grid, vpa.wgts) A = (I1*J1 - J0*I2) / (I0*I2 - I1^2) B = -(J1 + I1*A) / I2 - @. r1d = r1d + (A + B*vpa.grid) * f1d + @. residual = residual + (A + B*vpa.grid) * f C = NaN elseif moments.evolve_density - I0 = integrate_over_vspace(f1d, vpa.wgts) - J0 = integrate_over_vspace(r1d, vpa.wgts) + I0 = integrate_over_vspace(f, vpa.wgts) + J0 = integrate_over_vspace(residual, vpa.wgts) A = -J0 / I0 - @. f1d = A * f1d - @. r1d = r1d + A * f1d + @. f = A * f + @. residual = residual + A * f + B = NaN + C = NaN + else + A = NaN B = NaN C = NaN end diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index 9b7dd2d1d..220f45173 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -9,7 +9,8 @@ using ..advection: advance_f_local! using ..boundary_conditions: enforce_v_boundary_condition_local! using ..communication using ..looping -using ..moment_constraints: hard_force_moment_constraints! +using ..moment_constraints: hard_force_moment_constraints!, + moment_constraints_on_residual! using ..nonlinear_solvers: newton_solve! 
using ..array_allocation: allocate_float @@ -212,16 +213,30 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v # (f_new - f_old) / dt = RHS(f_new) # ⇒ f_new - f_old - dt*RHS(f_new) = 0 function residual_func!(residual, f_new) - # Moment constraints - # When we implement 2V moment kinetics, the constraints will couple vpa - # and vperp dimensions, so this will no longer be a 1V operation. - #hard_force_moment_constraints!(f_new, moments, vpa) - - # Minimum value constraint - #if minval !== nothing - # @. f_new = max(f_new, minval) - #end + function apply_bc!(x) + # Boundary condition + enforce_v_boundary_condition_local!(x, vpa_bc, speed, vpa_diffusion, + vpa, vpa_spectral) + + if z.bc == "wall" + # Wall boundary conditions. Note that as density, upar, ppar do not + # change in this implicit step, f_new, f_old, and residual should all + # be zero at exactly the same set of grid points, so it is reasonable + # to zero-out `residual` to impose the boundary condition. We impose + # this after subtracting f_old in case rounding errors, etc. mean that + # at some point f_old had a different boundary condition cut-off + # index. + if z.irank == 0 && iz == 1 + x[icut_lower_z:end] .= 0.0 + end + # absolute velocity at right boundary + if z.irank == z.nrank - 1 && iz == z.n + x[1:icut_upper_z] .= 0.0 + end + end + end + apply_bc!(f_new) residual .= f_old advance_f_local!(residual, f_new, advect[is], ivperp, iz, ir, vpa, dt, vpa_spectral) @@ -231,31 +246,20 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v @. residual += dt * vpa_dissipation_coefficient * vpa.scratch end + # Make sure updated f will not contain negative values + #@. residual = max(residual, minval) + # Now # residual = f_old + dt*RHS(f_new) # so update to desired residual @. 
residual = f_new - residual - # Boundary condition - enforce_v_boundary_condition_local!(residual, vpa_bc, speed, vpa_diffusion, - vpa, vpa_spectral) + apply_bc!(residual) - if z.bc == "wall" - # Wall boundary conditions. Note that as density, upar, ppar do not - # change in this implicit step, f_new, f_old, and residual should all - # be zero at exactly the same set of grid points, so it is reasonable - # to zero-out `residual` to impose the boundary condition. We impose - # this after subtracting f_old in case rounding errors, etc. mean that - # at some point f_old had a different boundary condition cut-off - # index. - if z.irank == 0 && iz == 1 - residual[icut_lower_z:end] .= 0.0 - end - # absolute velocity at right boundary - if z.irank == z.nrank - 1 && iz == z.n - residual[1:icut_upper_z] .= 0.0 - end - end + # Impose moment constraints on residual + # When we implement 2V moment kinetics, the constraints will couple vpa + # and vperp dimensions, so this will no longer be a 1V operation. + moment_constraints_on_residual!(residual, f_new, moments, vpa) end # Buffers From f8167f4581cefbf5d2be859b9b1b8349bec26360 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 17 May 2024 15:13:01 +0100 Subject: [PATCH 42/75] By default set nonlinear solver tols 1/10 times timesolver tols May help to avoid the timestep being reduced unnecessarily. 
--- moment_kinetics/src/time_advance.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 246f45cb6..6b14d426f 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -444,7 +444,8 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop nl_solver_vpa_advection_params = setup_nonlinear_solve(input_dict, (vpa=vpa,), (composition.n_ion_species, r, z, vperp); - default_rtol=t_params.rtol, default_atol=t_params.atol, + default_rtol=t_params.rtol / 10.0, + default_atol=t_params.atol / 10.0, serial_solve=true, preconditioner_type="lu") else nl_solver_vpa_advection_params = nothing From 7b1737f2b83c6b241428e3317becd822221d4e18 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 17 May 2024 15:58:41 +0100 Subject: [PATCH 43/75] Simplify setup of output variables for time solver diagnostics Get lengths from lengths of corresponding variables in t_params, rather than re-calculating. 
--- moment_kinetics/src/file_io.jl | 36 +++++++++++--------------- moment_kinetics/src/moment_kinetics.jl | 3 ++- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl index a47f64c3d..200f55ce3 100644 --- a/moment_kinetics/src/file_io.jl +++ b/moment_kinetics/src/file_io.jl @@ -198,7 +198,7 @@ open the necessary output files function setup_file_io(io_input, boundary_distributions, vz, vr, vzeta, vpa, vperp, z, r, composition, collisions, evolve_density, evolve_upar, evolve_ppar, external_source_settings, input_dict, restart_time_index, - previous_runs_info, time_for_setup, nl_solver_params) + previous_runs_info, time_for_setup, t_params, nl_solver_params) begin_serial_region() @serial_region begin # Only read/write from first process in each 'block' @@ -226,14 +226,14 @@ function setup_file_io(io_input, boundary_distributions, vz, vr, vzeta, vpa, vpe external_source_settings, input_dict, io_input.parallel_io, comm_inter_block[], run_id, restart_time_index, previous_runs_info, - time_for_setup, nl_solver_params) + time_for_setup, t_params, nl_solver_params) io_dfns = setup_dfns_io(out_prefix, io_input.binary_format, boundary_distributions, r, z, vperp, vpa, vzeta, vr, vz, composition, collisions, evolve_density, evolve_upar, evolve_ppar, external_source_settings, input_dict, io_input.parallel_io, comm_inter_block[], run_id, restart_time_index, previous_runs_info, time_for_setup, - nl_solver_params) + t_params, nl_solver_params) return ascii, io_moments, io_dfns end @@ -649,7 +649,7 @@ define dynamic (time-evolving) moment variables for writing to the hdf5 file function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species, r::coordinate, z::coordinate, parallel_io, external_source_settings, evolve_density, - evolve_upar, evolve_ppar, + evolve_upar, evolve_ppar, t_params, nl_solver_params) @serial_region begin dynamic = create_io_group(fid, "dynamic_data", description="time 
evolving variables") @@ -700,19 +700,13 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species, dynamic, "failure_counter", mk_int; parallel_io=parallel_io, description="cumulative number of timestep failures for the run") - n_failure_vars = 1 + evolve_density + evolve_upar + evolve_ppar - if n_neutral_species > 0 - n_failure_vars *= 2 - end + n_failure_vars = length(t_params.failure_caused_by) io_failure_caused_by = create_dynamic_variable!( dynamic, "failure_caused_by", mk_int; diagnostic_var_size=n_failure_vars, parallel_io=parallel_io, description="cumulative count of how many times each variable caused a " * "timestep failure for the run") - n_limit_vars = 4 + 1 + evolve_density + evolve_upar + evolve_ppar + 1 + (nl_solver_params.vpa_advection === nothing) - if n_neutral_species > 0 - n_limit_vars += 1 + evolve_density + evolve_upar + evolve_ppar + 2 - end + n_limit_vars = length(t_params.limit_caused_by) io_limit_caused_by = create_dynamic_variable!( dynamic, "limit_caused_by", mk_int; diagnostic_var_size=n_limit_vars, parallel_io=parallel_io, @@ -1095,7 +1089,7 @@ file """ function define_dynamic_dfn_variables!(fid, r, z, vperp, vpa, vzeta, vr, vz, composition, parallel_io, external_source_settings, - evolve_density, evolve_upar, evolve_ppar, + evolve_density, evolve_upar, evolve_ppar, t_params, nl_solver_params) @serial_region begin @@ -1104,7 +1098,7 @@ function define_dynamic_dfn_variables!(fid, r, z, vperp, vpa, vzeta, vr, vz, com parallel_io, external_source_settings, evolve_density, evolve_upar, - evolve_ppar, + evolve_ppar, t_params, nl_solver_params) dynamic = get_group(fid, "dynamic_data") @@ -1176,7 +1170,7 @@ function setup_moments_io(prefix, binary_format, vz, vr, vzeta, vpa, vperp, r, z composition, collisions, evolve_density, evolve_upar, evolve_ppar, external_source_settings, input_dict, parallel_io, io_comm, run_id, restart_time_index, previous_runs_info, - time_for_setup, nl_solver_params) + time_for_setup, 
t_params, nl_solver_params) @serial_region begin moments_prefix = string(prefix, ".moments") if !parallel_io @@ -1206,7 +1200,7 @@ function setup_moments_io(prefix, binary_format, vz, vr, vzeta, vpa, vperp, r, z io_moments = define_dynamic_moment_variables!( fid, composition.n_ion_species, composition.n_neutral_species, r, z, parallel_io, external_source_settings, evolve_density, evolve_upar, - evolve_ppar, nl_solver_params) + evolve_ppar, t_params, nl_solver_params) close(fid) @@ -1286,7 +1280,7 @@ function setup_dfns_io(prefix, binary_format, boundary_distributions, r, z, vper vzeta, vr, vz, composition, collisions, evolve_density, evolve_upar, evolve_ppar, external_source_settings, input_dict, parallel_io, io_comm, run_id, restart_time_index, - previous_runs_info, time_for_setup, nl_solver_params) + previous_runs_info, time_for_setup, t_params, nl_solver_params) @serial_region begin dfns_prefix = string(prefix, ".dfns") @@ -1322,7 +1316,7 @@ function setup_dfns_io(prefix, binary_format, boundary_distributions, r, z, vper ### in a struct for later access ### io_dfns = define_dynamic_dfn_variables!( fid, r, z, vperp, vpa, vzeta, vr, vz, composition, parallel_io, - external_source_settings, evolve_density, evolve_upar, evolve_ppar, + external_source_settings, evolve_density, evolve_upar, evolve_ppar, t_params, nl_solver_params) close(fid) @@ -1958,7 +1952,7 @@ function debug_dump(vz::coordinate, vr::coordinate, vzeta::coordinate, vpa::coor #qr_neutral=nothing, qzeta_neutral=nothing, vth_neutral=nothing, phi=nothing, Er=nothing, Ez=nothing, - istage=0, label="", nl_solver_params=()) + istage=0, label="", t_params=nothing, nl_solver_params=()) global debug_output_file # Only read/write from first process in each 'block' @@ -1990,12 +1984,12 @@ function debug_dump(vz::coordinate, vr::coordinate, vzeta::coordinate, vpa::coor r, z, false, external_source_settings, evolve_density, evolve_upar, - evolve_ppar, + evolve_ppar, t_params, nl_solver_params) io_dfns = 
define_dynamic_dfn_variables!( fid, r, z, vperp, vpa, vzeta, vr, vz, composition.n_ion_species, composition.n_neutral_species, false, external_source_settings, - evolve_density, evolve_upar, evolve_ppar, nl_solver_params) + evolve_density, evolve_upar, evolve_ppar, t_params, nl_solver_params) # create the "istage" variable, used to identify the rk stage where # `debug_dump()` was called diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl index 08c9f25ec..90fd7f1bd 100644 --- a/moment_kinetics/src/moment_kinetics.jl +++ b/moment_kinetics/src/moment_kinetics.jl @@ -336,7 +336,8 @@ function setup_moment_kinetics(input_dict::AbstractDict; ascii_io, io_moments, io_dfns = setup_file_io(io_input, boundary_distributions, vz, vr, vzeta, vpa, vperp, z, r, composition, collisions, moments.evolve_density, moments.evolve_upar, moments.evolve_ppar, external_source_settings, input_dict, - restart_time_index, previous_runs_info, time_for_setup, nl_solver_params) + restart_time_index, previous_runs_info, time_for_setup, t_params, + nl_solver_params) # write initial data to ascii files write_data_to_ascii(pdf, moments, fields, vpa, vperp, z, r, code_time, composition.n_ion_species, composition.n_neutral_species, ascii_io) From b6cdca393c79d216f9553a2bd649f890f7d011de Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 17 May 2024 15:38:42 +0100 Subject: [PATCH 44/75] Timestep failure to decrease timestep when nonlinear iteration fails --- .../src/makie_post_processing.jl | 8 ++ moment_kinetics/src/nonlinear_solvers.jl | 6 +- moment_kinetics/src/runge_kutta.jl | 67 +++++++++++--- moment_kinetics/src/time_advance.jl | 88 ++++++++++++------- moment_kinetics/src/vpa_advection.jl | 13 +-- 5 files changed, 132 insertions(+), 50 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index 24fd3ae2f..c4916e025 100644 --- 
a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -7257,6 +7257,14 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) linestyle=:dash, label=prefix * "failures caused by p_neutral", ax=ax_failures) end + if occursin("ARK", ri.t_input["type"]) + # Nonlinear iteration failed to converge in implicit part of + # timestep + counter += 1 + plot_1d(time, @view failure_caused_by_per_output[counter,:]; + linestyle=:dot, + label=prefix * "nonlinear iteration convergence failure", ax=ax_failures) + end end if counter > size(failure_caused_by_per_output, 1) diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index ccca7f63e..a704b638b 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -203,6 +203,7 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, close_counter = -1 close_linear_counter = -1 + success = true previous_residual_norm = residual_norm while residual_norm > 1.0 counter += 1 @@ -293,7 +294,8 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, end if counter > 100000 - error("maximum iteration limit reached") + println("maximum iteration limit reached") + success = false break end end @@ -309,6 +311,8 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, # println("Total linear iterations after close: ", linear_counter - close_linear_counter) # println("Linear iterations per Newton after close: ", (linear_counter - close_linear_counter) / (counter - close_counter)) # println() + + return success end """ diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index a16cd8d8b..087df7dff 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -961,7 +961,8 @@ end Use the calculated `CFL_limits` 
and `error_norms` to update the timestep in `t_params`. """ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, error_norms, - total_points, current_dt, error_norm_method) + total_points, current_dt, error_norm_method, + success) # Get global minimum of CFL limits CFL_limit = nothing this_limit_caused_by = nothing @@ -1019,10 +1020,50 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er just_completed_output_step = false - # Use current_dt instead of t_params.dt[] here because we are about to write to - # the shared-memory variable t_params.dt[] below, and we do not want to add an extra - # _block_synchronize() call after reading it here. - if (error_norm > 1.0 || isnan(error_norm)) && current_dt > t_params.minimum_dt + if !success + # Iteration failed in implicit part of timestep try decreasing timestep + + # Set scratch[end] equal to scratch[1] to start the timestep over + scratch_temp = scratch[t_params.n_rk_stages+1] + scratch[t_params.n_rk_stages+1] = scratch[1] + scratch[1] = scratch_temp + + @serial_region begin + t_params.failure_counter[] += 1 + + if t_params.previous_dt[] > 0.0 + # If previous_dt=0, the previous step was also a failure so only update + # dt_before_last_fail when previous_dt>0 + t_params.dt_before_last_fail[] = t_params.previous_dt[] + end + + # If we were trying to take a step to the output timestep, dt will be smaller on + # the re-try, so will not reach the output time. + t_params.step_to_output[] = false + + # Decrease timestep by 1/2 - this factor should probably be settable! + # Note when nonlinear solve iteration fails, we do not enforce + # minimum_dt, as the timesolver must error if we do not decrease dt. + if t_params.dt[] > t_params.minimum_dt + # ...but try decreasing just to minimum_dt first, if the dt is still + # bigger than this. 
+ t_params.dt[] = max(t_params.dt[] / 2.0, t_params.minimum_dt) + else + t_params.dt[] = t_params.dt[] / 2.0 + end + + # Don't update the simulation time, as this step failed + t_params.previous_dt[] = 0.0 + + # Call the 'cause' of the timestep failure the variable that has the biggest + # error norm here + t_params.failure_caused_by[end] += 1 + end + elseif (error_norm > 1.0 || isnan(error_norm)) && current_dt > t_params.minimum_dt + # Use current_dt instead of t_params.dt[] here because we are about to write to + # the shared-memory variable t_params.dt[] below, and we do not want to add an + # extra _block_synchronize() call after reading it here. + # # Timestep failed, reduce timestep and re-try # Set scratch[end] equal to scratch[1] to start the timestep over @@ -1049,14 +1090,6 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er t_params.dt[] * t_params.step_update_prefactor * error_norm^(-1.0/t_params.rk_order)) t_params.dt[] = max(t_params.dt[], t_params.minimum_dt) - minimum_dt = 1.e-14 - if t_params.dt[] < minimum_dt - println("Time advance failed: trying to set dt=$(t_params.dt[]) less than " - * "$minimum_dt at t=$t. Ending run.") - # Set dt negative to signal an error - t_params.dt[] = -1.0 - end - # Don't update the simulation time, as this step failed t_params.previous_dt[] = 0.0 @@ -1151,6 +1184,14 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er end @serial_region begin + minimum_dt = 1.e-14 + if t_params.dt[] < minimum_dt + println("Time advance failed: trying to set dt=$(t_params.dt[]) less than " + * "$minimum_dt at t=$t. 
Ending run.") + # Set dt negative to signal an error + t_params.dt[] = -1.0 + end + current_time = t + t_params.previous_dt[] if (!t_params.write_after_fixed_step_count && !just_completed_output_step && (current_time + t_params.dt[] >= t_params.next_output_time[])) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 6b14d426f..13cec7ede 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -421,6 +421,9 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop push!(t_params.failure_caused_by, 0) end end + if t_params.rk_coefs_implicit !== nothing + push!(t_params.failure_caused_by, 0) # Nonlinear iteration fails to converge + end # create the 'advance' struct to be used in later Euler advance to # indicate which parts of the equations are to be advanced concurrently. @@ -1772,7 +1775,7 @@ end fields, composition, collisions, geometry, external_source_settings, spectral_objects, advect_objects, gyroavs, num_diss_params, advance, - scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz) + scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, success) Check the error estimate for the embedded RK method and adjust the timestep if appropriate. 
@@ -1781,7 +1784,8 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen fields, composition, collisions, geometry, external_source_settings, spectral_objects, advect_objects, gyroavs, num_diss_params, advance, - scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz) + scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, + success) #error_norm_method = "Linf" error_norm_method = "L2" @@ -1985,7 +1989,8 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen end adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, error_norms, - total_points, current_dt, error_norm_method) + total_points, current_dt, error_norm_method, + success) if t_params.previous_dt[] == 0.0 # Re-update remaining velocity moments that are calculable from the evolved @@ -2126,6 +2131,8 @@ function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa _block_synchronize() end + # success is set to false if an iteration failed to converge in an implicit solve + success = true for istage ∈ 1:n_rk_stages if global_rank[] == 0 println("ion step ", t_params.step_counter[], ".", istage, " ", t) @@ -2154,14 +2161,23 @@ function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa # Note the timestep for this solve is rk_coefs_implict[istage,istage]*dt. # The diagonal elements are equal to the Butcher 'a' coefficients # rk_coefs_implicit[istage,istage]=a[istage,istage]. 
- backward_euler!(scratch_implicit[istage], scratch[istage], pdf, - fields, moments, advect_objects, vz, vr, vzeta, vpa, - vperp, gyrophase, z, r, t, - t_params.dt[] * t_params.rk_coefs_implicit[istage,istage], - spectral_objects, composition, collisions, geometry, - scratch_dummy, manufactured_source_list, - external_source_settings, num_diss_params, - nl_solver_params, advance_implicit, fp_arrays, istage) + success = backward_euler!(scratch_implicit[istage], scratch[istage], pdf, + fields, moments, advect_objects, vz, vr, vzeta, + vpa, vperp, gyrophase, z, r, t, t_params.dt[] * + t_params.rk_coefs_implicit[istage,istage], + spectral_objects, composition, collisions, + geometry, scratch_dummy, + manufactured_source_list, + external_source_settings, num_diss_params, + nl_solver_params, advance_implicit, fp_arrays, + istage) + success = MPI.Allreduce(success, &, comm_world) + if !success + # Jump to final stage, as passing `success = false` to the adaptive + # timestep update function will signal a failed timestep, so that we + # restart this timestep with a smaller `dt`. + istage = n_rk_stages + end # The result of the implicit solve gives the state vector at 'istage' # which is used as input to the explicit part of the IMEX time step. 
# Note that boundary conditions and constraints should already have been @@ -2182,19 +2198,23 @@ function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa # quantities and scratch[istage] containing quantities at time level n, RK stage # istage # calculate f^{(1)} = fⁿ + Δt*G[fⁿ] = scratch[2].pdf - euler_time_advance!(scratch[istage+1], old_scratch, pdf, fields, moments, - advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z, - r, t, t_params.dt[], spectral_objects, composition, - collisions, geometry, scratch_dummy, - manufactured_source_list, external_source_settings, - num_diss_params, advance, fp_arrays, istage) - - diagnostic_moments = diagnostic_checks && istage == n_rk_stages - rk_update!(scratch, scratch_implicit, moments, t_params, istage, composition) - apply_all_bcs_constraints_update_moments!( - scratch[istage+1], moments, fields, boundary_distributions, vz, vr, vzeta, - vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry, - gyroavs, num_diss_params, advance, scratch_dummy, diagnostic_moments) + if success + euler_time_advance!(scratch[istage+1], old_scratch, pdf, fields, moments, + advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z, + r, t, t_params.dt[], spectral_objects, composition, + collisions, geometry, scratch_dummy, + manufactured_source_list, external_source_settings, + num_diss_params, advance, fp_arrays, istage) + + diagnostic_moments = diagnostic_checks && istage == n_rk_stages + rk_update!(scratch, scratch_implicit, moments, t_params, istage, composition) + apply_all_bcs_constraints_update_moments!( + scratch[istage+1], moments, fields, boundary_distributions, vz, vr, vzeta, + vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry, + gyroavs, num_diss_params, advance, scratch_dummy, diagnostic_moments) + else + break + end end if t_params.adaptive @@ -2202,7 +2222,9 @@ function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa composition, 
collisions, geometry, external_source_settings, spectral_objects, advect_objects, gyroavs, num_diss_params, advance, - scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz) + scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, success) + elseif !success + error("Implicit part of timestep failed") end istage = n_rk_stages+1 @@ -2509,14 +2531,18 @@ function backward_euler!(fvec_out, fvec_in, pdf, fields, moments, advect_objects neutral_z_advect, neutral_r_advect, neutral_vz_advect = advect_objects.neutral_z_advect, advect_objects.neutral_r_advect, advect_objects.neutral_vz_advect if advance.vpa_advection - implicit_vpa_advection!(fvec_out.pdf, fvec_in, fields, moments, vpa_advect, vpa, - vperp, z, r, dt, t, vpa_spectral, composition, collisions, - external_source_settings.ion, geometry, - nl_solver_params.vpa_advection, advance.vpa_diffusion, - num_diss_params) + success = implicit_vpa_advection!(fvec_out.pdf, fvec_in, fields, moments, + vpa_advect, vpa, vperp, z, r, dt, t, + vpa_spectral, composition, collisions, + external_source_settings.ion, geometry, + nl_solver_params.vpa_advection, + advance.vpa_diffusion, num_diss_params) + if !success + return success + end end - return nothing + return true end """ diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index 220f45173..815ab71ac 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -278,10 +278,13 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v residual_func!(residual, this_f_out) this_f_out .-= residual - newton_solve!(this_f_out, residual_func!, residual, delta_x, rhs_delta, v, w, - nl_solver_params, coords=coords, - left_preconditioner=left_preconditioner, - right_preconditioner=right_preconditioner) + success = newton_solve!(this_f_out, residual_func!, residual, delta_x, + rhs_delta, v, w, nl_solver_params, coords=coords, + left_preconditioner=left_preconditioner, + 
right_preconditioner=right_preconditioner) + if !success + return success + end # Boundary condition on final result enforce_v_boundary_condition_local!(this_f_out, vpa_bc, speed, vpa_diffusion, @@ -301,7 +304,7 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v nl_solver_params.stage_counter[] += 1 - return nothing + return true end """ From 7b07c574b54395973a03b59de3d3dd07ba3950ea Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 18 May 2024 15:37:47 +0100 Subject: [PATCH 45/75] Fix IMEX time advance Need to apply boundary conditions and constraints to the result of any explicit steps of implicit terms (happens for first stage of 'ESDIRK' methods). Need to update derived moments and moment derivatives after implicit part of stage. When doing explicit step of implicit terms, use correct `scratch` as input for the explicit part of the timestep. --- moment_kinetics/src/time_advance.jl | 57 +++++++++++++--------------- moment_kinetics/src/vpa_advection.jl | 14 ------- 2 files changed, 26 insertions(+), 45 deletions(-) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 13cec7ede..083ececaa 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -2154,8 +2154,7 @@ function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa # The result of the forward-Euler step is just a hack to store the # (explicit) time-derivative of the implicitly advanced terms. The result # is not used as input to the explicit part of the IMEX advance. - update_solution_vector!(scratch[istage+1], scratch[istage], moments, - composition, vpa, vperp, z, r) + old_scratch = scratch[istage] else # Backward-Euler step for implicitly-evolved terms. # Note the timestep for this solve is rk_coefs_implict[istage,istage]*dt. 
@@ -2173,48 +2172,44 @@ function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa istage) success = MPI.Allreduce(success, &, comm_world) if !success - # Jump to final stage, as passing `success = false` to the adaptive - # timestep update function will signal a failed timestep, so that we - # restart this timestep with a smaller `dt`. - istage = n_rk_stages + # Break out of the istage loop, as passing `success = false` to the + # adaptive timestep update function will signal a failed timestep, so + # that we restart this timestep with a smaller `dt`. + break end # The result of the implicit solve gives the state vector at 'istage' # which is used as input to the explicit part of the IMEX time step. - # Note that boundary conditions and constraints should already have been - # applied to the solution in `scratch_implicit[istage]`, as part of the - # `backward_euler!()` solve. - update_solution_vector!(scratch[istage+1], scratch_implicit[istage], moments, - composition, vpa, vperp, z, r) + old_scratch = scratch_implicit[istage] end - old_scratch = scratch_implicit[istage] + apply_all_bcs_constraints_update_moments!( + scratch_implicit[istage], moments, fields, boundary_distributions, vz, vr, + vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, + geometry, gyroavs, num_diss_params, advance, scratch_dummy, false; + pdf_bc_constraints=false) else # Fully explicit method starts the forward-Euler step with the result from the # previous stage. 
- update_solution_vector!(scratch[istage+1], scratch[istage], moments, - composition, vpa, vperp, z, r) old_scratch = scratch[istage] end + update_solution_vector!(scratch[istage+1], old_scratch, moments, composition, vpa, + vperp, z, r) # do an Euler time advance, with scratch[istage+1] containing the advanced # quantities and scratch[istage] containing quantities at time level n, RK stage # istage # calculate f^{(1)} = fⁿ + Δt*G[fⁿ] = scratch[2].pdf - if success - euler_time_advance!(scratch[istage+1], old_scratch, pdf, fields, moments, - advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z, - r, t, t_params.dt[], spectral_objects, composition, - collisions, geometry, scratch_dummy, - manufactured_source_list, external_source_settings, - num_diss_params, advance, fp_arrays, istage) - - diagnostic_moments = diagnostic_checks && istage == n_rk_stages - rk_update!(scratch, scratch_implicit, moments, t_params, istage, composition) - apply_all_bcs_constraints_update_moments!( - scratch[istage+1], moments, fields, boundary_distributions, vz, vr, vzeta, - vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry, - gyroavs, num_diss_params, advance, scratch_dummy, diagnostic_moments) - else - break - end + euler_time_advance!(scratch[istage+1], old_scratch, pdf, fields, moments, + advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z, + r, t, t_params.dt[], spectral_objects, composition, + collisions, geometry, scratch_dummy, + manufactured_source_list, external_source_settings, + num_diss_params, advance, fp_arrays, istage) + + diagnostic_moments = diagnostic_checks && istage == n_rk_stages + rk_update!(scratch, scratch_implicit, moments, t_params, istage, composition) + apply_all_bcs_constraints_update_moments!( + scratch[istage+1], moments, fields, boundary_distributions, vz, vr, vzeta, + vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry, + gyroavs, num_diss_params, advance, scratch_dummy, diagnostic_moments) end if 
t_params.adaptive diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index 815ab71ac..8456205c0 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -285,20 +285,6 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v if !success return success end - - # Boundary condition on final result - enforce_v_boundary_condition_local!(this_f_out, vpa_bc, speed, vpa_diffusion, - vpa, vpa_spectral) - - # Moment constraints on final result - # When we implement 2V moment kinetics, the constraints will couple vpa - # and vperp dimensions, so this will no longer be a 1V operation. - hard_force_moment_constraints!(this_f_out, moments, vpa) - - # Minimum value constraint on final result - if minval !== nothing - @. this_f_out = max(this_f_out, minval) - end end end From e344afa1eba0d910eef6f3116f4d913d36e337de Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 18 May 2024 15:52:44 +0100 Subject: [PATCH 46/75] Fix advance flags for numerical dissipation to work with IMEX --- moment_kinetics/src/time_advance.jl | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 083ececaa..c8ed7868d 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -796,7 +796,7 @@ function setup_advance_flags(moments, composition, t_params, collisions, end advance_external_source = external_source_settings.ion.active advance_neutral_external_source = external_source_settings.neutral.active - advance_numerical_dissipation = true + advance_numerical_dissipation = !t_params.implicit_vpa_advection # if evolving the density, must advance the continuity equation, # in addition to including sources arising from the use of a modified distribution # function in the kinetic equation @@ -831,14 +831,18 @@ function setup_advance_flags(moments, 
composition, t_params, collisions, end end + # *_diffusion flags are set regardless of whether diffusion is included in explicit or + # implicit part of timestep, because they are used for boundary conditions, not to + # controll which terms are advanced. + # # flag to determine if a d^2/dr^2 operator is present - r_diffusion = (advance_numerical_dissipation && num_diss_params.ion.r_dissipation_coefficient > 0.0) + r_diffusion = (num_diss_params.ion.r_dissipation_coefficient > 0.0) # flag to determine if a d^2/dvpa^2 operator is present # When using implicit_vpa_advection, the vpa diffusion is included in the implicit # step - vpa_diffusion = !t_params.implicit_vpa_advection && ((advance_numerical_dissipation && num_diss_params.ion.vpa_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions) - vperp_diffusion = ((advance_numerical_dissipation && num_diss_params.ion.vperp_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions) - vz_diffusion = (advance_numerical_dissipation && num_diss_params.neutral.vz_dissipation_coefficient > 0.0) + vpa_diffusion = ((num_diss_params.ion.vpa_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions) + vperp_diffusion = ((num_diss_params.ion.vperp_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions) + vz_diffusion = (num_diss_params.neutral.vz_dissipation_coefficient > 0.0) end manufactured_solns_test = manufactured_solns_input.use_for_advance @@ -901,8 +905,20 @@ function setup_implicit_advance_flags(moments, composition, t_params, collisions end if t_params.implicit_vpa_advection advance_vpa_advection = true - vpa_diffusion = true + advance_numerical_dissipation = true end + # *_diffusion flags are set regardless of whether diffusion is included in explicit or + # implicit part of timestep, because they are used for boundary conditions, not to + # controll which terms are advanced. 
+ # + # flag to determine if a d^2/dr^2 operator is present + r_diffusion = (num_diss_params.ion.r_dissipation_coefficient > 0.0) + # flag to determine if a d^2/dvpa^2 operator is present + # When using implicit_vpa_advection, the vpa diffusion is included in the implicit + # step + vpa_diffusion = ((num_diss_params.ion.vpa_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions) + vperp_diffusion = ((num_diss_params.ion.vperp_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions) + vz_diffusion = (num_diss_params.neutral.vz_dissipation_coefficient > 0.0) manufactured_solns_test = manufactured_solns_input.use_for_advance From 730820914923a099f0f8c08c2d3230bc5fc991c4 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 18 May 2024 18:56:16 +0100 Subject: [PATCH 47/75] Fix moment constraints for IMEX schemes Moment constraints should not be imposed individually on the results of the explicit and implicit parts of the RK stages. The constraints are only obeyed analytically by the complete kinetic equation, so forcing a distribution function updated with only a subset of terms to obey the constraints would mess up the time advance. The constraints should only be imposed when the complete RK stage (both explicit and implicit parts) has been completed - which means they should be imposed after the _implicit_ part, except at the final RK stage where the output of the complete timestep is computed (if we were using Butcher table coefficients, this would be the step where the final result is calculated using the 'b' vector coefficients). 
--- moment_kinetics/src/coordinates.jl | 10 +++-- moment_kinetics/src/time_advance.jl | 24 ++++++++---- moment_kinetics/src/vpa_advection.jl | 58 ++++++++++++++-------------- 3 files changed, 52 insertions(+), 40 deletions(-) diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl index 743b33545..d034a2dd0 100644 --- a/moment_kinetics/src/coordinates.jl +++ b/moment_kinetics/src/coordinates.jl @@ -83,6 +83,8 @@ struct coordinate{T <: AbstractVector{mk_float}} scratch5::Array{mk_float,1} # scratch6 is an array used for intermediate calculations requiring n entries scratch6::Array{mk_float,1} + # scratch7 is an array used for intermediate calculations requiring n entries + scratch7::Array{mk_float,1} # scratch_shared is a shared-memory array used for intermediate calculations requiring # n entries scratch_shared::T @@ -228,10 +230,10 @@ function define_coordinate(input, parallel_io::Bool=false; run_directory=nothing input.nelement_global, input.nelement_local, input.nrank, input.irank, input.L, grid, cell_width, igrid, ielement, imin, imax, igrid_full, input.discretization, input.fd_option, input.cheb_option, input.bc, wgts, uniform_grid, duniform_dgrid, scratch, copy(scratch), - copy(scratch), copy(scratch), copy(scratch), copy(scratch), scratch_shared, - scratch_shared2, scratch_2d, copy(scratch_2d), advection, send_buffer, - receive_buffer, input.comm, local_io_range, global_io_range, element_scale, - element_shift, input.element_spacing_option, element_boundaries, + copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), + scratch_shared, scratch_shared2, scratch_2d, copy(scratch_2d), advection, + send_buffer, receive_buffer, input.comm, local_io_range, global_io_range, + element_scale, element_shift, input.element_spacing_option, element_boundaries, radau_first_element, other_nodes, one_over_denominator) if coord.n == 1 && occursin("v", coord.name) diff --git a/moment_kinetics/src/time_advance.jl 
b/moment_kinetics/src/time_advance.jl index c8ed7868d..55aa4460b 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -2196,12 +2196,12 @@ function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa # The result of the implicit solve gives the state vector at 'istage' # which is used as input to the explicit part of the IMEX time step. old_scratch = scratch_implicit[istage] + apply_all_bcs_constraints_update_moments!( + scratch_implicit[istage], moments, fields, boundary_distributions, vz, + vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, + composition, geometry, gyroavs, num_diss_params, advance, + scratch_dummy, false) end - apply_all_bcs_constraints_update_moments!( - scratch_implicit[istage], moments, fields, boundary_distributions, vz, vr, - vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, - geometry, gyroavs, num_diss_params, advance, scratch_dummy, false; - pdf_bc_constraints=false) else # Fully explicit method starts the forward-Euler step with the result from the # previous stage. @@ -2220,12 +2220,22 @@ function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa manufactured_source_list, external_source_settings, num_diss_params, advance, fp_arrays, istage) - diagnostic_moments = diagnostic_checks && istage == n_rk_stages rk_update!(scratch, scratch_implicit, moments, t_params, istage, composition) + + # Always apply boundary conditions and constraints here for explicit schemes. For + # IMEX schemes, only apply boundary conditions and constraints at the final RK + # stage - for other stages they are imposed after the implicit part of the step. + # If `implicit_coefficient_is_zero` is true for the next stage, then this step is + # explicit, so we need the bcs and constraints. 
+ apply_bc_constraints = (t_params.rk_coefs_implicit === nothing + || istage == n_rk_stages + || t_params.implicit_coefficient_is_zero[istage+1]) + diagnostic_moments = diagnostic_checks && istage == n_rk_stages apply_all_bcs_constraints_update_moments!( scratch[istage+1], moments, fields, boundary_distributions, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry, - gyroavs, num_diss_params, advance, scratch_dummy, diagnostic_moments) + gyroavs, num_diss_params, advance, scratch_dummy, diagnostic_moments; + pdf_bc_constraints=apply_bc_constraints) end if t_params.adaptive diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index 8456205c0..df2b778ca 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -63,7 +63,7 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v zero = 1.0e-14 @loop_s is begin @loop_r_z_vperp ir iz ivperp begin - f_old = @view fvec_in.pdf[:,ivperp,iz,ir,is] + f_old_no_bc = @view fvec_in.pdf[:,ivperp,iz,ir,is] this_f_out = @view f_out[:,ivperp,iz,ir,is] speed = @view advect[is].speed[:,ivperp,iz,ir] @@ -98,6 +98,34 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v end end + function apply_bc!(x) + # Boundary condition + enforce_v_boundary_condition_local!(x, vpa_bc, speed, vpa_diffusion, + vpa, vpa_spectral) + + if z.bc == "wall" + # Wall boundary conditions. Note that as density, upar, ppar do not + # change in this implicit step, f_new, f_old, and residual should all + # be zero at exactly the same set of grid points, so it is reasonable + # to zero-out `residual` to impose the boundary condition. We impose + # this after subtracting f_old in case rounding errors, etc. mean that + # at some point f_old had a different boundary condition cut-off + # index. 
+ if z.irank == 0 && iz == 1 + x[icut_lower_z:end] .= 0.0 + end + # absolute velocity at right boundary + if z.irank == z.nrank - 1 && iz == z.n + x[1:icut_upper_z] .= 0.0 + end + end + end + + # Need to apply 'new' boundary conditions to `f_old`, so that by imposing them + # on `residual`, they are automatically imposed on `f_new`. + f_old = vpa.scratch7 .= f_old_no_bc + apply_bc!(f_old) + if nl_solver_params.stage_counter[] % nl_solver_params.preconditioner_update_interval == 0 advection_matrix = allocate_float(vpa.n, vpa.n) advection_matrix .= 0.0 @@ -213,29 +241,6 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v # (f_new - f_old) / dt = RHS(f_new) # ⇒ f_new - f_old - dt*RHS(f_new) = 0 function residual_func!(residual, f_new) - function apply_bc!(x) - # Boundary condition - enforce_v_boundary_condition_local!(x, vpa_bc, speed, vpa_diffusion, - vpa, vpa_spectral) - - if z.bc == "wall" - # Wall boundary conditions. Note that as density, upar, ppar do not - # change in this implicit step, f_new, f_old, and residual should all - # be zero at exactly the same set of grid points, so it is reasonable - # to zero-out `residual` to impose the boundary condition. We impose - # this after subtracting f_old in case rounding errors, etc. mean that - # at some point f_old had a different boundary condition cut-off - # index. - if z.irank == 0 && iz == 1 - x[icut_lower_z:end] .= 0.0 - end - # absolute velocity at right boundary - if z.irank == z.nrank - 1 && iz == z.n - x[1:icut_upper_z] .= 0.0 - end - end - end - apply_bc!(f_new) residual .= f_old advance_f_local!(residual, f_new, advect[is], ivperp, iz, ir, vpa, dt, @@ -255,11 +260,6 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v @. 
residual = f_new - residual apply_bc!(residual) - - # Impose moment constraints on residual - # When we implement 2V moment kinetics, the constraints will couple vpa - # and vperp dimensions, so this will no longer be a 1V operation. - moment_constraints_on_residual!(residual, f_new, moments, vpa) end # Buffers From 04f4d1cf1f7b1f74c3d13e8fbd812a487a645bd1 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 19 May 2024 14:04:41 +0100 Subject: [PATCH 48/75] Separate ionization/CX collisions functions for ions/neutrals Will be useful to allow possibility for ion and neutral advance to be split between explicit and implicit parts of timestep. --- moment_kinetics/src/charge_exchange.jl | 89 +++++++++------ moment_kinetics/src/input_structs.jl | 12 +- moment_kinetics/src/ionization.jl | 89 +++++++++------ moment_kinetics/src/time_advance.jl | 152 ++++++++++++++++++------- 4 files changed, 227 insertions(+), 115 deletions(-) diff --git a/moment_kinetics/src/charge_exchange.jl b/moment_kinetics/src/charge_exchange.jl index 66c1bb7fa..e70782c8c 100644 --- a/moment_kinetics/src/charge_exchange.jl +++ b/moment_kinetics/src/charge_exchange.jl @@ -9,12 +9,12 @@ using ..looping using ..interpolation: interpolate_to_grid_vpa! 
""" -update the evolved pdf for each ion and electron species to account for -charge exchange collisions between ions and neutrals +update the evolved pdf for each ion species to account for charge exchange collisions +between ions and neutrals """ -function charge_exchange_collisions_1V!(f_out, f_neutral_out, fvec_in, moments, - composition, vpa, vz, charge_exchange_frequency, - vpa_spectral, vz_spectral, dt) +function ion_charge_exchange_collisions_1V!(f_out, fvec_in, moments, composition, vpa, vz, + charge_exchange_frequency, vpa_spectral, + vz_spectral, dt) # This routine assumes a 1D model with: # nvz = nvpa and identical vz and vpa grids @@ -32,19 +32,6 @@ function charge_exchange_collisions_1V!(f_out, f_neutral_out, fvec_in, moments, moments.neutral.vth[:,:,is], moments, vpa, vz, charge_exchange_frequency, vz_spectral, dt) end - - begin_sn_r_z_region(no_synchronize=true) - @loop_sn isn begin - # apply CX collisions to all neutral species - # for each neutral species, obtain affect of charge exchange collisions - # with the corresponding ion species - @views charge_exchange_collisions_single_species!( - f_neutral_out[:,1,1,:,:,isn], fvec_in.pdf_neutral[:,1,1,:,:,isn], - fvec_in.pdf[:,1,:,:,isn], fvec_in.density[:,:,isn], - fvec_in.uz_neutral[:,:,isn], fvec_in.upar[:,:,isn], - moments.neutral.vth[:,:,isn], moments.ion.vth[:,:,isn], moments, - vz, vpa, charge_exchange_frequency, vpa_spectral, dt) - end else begin_s_r_z_region() @loop_s is begin @@ -58,8 +45,35 @@ function charge_exchange_collisions_1V!(f_out, f_neutral_out, fvec_in, moments, - fvec_in.pdf[ivpa,1,iz,ir,is]*fvec_in.density_neutral[iz,ir,is]) end end + end +end + +""" +update the evolved pdf for each neutral species to account for charge exchange collisions +between ions and neutrals +""" +function neutral_charge_exchange_collisions_1V!(f_neutral_out, fvec_in, moments, + composition, vpa, vz, + charge_exchange_frequency, vpa_spectral, + vz_spectral, dt) + # This routine assumes a 1D model with: + 
# nvz = nvpa and identical vz and vpa grids - begin_sn_r_z_region(no_synchronize=true) + if moments.evolve_density + begin_sn_r_z_region() + @loop_sn isn begin + # apply CX collisions to all neutral species + # for each neutral species, obtain affect of charge exchange collisions + # with the corresponding ion species + @views charge_exchange_collisions_single_species!( + f_neutral_out[:,1,1,:,:,isn], fvec_in.pdf_neutral[:,1,1,:,:,isn], + fvec_in.pdf[:,1,:,:,isn], fvec_in.density[:,:,isn], + fvec_in.uz_neutral[:,:,isn], fvec_in.upar[:,:,isn], + moments.neutral.vth[:,:,isn], moments.ion.vth[:,:,isn], moments, + vz, vpa, charge_exchange_frequency, vpa_spectral, dt) + end + else + begin_sn_r_z_region() @loop_sn isn begin # apply CX collisions to all neutral species # for each neutral species, obtain affect of charge exchange collisions @@ -135,21 +149,10 @@ function charge_exchange_collisions_single_species!(f_out, pdf_in, pdf_other, end end -function charge_exchange_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, f_ion_vrvzvzeta_in, fvec_in, composition, vz, vr, vzeta, vpa, vperp, z, r, - charge_exchange_frequency, dt) +function ion_charge_exchange_collisions_3V!(f_out, f_neutral_gav_in, fvec_in, composition, + vz, vr, vzeta, vpa, vperp, z, r, + charge_exchange_frequency, dt) # This routine assumes a 3V model with: - @boundscheck vz.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out)) - @boundscheck vr.n == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out)) - @boundscheck vzeta.n == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out)) - @boundscheck z.n == size(f_neutral_out,4) || throw(BoundsError(f_neutral_out)) - @boundscheck r.n == size(f_neutral_out,5) || throw(BoundsError(f_neutral_out)) - @boundscheck composition.n_neutral_species == size(f_neutral_out,6) || throw(BoundsError(f_neutral_out)) - @boundscheck vz.n == size(f_ion_vrvzvzeta_in,1) || throw(BoundsError(f_ion_vrvzvzeta_in)) - @boundscheck vr.n == 
size(f_ion_vrvzvzeta_in,2) || throw(BoundsError(f_ion_vrvzvzeta_in)) - @boundscheck vzeta.n == size(f_ion_vrvzvzeta_in,3) || throw(BoundsError(f_ion_vrvzvzeta_in)) - @boundscheck z.n == size(f_ion_vrvzvzeta_in,4) || throw(BoundsError(f_ion_vrvzvzeta_in)) - @boundscheck r.n == size(f_ion_vrvzvzeta_in,5) || throw(BoundsError(f_ion_vrvzvzeta_in)) - @boundscheck composition.n_neutral_species == size(f_ion_vrvzvzeta_in,6) || throw(BoundsError(f_ion_vrvzvzeta_in)) @boundscheck vpa.n == size(f_out,1) || throw(BoundsError(f_out)) @boundscheck vperp.n == size(f_out,2) || throw(BoundsError(f_out)) @boundscheck z.n == size(f_out,3) || throw(BoundsError(f_out)) @@ -173,6 +176,26 @@ function charge_exchange_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, - fvec_in.pdf[ivpa,ivperp,iz,ir,is]*fvec_in.density_neutral[iz,ir,isn]) end end +end + +function neutral_charge_exchange_collisions_3V!(f_neutral_out, f_ion_vrvzvzeta_in, + fvec_in, composition, vz, vr, vzeta, vpa, + vperp, z, r, charge_exchange_frequency, + dt) + # This routine assumes a 3V model with: + @boundscheck vz.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out)) + @boundscheck vr.n == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out)) + @boundscheck vzeta.n == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out)) + @boundscheck z.n == size(f_neutral_out,4) || throw(BoundsError(f_neutral_out)) + @boundscheck r.n == size(f_neutral_out,5) || throw(BoundsError(f_neutral_out)) + @boundscheck composition.n_neutral_species == size(f_neutral_out,6) || throw(BoundsError(f_neutral_out)) + @boundscheck vz.n == size(f_ion_vrvzvzeta_in,1) || throw(BoundsError(f_ion_vrvzvzeta_in)) + @boundscheck vr.n == size(f_ion_vrvzvzeta_in,2) || throw(BoundsError(f_ion_vrvzvzeta_in)) + @boundscheck vzeta.n == size(f_ion_vrvzvzeta_in,3) || throw(BoundsError(f_ion_vrvzvzeta_in)) + @boundscheck z.n == size(f_ion_vrvzvzeta_in,4) || throw(BoundsError(f_ion_vrvzvzeta_in)) + @boundscheck r.n == 
size(f_ion_vrvzvzeta_in,5) || throw(BoundsError(f_ion_vrvzvzeta_in)) + @boundscheck composition.n_neutral_species == size(f_ion_vrvzvzeta_in,6) || throw(BoundsError(f_ion_vrvzvzeta_in)) + begin_sn_r_z_vzeta_vr_vz_region() @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin # apply CX collisions to all neutral species diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index 399654ef1..4c8b3d736 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -94,10 +94,14 @@ mutable struct advance_info neutral_z_advection::Bool neutral_r_advection::Bool neutral_vz_advection::Bool - cx_collisions::Bool - cx_collisions_1V::Bool - ionization_collisions::Bool - ionization_collisions_1V::Bool + ion_cx_collisions::Bool + neutral_cx_collisions::Bool + ion_cx_collisions_1V::Bool + neutral_cx_collisions_1V::Bool + ion_ionization_collisions::Bool + neutral_ionization_collisions::Bool + ion_ionization_collisions_1V::Bool + neutral_ionization_collisions_1V::Bool ionization_source::Bool krook_collisions_ii::Bool explicit_weakform_fp_collisions::Bool diff --git a/moment_kinetics/src/ionization.jl b/moment_kinetics/src/ionization.jl index a4e7ac3f1..63779c3ff 100644 --- a/moment_kinetics/src/ionization.jl +++ b/moment_kinetics/src/ionization.jl @@ -2,8 +2,10 @@ """ module ionization -export ionization_collisions_1V! -export ionization_collisions_3V! +export ion_ionization_collisions_1V! +export neutral_ionization_collisions_1V! +export ion_ionization_collisions_3V! +export neutral_ionization_collisions_3V! export constant_ionization_source! using ..interpolation: interpolate_to_grid_vpa! 
@@ -64,18 +66,12 @@ function constant_ionization_source!(f_out, fvec_in, vpa, vperp, z, r, moments, end end -function ionization_collisions_1V!(f_out, f_neutral_out, fvec_in, vz, vpa, vperp, z, r, - vz_spectral, moments, composition, collisions, dt) +function ion_ionization_collisions_1V!(f_out, fvec_in, vz, vpa, vperp, z, r, vz_spectral, + moments, composition, collisions, dt) # This routine assumes a 1D model with: # nvz = nvpa and identical vz and vpa grids # nvperp = nvr = nveta = 1 # constant charge_exchange_frequency independent of species - @boundscheck vpa.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out)) - @boundscheck 1 == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out)) - @boundscheck 1 == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out)) - @boundscheck z.n == size(f_neutral_out,4) || throw(BoundsError(f_neutral_out)) - @boundscheck r.n == size(f_neutral_out,5) || throw(BoundsError(f_neutral_out)) - @boundscheck composition.n_neutral_species == size(f_neutral_out,6) || throw(BoundsError(f_neutral_out)) @boundscheck vpa.n == size(f_out,1) || throw(BoundsError(f_out)) @boundscheck 1 == size(f_out,2) || throw(BoundsError(f_out)) @boundscheck z.n == size(f_out,3) || throw(BoundsError(f_out)) @@ -83,8 +79,6 @@ function ionization_collisions_1V!(f_out, f_neutral_out, fvec_in, vz, vpa, vperp @boundscheck composition.n_ion_species == size(f_out,5) || throw(BoundsError(f_out)) - # keep vpa vperp vz vr vzeta local so that - # vpa loop below can also be used for vz begin_r_z_vpa_region() if moments.evolve_density @@ -155,21 +149,44 @@ function ionization_collisions_1V!(f_out, f_neutral_out, fvec_in, vz, vpa, vperp @loop_r_z_vpa ir iz ivpa begin # apply ionization collisions to all ion species f_out[ivpa,1,iz,ir,is] += dt*collisions.ionization*fvec_in.pdf_neutral[ivpa,1,1,iz,ir,isn]*fvec_in.density[iz,ir,is] - # apply ionization collisions to all neutral species - f_neutral_out[ivpa,1,1,iz,ir,isn] -= 
dt*collisions.ionization*fvec_in.pdf_neutral[ivpa,1,1,iz,ir,isn]*fvec_in.density[iz,ir,is] end end end end -function ionization_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, fvec_in, composition, vz, vr, vzeta, vpa, vperp, z, r, collisions, dt) - # This routine assumes a 3V model with: - @boundscheck vz.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out)) - @boundscheck vr.n == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out)) - @boundscheck vzeta.n == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out)) +function neutral_ionization_collisions_1V!(f_neutral_out, fvec_in, vz, vpa, vperp, z, r, + vz_spectral, moments, composition, collisions, dt) + # This routine assumes a 1D model with: + # nvz = nvpa and identical vz and vpa grids + # nvperp = nvr = nveta = 1 + # constant charge_exchange_frequency independent of species + @boundscheck vpa.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out)) + @boundscheck 1 == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out)) + @boundscheck 1 == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out)) @boundscheck z.n == size(f_neutral_out,4) || throw(BoundsError(f_neutral_out)) @boundscheck r.n == size(f_neutral_out,5) || throw(BoundsError(f_neutral_out)) @boundscheck composition.n_neutral_species == size(f_neutral_out,6) || throw(BoundsError(f_neutral_out)) + + if !moments.evolve_density + begin_r_z_vpa_region() + + @loop_s is begin + # ion ionisation rate = < f_n > n_e R_ion + # neutral "ionisation" (depopulation) rate = - f_n n_e R_ion + # no gyroaverage here as 1V code + #NB: used quasineutrality to replace electron density n_e with ion density + #NEEDS GENERALISATION TO n_ion_species > 1 (missing species charge: Sum_i Z_i n_i = n_e) + isn = is + @loop_r_z_vpa ir iz ivpa begin + # apply ionization collisions to all neutral species + f_neutral_out[ivpa,1,1,iz,ir,isn] -= dt*collisions.ionization*fvec_in.pdf_neutral[ivpa,1,1,iz,ir,isn]*fvec_in.density[iz,ir,is] + end + end 
+ end +end + +function ion_ionization_collisions_3V!(f_out, f_neutral_gav_in, fvec_in, composition, vz, vr, vzeta, vpa, vperp, z, r, collisions, dt) + # This routine assumes a 3V model with: @boundscheck vpa.n == size(f_out,1) || throw(BoundsError(f_out)) @boundscheck vperp.n == size(f_out,2) || throw(BoundsError(f_out)) @boundscheck z.n == size(f_out,3) || throw(BoundsError(f_out)) @@ -185,21 +202,6 @@ function ionization_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, fvec_ begin_s_r_z_vperp_vpa_region() - # #if collisions.constant_ionization_rate - # # ## Oddly the test in test/harrisonthompson.jl matches the analitical - # # ## solution (which assumes width=0.0) better with width=0.5 than with, - # # ## e.g., width=0.15. Possibly narrower widths would require more vpa - # # ## resolution, which then causes crashes due to overshoots giving - # # ## negative f?? - # # #width = 0.5 - # # #@loop_s is begin - # # # #@loop_r_z_vperp_vpa ir iz ivperp ivpa begin - # # # # #f_out[ivpa,ivperp,iz,ir,is] += dt*collisions.ionization/width^3*exp(-((vpa.grid[ivpa]^2 + vperp.grid[ivperp]^2)/width^2)) - # # # #end - # # #end - # # #return nothing - # #end - # ion ionization rate = < f_n > n_e R_ion # neutral "ionization" (depopulation) rate = - f_n n_e R_ion #NB: used quasineutrality to replace electron density n_e with ion density @@ -213,6 +215,24 @@ function ionization_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, fvec_ end end end +end + +function neutral_ionization_collisions_3V!(f_neutral_out, fvec_in, composition, vz, vr, vzeta, vpa, vperp, z, r, collisions, dt) + # This routine assumes a 3V model with: + @boundscheck vz.n == size(f_neutral_out,1) || throw(BoundsError(f_neutral_out)) + @boundscheck vr.n == size(f_neutral_out,2) || throw(BoundsError(f_neutral_out)) + @boundscheck vzeta.n == size(f_neutral_out,3) || throw(BoundsError(f_neutral_out)) + @boundscheck z.n == size(f_neutral_out,4) || throw(BoundsError(f_neutral_out)) + @boundscheck r.n == 
size(f_neutral_out,5) || throw(BoundsError(f_neutral_out)) + @boundscheck composition.n_neutral_species == size(f_neutral_out,6) || throw(BoundsError(f_neutral_out)) + + ionization_frequency = collisions.ionization + + # ion ionization rate = < f_n > n_e R_ion + # neutral "ionization" (depopulation) rate = - f_n n_e R_ion + #NB: used quasineutrality to replace electron density n_e with ion density + #NEEDS GENERALISATION TO n_ion_species > 1 (missing species charge: Sum_i Z_i n_i = n_e) + # for ion species we need gyroaveraged neutral pdf, which is not stored in fvec (scratch[istage]) begin_sn_r_z_vzeta_vr_vz_region() @loop_sn isn begin for is ∈ 1:composition.n_ion_species @@ -222,7 +242,6 @@ function ionization_collisions_3V!(f_out, f_neutral_out, f_neutral_gav_in, fvec_ end end end - end end diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 55aa4460b..d6f8c2229 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -36,8 +36,13 @@ using ..neutral_z_advection: update_speed_neutral_z!, neutral_advection_z! using ..neutral_vz_advection: update_speed_neutral_vz!, neutral_advection_vz! using ..vperp_advection: update_speed_vperp!, vperp_advection! using ..vpa_advection: update_speed_vpa!, vpa_advection!, implicit_vpa_advection! -using ..charge_exchange: charge_exchange_collisions_1V!, charge_exchange_collisions_3V! -using ..ionization: ionization_collisions_1V!, ionization_collisions_3V!, constant_ionization_source! +using ..charge_exchange: ion_charge_exchange_collisions_1V!, + neutral_charge_exchange_collisions_1V!, + ion_charge_exchange_collisions_3V!, + neutral_charge_exchange_collisions_3V! +using ..ionization: ion_ionization_collisions_1V!, neutral_ionization_collisions_1V!, + ion_ionization_collisions_3V!, neutral_ionization_collisions_3V!, + constant_ionization_source! using ..krook_collisions: krook_collisions! 
using ..external_sources using ..nonlinear_solvers @@ -706,10 +711,14 @@ function setup_advance_flags(moments, composition, t_params, collisions, advance_vperp_advection = false advance_z_advection = false advance_r_advection = false - advance_cx_1V = false - advance_cx = false - advance_ionization = false - advance_ionization_1V = false + advance_ion_cx_1V = false + advance_neutral_cx_1V = false + advance_ion_cx = false + advance_neutral_cx = false + advance_ion_ionization = false + advance_neutral_ionization = false + advance_ion_ionization_1V = false + advance_neutral_ionization_1V = false advance_ionization_source = false advance_krook_collisions_ii = false advance_external_source = false @@ -760,9 +769,11 @@ function setup_advance_flags(moments, composition, t_params, collisions, # account for charge exchange collisions if abs(collisions.charge_exchange) > 0.0 if vz.n == vpa.n && vperp.n == 1 && vr.n == 1 && vzeta.n == 1 - advance_cx_1V = true + advance_ion_cx_1V = true + advance_neutral_cx_1V = true elseif vperp.n > 1 && vr.n > 1 && vzeta.n > 1 - advance_cx = true + advance_ion_cx = true + advance_neutral_cx = true else error("If any perpendicular velocity has length>1 they all must. " * "If all perpendicular velocities have length=1, then vpa and " @@ -775,9 +786,11 @@ function setup_advance_flags(moments, composition, t_params, collisions, # account for ionization collisions if abs(collisions.ionization) > 0.0 if vz.n == vpa.n && vperp.n == 1 && vr.n == 1 && vzeta.n == 1 - advance_ionization_1V = true + advance_ion_ionization_1V = true + advance_neutral_ionization_1V = true elseif vperp.n > 1 && vr.n > 1 && vzeta.n > 1 - advance_ionization = true + advance_ion_ionization = true + advance_neutral_ionization = true else error("If any perpendicular velocity has length>1 they all must. 
" * "If all perpendicular velocities have length=1, then vpa and " @@ -849,9 +862,11 @@ function setup_advance_flags(moments, composition, t_params, collisions, return advance_info(advance_vpa_advection, advance_vperp_advection, advance_z_advection, advance_r_advection, advance_neutral_z_advection, advance_neutral_r_advection, - advance_neutral_vz_advection, advance_cx, advance_cx_1V, - advance_ionization, advance_ionization_1V, - advance_ionization_source, advance_krook_collisions_ii, + advance_neutral_vz_advection, advance_ion_cx, advance_neutral_cx, + advance_ion_cx_1V, advance_neutral_cx_1V, advance_ion_ionization, + advance_neutral_ionization, advance_ion_ionization_1V, + advance_neutral_ionization_1V, advance_ionization_source, + advance_krook_collisions_ii, explicit_weakform_fp_collisions, advance_external_source, advance_numerical_dissipation, advance_sources, advance_continuity, advance_force_balance, @@ -875,10 +890,14 @@ function setup_implicit_advance_flags(moments, composition, t_params, collisions advance_vperp_advection = false advance_z_advection = false advance_r_advection = false - advance_cx_1V = false - advance_cx = false - advance_ionization = false - advance_ionization_1V = false + advance_ion_cx_1V = false + advance_neutral_cx_1V = false + advance_ion_cx = false + advance_neutral_cx = false + advance_ion_ionization = false + advance_neutral_ionization = false + advance_ion_ionization_1V = false + advance_neutral_ionization_1V = false advance_ionization_source = false advance_krook_collisions_ii = false advance_external_source = false @@ -924,8 +943,10 @@ function setup_implicit_advance_flags(moments, composition, t_params, collisions return advance_info(advance_vpa_advection, advance_vperp_advection, advance_z_advection, advance_r_advection, advance_neutral_z_advection, advance_neutral_r_advection, - advance_neutral_vz_advection, advance_cx, advance_cx_1V, - advance_ionization, advance_ionization_1V, + advance_neutral_vz_advection, 
advance_ion_cx, advance_neutral_cx, + advance_ion_cx_1V, advance_neutral_cx_1V, advance_ion_ionization, + advance_neutral_ionization, advance_ion_ionization_1V, + advance_neutral_ionization_1V, advance_ionization_source, advance_krook_collisions_ii, explicit_weakform_fp_collisions, advance_external_source, advance_numerical_dissipation, @@ -1476,20 +1497,32 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para # account for charge exchange collisions between ions and neutrals if composition.n_neutral_species > 0 if collisions.charge_exchange > 0.0 - advance.cx_collisions = true + advance.ion_cx_collisions = true + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, + vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, + composition, collisions, external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) + advance.ion_cx_collisions = false + advance.neutral_cx_collisions = true time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, nl_solver_params, advance, advance_implicit, istep) - advance.cx_collisions = false + advance.neutral_cx_collisions = false end if collisions.ionization > 0.0 - advance.ionization_collisions = true + advance.ion_ionization_collisions = true + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa, + z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect, + composition, collisions, external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) + advance.ion_ionization_collisions = false + advance.neutral_ionization_collisions = true time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa, z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect, composition, collisions, 
external_source_settings, num_diss_params, nl_solver_params, advance, advance_implicit, istep) - advance.ionization_collisions = false + advance.neutral_ionization_collisions = false end end if collisions.krook_collision_frequency_prefactor > 0.0 @@ -1578,20 +1611,32 @@ function time_advance_split_operators!(pdf, scratch, scratch_implicit, t, t_para # account for charge exchange collisions between ions and neutrals if composition.n_neutral_species > 0 if collisions.ionization > 0.0 - advance.ionization = true + advance.neutral_ionization = true time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa, z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect, composition, collisions, external_source_settings, num_diss_params, nl_solver_params, advance, advance_implicit, istep) - advance.ionization = false + advance.neutral_ionization = false + advance.ion_ionization = true + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, z, vpa, + z_spectral, vpa_spectral, moments, fields, z_advect, vpa_advect, + composition, collisions, external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) + advance.ion_ionization = false end if collisions.charge_exchange > 0.0 - advance.cx_collisions = true + advance.neutral_cx_collisions = true time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, composition, collisions, external_source_settings, num_diss_params, nl_solver_params, advance, advance_implicit, istep) - advance.cx_collisions = false + advance.neutral_cx_collisions = false + advance.ion_cx_collisions = true + time_advance_no_splitting!(pdf, scratch, scratch_implicit, t, t_params, vpa, z, + vpa_spectral, z_spectral, moments, fields, vpa_advect, z_advect, + composition, collisions, external_source_settings, num_diss_params, + nl_solver_params, advance, advance_implicit, istep) + 
advance.ion_cx_collisions = false end end # z_advection! advances the operator-split 1D advection equation in z @@ -2405,31 +2450,52 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments, source_terms_manufactured!(fvec_out.pdf, fvec_out.pdf_neutral, vz, vr, vzeta, vpa, vperp, z, r, t, dt, composition, manufactured_source_list) end - if advance.cx_collisions || advance.ionization_collisions + if advance.ion_cx_collisions || advance.ion_ionization_collisions # gyroaverage neutral dfn and place it in the ion.buffer array for use in the collisions step vzvrvzeta_to_vpavperp!(pdf.ion.buffer, fvec_in.pdf_neutral, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, geometry, composition) + end + if advance.neutral_cx_collisions || advance.neutral_ionization_collisions # interpolate ion particle dfn and place it in the neutral.buffer array for use in the collisions step vpavperp_to_vzvrvzeta!(pdf.neutral.buffer, fvec_in.pdf, vz, vr, vzeta, vpa, vperp, z, r, geometry, composition) end # account for charge exchange collisions between ions and neutrals - if advance.cx_collisions_1V - charge_exchange_collisions_1V!(fvec_out.pdf, fvec_out.pdf_neutral, fvec_in, - moments, composition, vpa, vz, - collisions.charge_exchange, vpa_spectral, - vz_spectral, dt) - elseif advance.cx_collisions - charge_exchange_collisions_3V!(fvec_out.pdf, fvec_out.pdf_neutral, pdf.ion.buffer, pdf.neutral.buffer, fvec_in, composition, - vz, vr, vzeta, vpa, vperp, z, r, collisions.charge_exchange, dt) + if advance.ion_cx_collisions_1V + ion_charge_exchange_collisions_1V!(fvec_out.pdf, fvec_in, moments, composition, + vpa, vz, collisions.charge_exchange, + vpa_spectral, vz_spectral, dt) + elseif advance.ion_cx_collisions + ion_charge_exchange_collisions_3V!(fvec_out.pdf, pdf.ion.buffer, fvec_in, + composition, vz, vr, vzeta, vpa, vperp, z, r, + collisions.charge_exchange, dt) + end + if advance.neutral_cx_collisions_1V + neutral_charge_exchange_collisions_1V!(fvec_out.pdf_neutral, fvec_in, 
moments, + composition, vpa, vz, + collisions.charge_exchange, vpa_spectral, + vz_spectral, dt) + elseif advance.neutral_cx_collisions + neutral_charge_exchange_collisions_3V!(fvec_out.pdf_neutral, pdf.neutral.buffer, + fvec_in, composition, vz, vr, vzeta, vpa, + vperp, z, r, collisions.charge_exchange, + dt) end # account for ionization collisions between ions and neutrals - if advance.ionization_collisions_1V - ionization_collisions_1V!(fvec_out.pdf, fvec_out.pdf_neutral, fvec_in, vz, vpa, - vperp, z, r, vz_spectral, moments, composition, - collisions, dt) - elseif advance.ionization_collisions - ionization_collisions_3V!(fvec_out.pdf, fvec_out.pdf_neutral, pdf.ion.buffer, fvec_in, composition, - vz, vr, vzeta, vpa, vperp, z, r, collisions, dt) + if advance.ion_ionization_collisions_1V + ion_ionization_collisions_1V!(fvec_out.pdf, fvec_in, vz, vpa, vperp, z, r, + vz_spectral, moments, composition, collisions, dt) + elseif advance.ion_ionization_collisions + ion_ionization_collisions_3V!(fvec_out.pdf, pdf.ion.buffer, fvec_in, composition, + vz, vr, vzeta, vpa, vperp, z, r, collisions, dt) + end + if advance.neutral_ionization_collisions_1V + neutral_ionization_collisions_1V!(fvec_out.pdf_neutral, fvec_in, vz, vpa, vperp, + z, r, vz_spectral, moments, composition, + collisions, dt) + elseif advance.neutral_ionization_collisions + neutral_ionization_collisions_3V!(fvec_out.pdf_neutral, pdf.neutral.buffer, fvec_in, + composition, vz, vr, vzeta, vpa, vperp, z, r, + collisions, dt) end if advance.ionization_source constant_ionization_source!(fvec_out.pdf, fvec_in, vpa, vperp, z, r, moments, From 96d5cdc80db025c6db9ae596c0c09ede74e2f979 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 19 May 2024 15:02:35 +0100 Subject: [PATCH 49/75] Option for implicit advance of all terms in ion kinetic equation --- .../src/makie_post_processing.jl | 19 +- moment_kinetics/src/file_io.jl | 2 +- moment_kinetics/src/input_structs.jl | 1 + 
moment_kinetics/src/moment_kinetics_input.jl | 3 +- moment_kinetics/src/nonlinear_solvers.jl | 172 +++++++- moment_kinetics/src/time_advance.jl | 417 ++++++++++++++++-- 6 files changed, 553 insertions(+), 61 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index c4916e025..fe500dea5 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -7306,8 +7306,11 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) else time = ri.time end - CFL_vars = ["minimum_CFL_ion_z"] - if !ri.t_input["implicit_vpa_advection"] + CFL_vars = String[] + if !ri.t_input["implicit_ion_advance"] + push!(CFL_vars, "minimum_CFL_ion_z") + end + if !(ri.t_input["implicit_ion_advance"] || ri.t_input["implicit_vpa_advection"]) push!(CFL_vars, "minimum_CFL_ion_vpa") end if ri.n_neutral_species > 0 @@ -7414,12 +7417,14 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) end end - # Ion z advection - counter += 1 - plot_1d(time, @view limit_caused_by_per_output[counter,:]; - label=prefix * "ion z advect", ax=ax, linestyle=:dot) + if !ri.t_input["implicit_ion_advance"] + # Ion z advection + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "ion z advect", ax=ax, linestyle=:dot) + end - if !ri.t_input["implicit_vpa_advection"] + if !(ri.t_input["implicit_ion_advance"] || ri.t_input["implicit_vpa_advection"]) # Ion vpa advection counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl index 200f55ce3..e63ae81cd 100644 --- a/moment_kinetics/src/file_io.jl +++ b/moment_kinetics/src/file_io.jl @@ -731,7 +731,7 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species, 
parallel_io=parallel_io, description="Number of linear iterations for $term"), ) - for term ∈ keys(nl_solver_params) if term !== nothing) + for (term, params) ∈ pairs(nl_solver_params) if params !== nothing) return io_moments_info(fid, io_time, io_phi, io_Er, io_Ez, io_density, io_upar, io_ppar, io_pperp, io_qpar, io_vth, io_dSdt, io_chodura_lower, io_chodura_upper, diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index 4c8b3d736..99a338156 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -74,6 +74,7 @@ struct time_info{Terrorsum <: Real, Trkimp, Timpzero} last_fail_proximity_factor::mk_float minimum_dt::mk_float maximum_dt::mk_float + implicit_ion_advance::Bool implicit_vpa_advection::Bool write_after_fixed_step_count::Bool error_sum_zero::Terrorsum diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl index dc3c8e9c6..82b8b00fb 100644 --- a/moment_kinetics/src/moment_kinetics_input.jl +++ b/moment_kinetics/src/moment_kinetics_input.jl @@ -211,7 +211,8 @@ function mk_input(scan_input=Dict(); save_inputs_to_txt=false, ignore_MPI=true) last_fail_proximity_factor=1.05, minimum_dt=0.0, maximum_dt=Inf, - implicit_vpa_advection=true, + implicit_ion_advance=true, + implicit_vpa_advection=false, write_after_fixed_step_count=false, high_precision_error_sum=false, ) diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index a704b638b..df4799632 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -82,19 +82,20 @@ function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol ) nl_solver_input = Dict_to_NamedTuple(nl_solver_section) - total_size_coords = prod(isa(c, coordinate) ? c.n : c for c ∈ values(coords)) + coord_sizes = Tuple(isa(c, coordinate) ? 
c.n : c for c ∈ coords) + total_size_coords = prod(coord_sizes) outer_coord_sizes = Tuple(isa(c, coordinate) ? c.n : c for c ∈ outer_coords) linear_restart = nl_solver_input.linear_restart if serial_solve H = allocate_float(linear_restart + 1, linear_restart) - V = allocate_float((isa(c, coordinate) ? c.n : c for c ∈ values(coords))..., linear_restart+1) + V = allocate_float(reverse(coord_sizes)..., linear_restart+1) H .= 0.0 V .= 0.0 else H = allocate_shared_float(linear_restart + 1, linear_restart) - V = allocate_shared_float((isa(c, coordinate) ? c.n : c for c ∈ values(coords))..., linear_restart+1) + V = allocate_shared_float(reverse(coord_sizes)..., linear_restart+1) begin_serial_region() @serial_region begin @@ -327,6 +328,8 @@ function get_distributed_error_norm(coords, rtol, atol, x) this_norm = distributed_error_norm_z elseif dims == (:vpa,) this_norm = distributed_error_norm_vpa + elseif dims == (:s, :r, :z, :vperp, :vpa) + this_norm = distributed_error_norm_s_r_z_vperp_vpa else error("dims=$dims is not supported yet. 
Need to write another " * "`distributed_error_norm_*()` function in nonlinear_solvers.jl") @@ -387,6 +390,50 @@ function distributed_error_norm_vpa(residual::AbstractArray{mk_float, 1}, coords return residual_norm end +function distributed_error_norm_s_r_z_vperp_vpa(residual::AbstractArray{mk_float, 5}, + coords; rtol, atol, x) + n_ion_species = coords.s + r = coords.r + z = coords.z + vperp = coords.vperp + vpa = coords.vpa + + begin_s_r_z_vperp_vpa_region() + + local_norm = 0.0 + if r.irank < r.nrank - 1 + rend = r.n + else + rend = r.n + 1 + end + if z.irank < z.nrank - 1 + zend = z.n + else + zend = z.n + 1 + end + @loop_s_r_z is ir iz begin + if ir == rend || iz == zend + continue + end + @loop_vperp_vpa ivperp ivpa begin + local_norm += (residual[ivpa,ivperp,iz,ir,is] / (rtol * abs(x[ivpa,ivperp,iz,ir,is]) + atol))^2 + end + end + + _block_synchronize() + block_norm = MPI.Reduce(local_norm, +, comm_block[]) + + if block_rank[] == 0 + global_norm = MPI.Allreduce(block_norm, +, comm_inter_block[]) + global_norm = sqrt(global_norm / (n_ion_species * r.n_global * z.n_global * vperp.n_global * vpa.n_global)) + else + global_norm = nothing + end + global_norm = MPI.bcast(global_norm, comm_block[]; root=0) + + return global_norm +end + """ get_distributed_linear_norm(coords) @@ -399,6 +446,8 @@ function get_distributed_linear_norm(coords) return distributed_linear_norm_z elseif dims == (:vpa,) return distributed_linear_norm_vpa + elseif dims == (:s, :r, :z, :vperp, :vpa) + return distributed_linear_norm_s_r_z_vperp_vpa else error("dims=$dims is not supported yet. 
Need to write another " * "`distributed_linear_norm_*()` function in nonlinear_solvers.jl") @@ -445,6 +494,45 @@ function distributed_linear_norm_vpa(residual::AbstractArray{mk_float, 1}, coord return norm(residual) end +function distributed_linear_norm_s_r_z_vperp_vpa(residual::AbstractArray{mk_float, 5}, coords) + r = coords.r + z = coords.z + + begin_s_r_z_vperp_vpa_region() + + local_norm = 0.0 + if r.irank < r.nrank - 1 + rend = r.n + else + rend = r.n + 1 + end + if z.irank < z.nrank - 1 + zend = z.n + else + zend = z.n + 1 + end + + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + if ir == rend || iz == zend + continue + end + local_norm += residual[ivpa,ivperp,iz,ir,is]^2 + end + + _block_synchronize() + block_norm = MPI.Reduce(local_norm, +, comm_block[]) + + if block_rank[] == 0 + global_norm = MPI.Allreduce(block_norm, +, comm_inter_block[]) + global_norm = sqrt(global_norm) + else + global_norm = nothing + end + global_norm = MPI.bcast(global_norm, comm_block[]; root=0) + + return global_norm +end + """ get_distributed_dot(coords) @@ -457,13 +545,16 @@ function get_distributed_dot(coords) return distributed_dot_z elseif dims == (:vpa,) return distributed_dot_vpa + elseif dims == (:s, :r, :z, :vperp, :vpa) + return distributed_dot_s_r_z_vperp_vpa else error("dims=$dims is not supported yet. 
Need to write another " * "`distributed_dot_*()` function in nonlinear_solvers.jl") end end -function distributed_dot_z(x::AbstractArray{mk_float, 1}, y::AbstractArray{mk_float, 1}) +function distributed_dot_z(x::AbstractArray{mk_float, 1}, y::AbstractArray{mk_float, 1}, + coords) begin_z_region() @@ -496,12 +587,51 @@ function distributed_dot_z(x::AbstractArray{mk_float, 1}, y::AbstractArray{mk_fl return global_dot end -function distributed_dot_vpa(x::AbstractArray{mk_float, 1}, y::AbstractArray{mk_float, 1}) +function distributed_dot_vpa(x::AbstractArray{mk_float, 1}, y::AbstractArray{mk_float, 1}, + coords) # No parallelism needed when the implicit solve is over vpa - assume that this will be # called inside a parallelised s_r_z_vperp loop. return dot(x, y) end +function distributed_dot_s_r_z_vperp_vpa(x::AbstractArray{mk_float, 5}, + y::AbstractArray{mk_float, 5}, coords) + r = coords.r + z = coords.z + + begin_z_region() + + local_dot = 0.0 + if r.irank < r.nrank - 1 + rend = r.n + else + rend = r.n + 1 + end + if z.irank < z.nrank - 1 + zend = z.n + else + zend = z.n + 1 + end + + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + if ir == rend || iz == zend + continue + end + local_dot += x[ivpa,ivperp,iz,ir,is] * y[ivpa,ivperp,iz,ir,is] + end + + _block_synchronize() + block_dot = MPI.Reduce(local_dot, +, comm_block[]) + + if block_rank[] == 0 + global_dot = MPI.Allreduce(block_dot, +, comm_inter_block[]) + else + global_dot = nothing + end + + return global_dot +end + """ get_parallel_map(coords) @@ -514,6 +644,8 @@ function get_parallel_map(coords) return parallel_map_z elseif dims == (:vpa,) return parallel_map_vpa + elseif dims == (:s, :r, :z, :vperp, :vpa) + return parallel_map_s_r_z_vperp_vpa else error("dims=$dims is not supported yet. 
Need to write another " * "`parallel_map_*()` function in nonlinear_solvers.jl") @@ -546,6 +678,18 @@ function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, return nothing end +function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}, + args::AbstractArray{mk_float, 5}...) + + begin_s_r_z_vperp_vpa_region() + + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + result[ivpa,ivperp,iz,ir,is] = func((x[ivpa,ivperp,iz,ir,is] for x ∈ args)...) + end + + return nothing +end + """ Apply the GMRES algorithm to solve the 'linear problem' J.δx^n = R(x^n), which is needed at each step of the outer Newton iteration (in `newton_solve!()`). @@ -580,7 +724,7 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol # Now we actually set 'w' as the first Krylov vector, and normalise it. parallel_map((residual0, v) -> -residual0 - v, w, residual0, v) beta = distributed_norm(w, coords) - parallel_map((w) -> w/beta, @view(V[:,1]), w) + parallel_map((w) -> w/beta, selectdim(V,ndims(V),1), w) # Set tolerance for GMRES iteration to rtol times the initial residual, unless this is # so small that it is smaller than atol, in which case use atol instead. 
@@ -596,13 +740,13 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol #println("Linear ", counter) # Compute next Krylov vector - parallel_map((V) -> V, w, @view(V[:,i])) + parallel_map((V) -> V, w, selectdim(V,ndims(V),i)) approximate_Jacobian_vector_product!(w) # Gram-Schmidt orthogonalization for j ∈ 1:i - parallel_map((V) -> V, v, @view(V[:,j])) - w_dot_Vj = distributed_dot(w, v) + parallel_map((V) -> V, v, selectdim(V,ndims(V),j)) + w_dot_Vj = distributed_dot(w, v, coords) if serial_solve H[j,i] = w_dot_Vj else @@ -611,7 +755,7 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol H[j,i] = w_dot_Vj end end - parallel_map((w, V) -> w - H[j,i] * V, w, w, @view(V[:,j])) + parallel_map((w, V) -> w - H[j,i] * V, w, w, selectdim(V,ndims(V),j)) end norm_w = distributed_norm(w, coords) if serial_solve @@ -622,7 +766,7 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol H[i+1,i] = norm_w end end - parallel_map((w) -> w / H[i+1,i], @view(V[:,i+1]), w) + parallel_map((w) -> w / H[i+1,i], selectdim(V,ndims(V),i+1), w) function temporary_residual!(result, guess) #println("temporary residual ", size(result), " ", size(@view(H[1:i+1,1:i])), " ", size(guess)) @@ -671,7 +815,7 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol # delta_x .= delta_x .+ sum(y[i] .* V[:,i] for i ∈ 1:length(y)) # slightly abusing splatting to get the sum into a lambda-function. parallel_map((delta_x, V...) -> delta_x + sum(this_y * this_V for (this_y, this_V) ∈ zip(y, V)), - delta_x, delta_x, (@view(V[:,i]) for i ∈ 1:length(y))...) + delta_x, delta_x, (selectdim(V,ndims(V),i) for i ∈ 1:length(y))...) 
right_preconditioner(delta_x) if residual < tol || restart_counter > max_restarts @@ -689,9 +833,9 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol parallel_map((residual0, v) -> -residual0 - v, v, residual0, v) beta = distributed_norm(v, coords) for i ∈ 2:length(y) - parallel_map(() -> 0.0, @view(V[:,i])) + parallel_map(() -> 0.0, selectdim(V,ndims(V),i)) end - parallel_map((v) -> v/beta, @view(V[:,1]), v) + parallel_map((v) -> v/beta, selectdim(V,ndims(V),1), v) end return counter diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index d6f8c2229..5924cde68 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -9,7 +9,7 @@ export setup_dummy_and_buffer_arrays using MPI using ..type_definitions: mk_float, mk_int -using ..array_allocation: allocate_float, allocate_shared_float, allocate_shared_bool +using ..array_allocation: allocate_float, allocate_shared_float, allocate_shared_int, allocate_shared_bool using ..communication using ..communication: _block_synchronize using ..debugging @@ -25,9 +25,11 @@ using ..velocity_moments: calculate_ion_moment_derivatives!, calculate_neutral_m using ..velocity_grid_transforms: vzvrvzeta_to_vpavperp!, vpavperp_to_vzvrvzeta! using ..boundary_conditions: enforce_boundary_conditions! using ..boundary_conditions: enforce_neutral_boundary_conditions! +using ..boundary_conditions: vpagrid_to_dzdt, enforce_v_boundary_condition_local! using ..input_structs using ..moment_constraints: hard_force_moment_constraints!, - hard_force_moment_constraints_neutral! + hard_force_moment_constraints_neutral!, + moment_constraints_on_residual! using ..advection: setup_advection using ..z_advection: update_speed_z!, z_advection! using ..r_advection: update_speed_r!, r_advection! 
@@ -136,6 +138,13 @@ struct scratch_dummy_arrays # needs to be shared memory buffer_vpavperpzrs_1::MPISharedArray{mk_float,5} buffer_vpavperpzrs_2::MPISharedArray{mk_float,5} + # buffers to hold ion pdf for implicit solves + implicit_buffer_vpavperpzrs_1::MPISharedArray{mk_float,5} + implicit_buffer_vpavperpzrs_2::MPISharedArray{mk_float,5} + implicit_buffer_vpavperpzrs_3::MPISharedArray{mk_float,5} + implicit_buffer_vpavperpzrs_4::MPISharedArray{mk_float,5} + implicit_buffer_vpavperpzrs_5::MPISharedArray{mk_float,5} + implicit_buffer_vpavperpzrs_6::MPISharedArray{mk_float,5} buffer_vzvrvzetazsn_1::MPISharedArray{mk_float,5} buffer_vzvrvzetazsn_2::MPISharedArray{mk_float,5} @@ -160,6 +169,8 @@ struct scratch_dummy_arrays buffer_vpavperp_2::MPISharedArray{mk_float,2} buffer_vpavperp_3::MPISharedArray{mk_float,2} + int_buffer_rs_1::MPISharedArray{mk_int,2} + int_buffer_rs_2::MPISharedArray{mk_int,2} end struct advect_object_struct @@ -296,6 +307,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, if rk_coefs_implicit === nothing # Not an IMEX scheme, so cannot have any implicit terms + t_input["implicit_ion_advance"] = false t_input["implicit_vpa_advection"] = false end @@ -314,7 +326,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, t_input["step_update_prefactor"], t_input["max_increase_factor"], t_input["max_increase_factor_near_last_fail"], t_input["last_fail_proximity_factor"], t_input["minimum_dt"], - t_input["maximum_dt"], t_input["implicit_vpa_advection"], + t_input["maximum_dt"], t_input["implicit_ion_advance"], + t_input["implicit_vpa_advection"], t_input["write_after_fixed_step_count"], error_sum_zero, t_input["split_operators"], t_input["steady_state_residual"], t_input["converged_residual_value"], @@ -386,8 +399,10 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop # ion pdf push!(t_params.limit_caused_by, 0) # RK accuracy - push!(t_params.limit_caused_by, 0) # z-advection CFL 
limit - if !t_params.implicit_vpa_advection + if !t_params.implicit_ion_advance + push!(t_params.limit_caused_by, 0) # z-advection CFL limit + end + if !(t_params.implicit_ion_advance || t_params.implicit_vpa_advection) push!(t_params.limit_caused_by, 0) # vpa-advection CFL limit end push!(t_params.failure_caused_by, 0) @@ -446,6 +461,20 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop # Set up parameters for Jacobian-free Newton-Krylov solver used for implicit part of # timesteps. + if t_params.implicit_ion_advance + # Implicit solve for all terms in the ion kinetic equation acts on the whole ion + # distribution function, so the solver is set up over (s, r, z, vperp, vpa). + nl_solver_ion_advance_params = + setup_nonlinear_solve(input_dict, + (s=composition.n_ion_species, r=r, z=z, vperp=vperp, + vpa=vpa), + (); + default_rtol=t_params.rtol / 10.0, + default_atol=t_params.atol / 10.0, + preconditioner_type="lu") + else + nl_solver_ion_advance_params = nothing + end if t_params.implicit_vpa_advection # Implicit solve for vpa_advection term should be done in serial, as it will be # called within a parallelised s_r_z_vperp loop. 
@@ -458,7 +487,13 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop else nl_solver_vpa_advection_params = nothing end - nl_solver_params = (vpa_advection=nl_solver_vpa_advection_params,) + if nl_solver_ion_advance_params !== nothing && + nl_solver_vpa_advection_params !== nothing + error("Cannot use implicit_ion_advance and implicit_vpa_advection at the same " + * "time") + end + nl_solver_params = (ion_advance=nl_solver_ion_advance_params, + vpa_advection=nl_solver_vpa_advection_params,) begin_serial_region() @@ -746,11 +781,11 @@ function setup_advance_flags(moments, composition, t_params, collisions, # default for non-split operators is to include both vpa and z advection together # If using an IMEX scheme and implicit vpa advection has been requested, then vpa # advection is not included in the explicit part of the timestep. - advance_vpa_advection = vpa.n > 1 && z.n > 1 && !t_params.implicit_vpa_advection - advance_vperp_advection = vperp.n > 1 && z.n > 1 - advance_z_advection = z.n > 1 - advance_r_advection = r.n > 1 - if collisions.fkpl.nuii > 0.0 && vperp.n > 1 + advance_vpa_advection = vpa.n > 1 && z.n > 1 && !(t_params.implicit_ion_advance || t_params.implicit_vpa_advection) + advance_vperp_advection = vperp.n > 1 && z.n > 1 && !t_params.implicit_ion_advance + advance_z_advection = z.n > 1 && !t_params.implicit_ion_advance + advance_r_advection = r.n > 1 && !t_params.implicit_ion_advance + if collisions.fkpl.nuii > 0.0 && vperp.n > 1 && !t_params.implicit_ion_advance explicit_weakform_fp_collisions = true else explicit_weakform_fp_collisions = false @@ -769,10 +804,10 @@ function setup_advance_flags(moments, composition, t_params, collisions, # account for charge exchange collisions if abs(collisions.charge_exchange) > 0.0 if vz.n == vpa.n && vperp.n == 1 && vr.n == 1 && vzeta.n == 1 - advance_ion_cx_1V = true + advance_ion_cx_1V = !t_params.implicit_ion_advance advance_neutral_cx_1V = true elseif vperp.n > 1 && vr.n > 1 
&& vzeta.n > 1 - advance_ion_cx = true + advance_ion_cx = !t_params.implicit_ion_advance advance_neutral_cx = true else error("If any perpendicular velocity has length>1 they all must. " @@ -786,10 +821,10 @@ function setup_advance_flags(moments, composition, t_params, collisions, # account for ionization collisions if abs(collisions.ionization) > 0.0 if vz.n == vpa.n && vperp.n == 1 && vr.n == 1 && vzeta.n == 1 - advance_ion_ionization_1V = true + advance_ion_ionization_1V = !t_params.implicit_ion_advance advance_neutral_ionization_1V = true elseif vperp.n > 1 && vr.n > 1 && vzeta.n > 1 - advance_ion_ionization = true + advance_ion_ionization = !t_params.implicit_ion_advance advance_neutral_ionization = true else error("If any perpendicular velocity has length>1 they all must. " @@ -801,20 +836,20 @@ function setup_advance_flags(moments, composition, t_params, collisions, end end # exception for the case where ions are evolved alone but sourced by ionization - if collisions.ionization > 0.0 && collisions.constant_ionization_rate + if collisions.ionization > 0.0 && collisions.constant_ionization_rate && !t_params.implicit_ion_advance advance_ionization_source = true end if collisions.krook.nuii0 > 0.0 - advance_krook_collisions_ii = true + advance_krook_collisions_ii = !t_params.implicit_ion_advance end - advance_external_source = external_source_settings.ion.active + advance_external_source = external_source_settings.ion.active && !t_params.implicit_ion_advance advance_neutral_external_source = external_source_settings.neutral.active - advance_numerical_dissipation = !t_params.implicit_vpa_advection + advance_numerical_dissipation = !(t_params.implicit_ion_advance || t_params.implicit_vpa_advection) # if evolving the density, must advance the continuity equation, # in addition to including sources arising from the use of a modified distribution # function in the kinetic equation if moments.evolve_density - advance_sources = true + advance_sources = 
!t_params.implicit_ion_advance advance_continuity = true if composition.n_neutral_species > 0 advance_neutral_sources = true @@ -825,7 +860,7 @@ function setup_advance_flags(moments, composition, t_params, collisions, # in addition to including sources arising from the use of a modified distribution # function in the kinetic equation if moments.evolve_upar - advance_sources = true + advance_sources = !t_params.implicit_ion_advance advance_force_balance = true if composition.n_neutral_species > 0 advance_neutral_sources = true @@ -836,7 +871,7 @@ function setup_advance_flags(moments, composition, t_params, collisions, # in addition to including sources arising from the use of a modified distribution # function in the kinetic equation if moments.evolve_ppar - advance_sources = true + advance_sources = !t_params.implicit_ion_advance advance_energy = true if composition.n_neutral_species > 0 advance_neutral_sources = true @@ -853,8 +888,8 @@ function setup_advance_flags(moments, composition, t_params, collisions, # flag to determine if a d^2/dvpa^2 operator is present # When using implicit_vpa_advection, the vpa diffusion is included in the implicit # step - vpa_diffusion = ((num_diss_params.ion.vpa_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions) - vperp_diffusion = ((num_diss_params.ion.vperp_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions) + vpa_diffusion = ((num_diss_params.ion.vpa_dissipation_coefficient > 0.0) || (collisions.fkpl.nuii > 0.0 && vperp.n > 1)) + vperp_diffusion = ((num_diss_params.ion.vperp_dissipation_coefficient > 0.0) || (collisions.fkpl.nuii > 0.0 && vperp.n > 1)) vz_diffusion = (num_diss_params.neutral.vz_dissipation_coefficient > 0.0) end @@ -922,7 +957,44 @@ function setup_implicit_advance_flags(moments, composition, t_params, collisions if t_params.split_operators error("Implicit timesteps do not support `t_params.split_operators=true`") end - if t_params.implicit_vpa_advection + if 
t_params.implicit_ion_advance + advance_vpa_advection = vpa.n > 1 && z.n > 1 + advance_vperp_advection = vperp.n > 1 && z.n > 1 + advance_z_advection = z.n > 1 + advance_r_advection = r.n > 1 + if abs(collisions.charge_exchange) > 0.0 + if vz.n == vpa.n && vperp.n == 1 && vr.n == 1 && vzeta.n == 1 + advance_ion_cx_1V = true + elseif vperp.n > 1 && vr.n > 1 && vzeta.n > 1 + advance_ion_cx = true + else + error("If any perpendicular velocity has length>1 they all must. " + * "If all perpendicular velocities have length=1, then vpa and " + * "vz should be the same.\n" + * "vperp.n=$(vperp.n), vr.n=$(vr.n), vzeta.n=$(vzeta.n), " + * "vpa.n=$(vpa.n), vz.n=$(vz.n)") + end + end + if abs(collisions.ionization) > 0.0 + if vz.n == vpa.n && vperp.n == 1 && vr.n == 1 && vzeta.n == 1 + advance_ion_ionization_1V = true + elseif vperp.n > 1 && vr.n > 1 && vzeta.n > 1 + advance_ion_ionization = true + else + error("If any perpendicular velocity has length>1 they all must. " + * "If all perpendicular velocities have length=1, then vpa and " + * "vz should be the same.\n" + * "vperp.n=$(vperp.n), vr.n=$(vr.n), vzeta.n=$(vzeta.n), " + * "vpa.n=$(vpa.n), vz.n=$(vz.n)") + end + end + advance_ionization_source = collisions.ionization > 0.0 && collisions.constant_ionization_rate + advance_krook_collisions_ii = collisions.krook.nuii0 > 0.0 + advance_external_source = external_source_settings.ion.active + advance_numerical_dissipation = true + advance_sources = moments.evolve_density || moments.evolve_upar || moments.evolve_ppar + explicit_weakform_fp_collisions = collisions.fkpl.nuii > 0.0 && vperp.n > 1 + elseif t_params.implicit_vpa_advection advance_vpa_advection = true advance_numerical_dissipation = true end @@ -935,8 +1007,8 @@ function setup_implicit_advance_flags(moments, composition, t_params, collisions # flag to determine if a d^2/dvpa^2 operator is present # When using implicit_vpa_advection, the vpa diffusion is included in the implicit # step - vpa_diffusion = 
((num_diss_params.ion.vpa_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions) - vperp_diffusion = ((num_diss_params.ion.vperp_dissipation_coefficient > 0.0) || explicit_weakform_fp_collisions) + vpa_diffusion = ((num_diss_params.ion.vpa_dissipation_coefficient > 0.0) || (collisions.fkpl.nuii > 0.0 && vperp.n > 1)) + vperp_diffusion = ((num_diss_params.ion.vperp_dissipation_coefficient > 0.0) || (collisions.fkpl.nuii > 0.0 && vperp.n > 1)) vz_diffusion = (num_diss_params.neutral.vz_dissipation_coefficient > 0.0) manufactured_solns_test = manufactured_solns_input.use_for_advance @@ -1018,6 +1090,13 @@ function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies buffer_vpavperpzrs_1 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion) buffer_vpavperpzrs_2 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion) + + implicit_buffer_vpavperpzrs_1 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion) + implicit_buffer_vpavperpzrs_2 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion) + implicit_buffer_vpavperpzrs_3 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion) + implicit_buffer_vpavperpzrs_4 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion) + implicit_buffer_vpavperpzrs_5 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion) + implicit_buffer_vpavperpzrs_6 = allocate_shared_float(nvpa,nvperp,nz,nr,nspecies_ion) buffer_vzvrvzetazsn_1 = allocate_shared_float(nvz,nvr,nvzeta,nz,nspecies_neutral) buffer_vzvrvzetazsn_2 = allocate_shared_float(nvz,nvr,nvzeta,nz,nspecies_neutral) @@ -1040,6 +1119,9 @@ function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies buffer_vpavperp_2 = allocate_shared_float(nvpa,nvperp) buffer_vpavperp_3 = allocate_shared_float(nvpa,nvperp) + int_buffer_rs_1 = allocate_shared_int(nr,nspecies_ion) + int_buffer_rs_2 = allocate_shared_int(nr,nspecies_ion) + return scratch_dummy_arrays(dummy_s,dummy_sr,dummy_vpavperp,dummy_zrs,dummy_zrsn, 
buffer_z_1,buffer_z_2,buffer_z_3,buffer_z_4, buffer_r_1,buffer_r_2,buffer_r_3,buffer_r_4, @@ -1051,10 +1133,12 @@ function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies buffer_vpavperpzs_1,buffer_vpavperpzs_2,buffer_vpavperpzs_3,buffer_vpavperpzs_4,buffer_vpavperpzs_5,buffer_vpavperpzs_6, buffer_vpavperprs_1,buffer_vpavperprs_2,buffer_vpavperprs_3,buffer_vpavperprs_4,buffer_vpavperprs_5,buffer_vpavperprs_6, buffer_vpavperpzrs_1,buffer_vpavperpzrs_2, + implicit_buffer_vpavperpzrs_1,implicit_buffer_vpavperpzrs_2,implicit_buffer_vpavperpzrs_3,implicit_buffer_vpavperpzrs_4,implicit_buffer_vpavperpzrs_5,implicit_buffer_vpavperpzrs_6, buffer_vzvrvzetazsn_1,buffer_vzvrvzetazsn_2,buffer_vzvrvzetazsn_3,buffer_vzvrvzetazsn_4,buffer_vzvrvzetazsn_5,buffer_vzvrvzetazsn_6, buffer_vzvrvzetarsn_1,buffer_vzvrvzetarsn_2,buffer_vzvrvzetarsn_3,buffer_vzvrvzetarsn_4,buffer_vzvrvzetarsn_5,buffer_vzvrvzetarsn_6, buffer_vzvrvzetazrsn_1, buffer_vzvrvzetazrsn_2, - buffer_vpavperp_1,buffer_vpavperp_2,buffer_vpavperp_3) + buffer_vpavperp_1,buffer_vpavperp_2,buffer_vpavperp_3, + int_buffer_rs_1,int_buffer_rs_2) end @@ -1882,18 +1966,20 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen # reduction over the shared-memory block, so all processes must calculate the same # species at the same time. 
begin_r_vperp_vpa_region(; no_synchronize=true) - ion_z_CFL = Inf - @loop_s is begin - update_speed_z!(z_advect[is], moments.ion.upar, moments.ion.vth, evolve_upar, - evolve_ppar, fields, vpa, vperp, z, r, t, geometry, is) - this_minimum = get_minimum_CFL_z(z_advect[is].speed, z) - @serial_region begin - ion_z_CFL = min(ion_z_CFL, this_minimum) + if !t_params.implicit_ion_advance + ion_z_CFL = Inf + @loop_s is begin + update_speed_z!(z_advect[is], moments.ion.upar, moments.ion.vth, evolve_upar, + evolve_ppar, fields, vpa, vperp, z, r, t, geometry, is) + this_minimum = get_minimum_CFL_z(z_advect[is].speed, z) + @serial_region begin + ion_z_CFL = min(ion_z_CFL, this_minimum) + end end + push!(CFL_limits, t_params.CFL_prefactor * ion_z_CFL) end - push!(CFL_limits, t_params.CFL_prefactor * ion_z_CFL) - if !t_params.implicit_vpa_advection + if !(t_params.implicit_ion_advance || t_params.implicit_vpa_advection) # ion vpa-advection begin_r_z_vperp_region() ion_vpa_CFL = Inf @@ -2617,7 +2703,19 @@ function backward_euler!(fvec_out, fvec_in, pdf, fields, moments, advect_objects vpa_advect, vperp_advect, r_advect, z_advect = advect_objects.vpa_advect, advect_objects.vperp_advect, advect_objects.r_advect, advect_objects.z_advect neutral_z_advect, neutral_r_advect, neutral_vz_advect = advect_objects.neutral_z_advect, advect_objects.neutral_r_advect, advect_objects.neutral_vz_advect - if advance.vpa_advection + if nl_solver_params.ion_advance !== nothing + success = implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments, + advect_objects, vz, vr, vzeta, vpa, vperp, + gyrophase, z, r, t, dt, spectral_objects, + composition, collisions, geometry, scratch_dummy, + manufactured_source_list, + external_source_settings, num_diss_params, + nl_solver_params.ion_advance, advance, fp_arrays, + istage) + if !success + return success + end + elseif advance.vpa_advection success = implicit_vpa_advection!(fvec_out.pdf, fvec_in, fields, moments, vpa_advect, vpa, vperp, z, r, dt, t, 
vpa_spectral, composition, collisions,
@@ -2632,6 +2730,249 @@ function backward_euler!(fvec_out, fvec_in, pdf, fields, moments, advect_objects
 
     return true
 end
+"""
+    implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments, advect_objects,
+                          vz, vr, vzeta, vpa, vperp, gyrophase, z, r, t, dt,
+                          spectral_objects, composition, collisions, geometry,
+                          scratch_dummy, manufactured_source_list,
+                          external_source_settings, num_diss_params,
+                          nl_solver_params, advance, fp_arrays, istage)
+
+Backward-Euler timestep for all terms in the ion kinetic equation; returns solve `success`.
+"""
+function implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments, advect_objects,
+                               vz, vr, vzeta, vpa, vperp, gyrophase, z, r, t, dt,
+                               spectral_objects, composition, collisions, geometry,
+                               scratch_dummy, manufactured_source_list,
+                               external_source_settings, num_diss_params,
+                               nl_solver_params, advance, fp_arrays, istage)
+
+    vpa_spectral, vperp_spectral, r_spectral, z_spectral = spectral_objects.vpa_spectral, spectral_objects.vperp_spectral, spectral_objects.r_spectral, spectral_objects.z_spectral
+    vpa_advect, vperp_advect, r_advect, z_advect = advect_objects.vpa_advect, advect_objects.vperp_advect, advect_objects.r_advect, advect_objects.z_advect
+
+    # Make a copy of fvec_in.pdf so we can apply boundary conditions at the 'new'
+    # timestep, as these are the boundary conditions we need to apply to the residual.
+    f_old = scratch_dummy.implicit_buffer_vpavperpzrs_1
+    begin_s_r_z_vperp_vpa_region()
+    @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
+        f_old[ivpa,ivperp,iz,ir,is] = fvec_in.pdf[ivpa,ivperp,iz,ir,is]
+    end
+
+    coords = (s=composition.n_ion_species, r=r, z=z, vperp=vperp, vpa=vpa)
+    icut_lower_z = scratch_dummy.int_buffer_rs_1
+    icut_upper_z = scratch_dummy.int_buffer_rs_2
+    zero = 1.0e-14
+
+    begin_s_r_z_region()
+    @loop_s_r_z is ir iz begin
+        @views hard_force_moment_constraints!(f_old[:,:,iz,ir,is], moments, vpa)
+    end
+
+    begin_s_r_region()
+    @loop_s_r is ir begin
+        if z.irank == 0
+            iz = 1
+            @. vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is],
+                                             fvec_in.upar[iz,ir,is],
+                                             moments.evolve_ppar,
+                                             moments.evolve_upar)
+            icut_lower_z[ir,is] = vpa.n
+            for ivpa ∈ vpa.n:-1:1
+                # for left boundary in zed (z = -Lz/2), want
+                # f(z=-Lz/2, v_parallel > 0) = 0
+                if vpa.scratch[ivpa] ≤ zero
+                    icut_lower_z[ir,is] = ivpa + 1
+                    break
+                end
+            end
+        end
+        if z.irank == z.nrank - 1
+            iz = z.n
+            @. vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is],
+                                             fvec_in.upar[iz,ir,is],
+                                             moments.evolve_ppar,
+                                             moments.evolve_upar)
+            icut_upper_z[ir,is] = 0
+            for ivpa ∈ 1:vpa.n
+                # for right boundary in zed (z = Lz/2), want
+                # f(z=Lz/2, v_parallel < 0) = 0
+                if vpa.scratch[ivpa] ≥ -zero
+                    icut_upper_z[ir,is] = ivpa - 1
+                    break
+                end
+            end
+        end
+    end
+
+    if vpa.n > 1
+        # calculate the vpa advection speed, to ensure it is correct when used to apply the
+        # boundary condition
+        update_speed_vpa!(vpa_advect, fields, fvec_in, moments, vpa, vperp, z, r, composition,
+                          collisions, external_source_settings.ion, t, geometry)
+    end
+    if z.n > 1
+        @loop_s is begin
+            # get the updated speed along the z direction using the current f
+            @views update_speed_z!(z_advect[is], fvec_in.upar[:,:,is],
+                                   moments.ion.vth[:,:,is], moments.evolve_upar,
+                                   moments.evolve_ppar, fields, vpa, vperp, z, r, t,
+                                   geometry, is)
+        end
+    end
+    if r.n > 1
+        @loop_s is begin
+            # get the updated speed along the r direction using the current f
+            @views update_speed_r!(r_advect[is], fvec_in.upar[:,:,is],
+                                   moments.ion.vth[:,:,is], fields, moments.evolve_upar,
+                                   moments.evolve_ppar, vpa, vperp, z, r, geometry, is)
+        end
+    end
+    if vperp.n > 1
+        # calculate the vperp advection speed, to ensure it is correct when used to apply the
+        # boundary condition
+        begin_s_r_z_vpa_region()
+        @loop_s is begin
+            # get the updated speed along the vperp direction using the current f
+            @views update_speed_vperp!(vperp_advect[is], vpa, vperp, z, r, z_advect[is],
+                                       r_advect[is], geometry)
+        end
+    end
+
+    function apply_bc!(x)
+        if vpa.n > 1
+            begin_s_r_z_vperp_region()
+            @loop_s_r_z_vperp is ir iz ivperp begin
+                @views enforce_v_boundary_condition_local!(x[:,ivperp,iz,ir,is], vpa.bc,
+                                                           vpa_advect[is].speed[:,ivperp,iz,ir],
+                                                           advance.vpa_diffusion, vpa,
+                                                           vpa_spectral)
+            end
+        end
+        if vperp.n > 1
+            begin_s_r_z_vpa_region()
+            enforce_vperp_boundary_condition!(x, vperp.bc, vperp, vperp_spectral,
+                                              vperp_advect, advance.vperp_diffusion)
+        end
+
+        if z.bc == "wall" && (z.irank == 0 || z.irank == z.nrank - 1)
+            # Wall boundary conditions. Note that as density, upar, ppar do not
+            # change in this implicit step, f_new, f_old, and residual should all
+            # be zero at exactly the same set of grid points, so it is reasonable
+            # to zero-out `residual` to impose the boundary condition. We impose
+            # this after subtracting f_old in case rounding errors, etc. mean that
+            # at some point f_old had a different boundary condition cut-off
+            # index.
+            begin_s_r_vperp_region()
+            if z.irank == 0
+                iz = 1
+                @loop_s_r_vperp is ir ivperp begin
+                    x[icut_lower_z[ir,is]:end,ivperp,iz,ir,is] .= 0.0
+                end
+            end
+            if z.irank == z.nrank - 1
+                iz = z.n
+                @loop_s_r_vperp is ir ivperp begin
+                    x[1:icut_upper_z[ir,is],ivperp,iz,ir,is] .= 0.0
+                end
+            end
+        end
+
+        return nothing
+    end
+
+    # Use a forward-Euler step as the initial guess for fvec_out.pdf
+    euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments, advect_objects, vz, vr,
+                        vzeta, vpa, vperp, gyrophase, z, r, t, dt, spectral_objects,
+                        composition, collisions, geometry, scratch_dummy,
+                        manufactured_source_list, external_source_settings,
+                        num_diss_params, advance, fp_arrays, istage)
+
+    # Apply the 'new' boundary conditions to f_old, so it has the same boundary conditions
+    # as we will apply to the residual, so that f_new obeys the 'new' boundary conditions.
+    apply_bc!(f_old)
+    # Also apply the bc to the forward-Euler updated values which are the initial state
+    # for 'f_new'.
+    apply_bc!(fvec_out.pdf)
+    hard_force_moment_constraints!(fvec_out.pdf, moments, vpa)
+
+    # Define a function whose input is `f_new`, so that when its output
+    # `residual` is zero, f_new is the result of a backward-Euler timestep:
+    #   (f_new - f_old) / dt = RHS(f_new)
+    # ⇒ f_new - f_old - dt*RHS(f_new) = 0
+    function residual_func!(residual, f_new)
+        begin_s_r_z_vperp_vpa_region()
+        @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
+            residual[ivpa,ivperp,iz,ir,is] = f_old[ivpa,ivperp,iz,ir,is]
+        end
+
+        # scratch_pdf struct containing the array passed as f_new
+        new_scratch = scratch_pdf(f_new, fvec_out.density, fvec_out.upar, fvec_out.ppar,
+                                  fvec_out.pperp, fvec_out.temp_z_s, fvec_out.pdf_neutral,
+                                  fvec_out.density_neutral, fvec_out.uz_neutral,
+                                  fvec_out.pz_neutral)
+        # scratch_pdf struct containing the array passed as residual
+        residual_scratch = scratch_pdf(residual, fvec_out.density, fvec_out.upar,
+                                       fvec_out.ppar, fvec_out.pperp, fvec_out.temp_z_s,
+                                       fvec_out.pdf_neutral, fvec_out.density_neutral,
+                                       fvec_out.uz_neutral, fvec_out.pz_neutral)
+
+        euler_time_advance!(residual_scratch, new_scratch, pdf, fields, moments,
+                            advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z,
+                            r, t, dt, spectral_objects, composition, collisions, geometry,
+                            scratch_dummy, manufactured_source_list,
+                            external_source_settings, num_diss_params, advance, fp_arrays,
+                            istage)
+
+        # Make sure updated f will not contain negative values
+        #@. residual = max(residual, minval)
+
+        # Now
+        #   residual = f_old + dt*RHS(f_new)
+        # so update to desired residual
+        begin_s_r_z_vperp_vpa_region()
+        @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
+            residual[ivpa,ivperp,iz,ir,is] = f_new[ivpa,ivperp,iz,ir,is] - residual[ivpa,ivperp,iz,ir,is]
+        end
+
+        apply_bc!(residual)
+
+        begin_s_r_z_region()
+        @loop_s_r_z is ir iz begin
+            @views moment_constraints_on_residual!(residual[:,:,iz,ir,is],
+                                                   f_new[:,:,iz,ir,is], moments, vpa)
+        end
+
+        return nothing
+    end
+
+    # No preconditioning for now
+    left_preconditioner = identity
+    right_preconditioner = identity
+
+    # Buffers
+    # Note vpa.scratch is used by advance_f!, so we cannot use it here.
+    residual = scratch_dummy.implicit_buffer_vpavperpzrs_2
+    delta_x = scratch_dummy.implicit_buffer_vpavperpzrs_3
+    rhs_delta = scratch_dummy.implicit_buffer_vpavperpzrs_4
+    v = scratch_dummy.implicit_buffer_vpavperpzrs_5
+    w = scratch_dummy.implicit_buffer_vpavperpzrs_6
+
+    # Using the forward-Euler step seems (in at least one case) to slightly
+    # increase the number of iterations, so skip this.
+    ## Use forward-Euler step for initial guess
+    #residual_func!(residual, this_f_out)
+    #this_f_out .+= residual
+
+    success = newton_solve!(fvec_out.pdf, residual_func!, residual, delta_x,
+                            rhs_delta, v, w, nl_solver_params, coords=coords,
+                            left_preconditioner=left_preconditioner,
+                            right_preconditioner=right_preconditioner)
+
+    nl_solver_params.stage_counter[] += 1
+
+    return success
+end
+
 """
 update the vector containing the pdf and any evolved moments of the pdf
 for use in the Runge-Kutta time advance
From 0d0ac3a6668a6addf1c5c5ecd61f010d5644aecf Mon Sep 17 00:00:00 2001
From: John Omotani
Date: Mon, 20 May 2024 15:51:12 +0100
Subject: [PATCH 50/75] Consistently treat rtol/atol in distributed_norm() and
 distributed_dot()

Both distributed_norm() and distributed_dot() need to use rtol and atol.
Newton iteration and GMRES iteration now use the same norm.
To use a 'Euclidean norm' (the standard thing for GMRES), pass rtol=0, atol=1. --- moment_kinetics/src/nonlinear_solvers.jl | 200 +++++++---------------- moment_kinetics/src/time_advance.jl | 9 +- 2 files changed, 61 insertions(+), 148 deletions(-) diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index df4799632..22d9ba964 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -190,13 +190,12 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, rtol = nl_solver_params.rtol atol = nl_solver_params.atol - distributed_error_norm = get_distributed_error_norm(coords, rtol, atol, x) - distributed_linear_norm = get_distributed_linear_norm(coords) - distributed_dot = get_distributed_dot(coords) + distributed_norm = get_distributed_norm(coords, rtol, atol, x) + distributed_dot = get_distributed_dot(coords, rtol, atol, x) parallel_map = get_parallel_map(coords) residual_func!(residual, x) - residual_norm = distributed_error_norm(residual, coords) + residual_norm = distributed_norm(residual) counter = 0 linear_counter = 0 @@ -230,7 +229,7 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, H=nl_solver_params.H, V=nl_solver_params.V, rhs_delta=rhs_delta, initial_guess=nl_solver_params.linear_initial_guess, - distributed_norm=distributed_linear_norm, + distributed_norm=distributed_norm, distributed_dot=distributed_dot, parallel_map=parallel_map, serial_solve=nl_solver_params.serial_solve) @@ -250,7 +249,7 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, # For the Newton iteration, we want the norm divided by the (sqrt of the) number # of grid points, so we can use a tolerance that is independent of the size of the # grid. This is unlike the norms needed in `linear_solve!()`. 
- residual_norm = distributed_error_norm(residual, coords) + residual_norm = distributed_norm(residual) if isnan(residual_norm) error("NaN in Newton iteration at iteration $counter") end @@ -261,7 +260,7 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, while s > 1.0e-5 parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) residual_func!(residual, x) - residual_norm = distributed_error_norm(residual, coords) + residual_norm = distributed_norm(residual) if residual_norm ≤ previous_residual_norm break end @@ -274,13 +273,13 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, s = -1.0e-5 parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) residual_func!(residual, x) - residual_norm = distributed_error_norm(residual, coords) + residual_norm = distributed_norm(residual) if residual_norm > previous_residual_norm # That didn't work either, so just take the full step and hope for # convergence later parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) residual_func!(residual, x) - residual_norm = distributed_error_norm(residual, coords) + residual_norm = distributed_norm(residual) end end end @@ -317,32 +316,31 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, end """ - get_distributed_error_norm(coords) + get_distributed_norm(coords, rtol, atol, x) -Get a 'distributed_error_norm' function that acts on arrays with dimensions given by the +Get a 'distributed_norm' function that acts on arrays with dimensions given by the entries in `coords`. 
""" -function get_distributed_error_norm(coords, rtol, atol, x) +function get_distributed_norm(coords, rtol, atol, x) dims = keys(coords) if dims == (:z,) - this_norm = distributed_error_norm_z + this_norm = distributed_norm_z elseif dims == (:vpa,) - this_norm = distributed_error_norm_vpa + this_norm = distributed_norm_vpa elseif dims == (:s, :r, :z, :vperp, :vpa) - this_norm = distributed_error_norm_s_r_z_vperp_vpa + this_norm = distributed_norm_s_r_z_vperp_vpa else error("dims=$dims is not supported yet. Need to write another " - * "`distributed_error_norm_*()` function in nonlinear_solvers.jl") + * "`distributed_norm_*()` function in nonlinear_solvers.jl") end - wrapped_norm = (args...; kwargs...) -> this_norm(args...; rtol=rtol, atol=atol, x, - kwargs...) + wrapped_norm = (args...; kwargs...) -> this_norm(args...; rtol=rtol, atol=atol, x=x, + coords=coords, kwargs...) return wrapped_norm end -function distributed_error_norm_z(residual::AbstractArray{mk_float, 1}, coords; rtol, - atol, x) +function distributed_norm_z(residual::AbstractArray{mk_float, 1}; coords, rtol, atol, x) z = coords.z begin_z_region() @@ -376,8 +374,7 @@ function distributed_error_norm_z(residual::AbstractArray{mk_float, 1}, coords; return global_norm end -function distributed_error_norm_vpa(residual::AbstractArray{mk_float, 1}, coords; rtol, - atol, x) +function distributed_norm_vpa(residual::AbstractArray{mk_float, 1}; coords, rtol, atol, x) # No parallelism needed when the implicit solve is over vpa - assume that this will be # called inside a parallelised s_r_z_vperp loop. 
residual_norm = 0.0 @@ -390,8 +387,8 @@ function distributed_error_norm_vpa(residual::AbstractArray{mk_float, 1}, coords return residual_norm end -function distributed_error_norm_s_r_z_vperp_vpa(residual::AbstractArray{mk_float, 5}, - coords; rtol, atol, x) +function distributed_norm_s_r_z_vperp_vpa(residual::AbstractArray{mk_float, 5}; + coords, rtol, atol, x) n_ion_species = coords.s r = coords.r z = coords.z @@ -435,127 +432,34 @@ function distributed_error_norm_s_r_z_vperp_vpa(residual::AbstractArray{mk_float end """ - get_distributed_linear_norm(coords) + get_distributed_dot(coords, rtol, atol, x) -Get a 'distributed_linear_norm' function that acts on arrays with dimensions given by the -entries in `coords`. +Get a 'distributed_dot' function that acts on arrays with dimensions given by the entries +in `coords`. """ -function get_distributed_linear_norm(coords) +function get_distributed_dot(coords, rtol, atol, x) dims = keys(coords) if dims == (:z,) - return distributed_linear_norm_z + this_dot = distributed_dot_z elseif dims == (:vpa,) - return distributed_linear_norm_vpa + this_dot = distributed_dot_vpa elseif dims == (:s, :r, :z, :vperp, :vpa) - return distributed_linear_norm_s_r_z_vperp_vpa + this_dot = distributed_dot_s_r_z_vperp_vpa else error("dims=$dims is not supported yet. Need to write another " - * "`distributed_linear_norm_*()` function in nonlinear_solvers.jl") + * "`distributed_dot_*()` function in nonlinear_solvers.jl") end -end - -function distributed_linear_norm_z(residual::AbstractArray{mk_float, 1}, coords) - z = coords.z - begin_z_region() + wrapped_dot = (args...; kwargs...) -> this_dot(args...; rtol=rtol, atol=atol, x=x, + coords=coords, kwargs...) 
- local_norm = 0.0 - if z.irank < z.nrank - 1 - zend = z.n - @loop_z iz begin - if iz == zend - continue - end - local_norm += residual[iz]^2 - end - else - @loop_z iz begin - local_norm += residual[iz]^2 - end - end - - _block_synchronize() - block_norm = MPI.Reduce(local_norm, +, comm_block[]) - - if block_rank[] == 0 - global_norm = MPI.Allreduce(block_norm, +, comm_inter_block[]) - global_norm = sqrt(global_norm) - else - global_norm = nothing - end - global_norm = MPI.bcast(global_norm, comm_block[]; root=0) - - return global_norm end -function distributed_linear_norm_vpa(residual::AbstractArray{mk_float, 1}, coords) - # No parallelism needed when the implicit solve is over vpa - assume that this will be - # called inside a parallelised s_r_z_vperp loop. - return norm(residual) -end +function distributed_dot_z(v::AbstractArray{mk_float, 1}, w::AbstractArray{mk_float, 1}; + coords, atol, rtol, x) -function distributed_linear_norm_s_r_z_vperp_vpa(residual::AbstractArray{mk_float, 5}, coords) - r = coords.r z = coords.z - begin_s_r_z_vperp_vpa_region() - - local_norm = 0.0 - if r.irank < r.nrank - 1 - rend = r.n - else - rend = r.n + 1 - end - if z.irank < z.nrank - 1 - zend = z.n - else - zend = z.n + 1 - end - - @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin - if ir == rend || iz == zend - continue - end - local_norm += residual[ivpa,ivperp,iz,ir,is]^2 - end - - _block_synchronize() - block_norm = MPI.Reduce(local_norm, +, comm_block[]) - - if block_rank[] == 0 - global_norm = MPI.Allreduce(block_norm, +, comm_inter_block[]) - global_norm = sqrt(global_norm) - else - global_norm = nothing - end - global_norm = MPI.bcast(global_norm, comm_block[]; root=0) - - return global_norm -end - -""" - get_distributed_dot(coords) - -Get a 'distributed_dot' function that acts on arrays with dimensions given by the entries -in `coords`. 
-""" -function get_distributed_dot(coords) - dims = keys(coords) - if dims == (:z,) - return distributed_dot_z - elseif dims == (:vpa,) - return distributed_dot_vpa - elseif dims == (:s, :r, :z, :vperp, :vpa) - return distributed_dot_s_r_z_vperp_vpa - else - error("dims=$dims is not supported yet. Need to write another " - * "`distributed_dot_*()` function in nonlinear_solvers.jl") - end -end - -function distributed_dot_z(x::AbstractArray{mk_float, 1}, y::AbstractArray{mk_float, 1}, - coords) - begin_z_region() z = coords.z @@ -567,11 +471,11 @@ function distributed_dot_z(x::AbstractArray{mk_float, 1}, y::AbstractArray{mk_fl if iz == zend continue end - local_dot += x[iz] * y[iz] + local_dot += v[iz] * w[iz] / (rtol * abs(x[iz]) + atol)^2 end else @loop_z iz begin - local_dot += x[iz] * y[iz] + local_dot += v[iz] * w[iz] / (rtol * abs(x[iz]) + atol)^2 end end @@ -580,6 +484,7 @@ function distributed_dot_z(x::AbstractArray{mk_float, 1}, y::AbstractArray{mk_fl if block_rank[] == 0 global_dot = MPI.Allreduce(block_dot, +, comm_inter_block[]) + global_dot = global_dot / z.n_global else global_dot = nothing end @@ -587,17 +492,26 @@ function distributed_dot_z(x::AbstractArray{mk_float, 1}, y::AbstractArray{mk_fl return global_dot end -function distributed_dot_vpa(x::AbstractArray{mk_float, 1}, y::AbstractArray{mk_float, 1}, - coords) +function distributed_dot_vpa(v::AbstractArray{mk_float, 1}, w::AbstractArray{mk_float, 1}; + coords, atol, rtol, x) # No parallelism needed when the implicit solve is over vpa - assume that this will be # called inside a parallelised s_r_z_vperp loop. 
- return dot(x, y) + local_dot = 0.0 + for i ∈ eachindex(v,w) + local_dot += v[i] * w[i] / (rtol * abs(x[i]) + atol)^2 + end + local_dot = local_dot / length(v) + return local_dot end -function distributed_dot_s_r_z_vperp_vpa(x::AbstractArray{mk_float, 5}, - y::AbstractArray{mk_float, 5}, coords) +function distributed_dot_s_r_z_vperp_vpa(v::AbstractArray{mk_float, 5}, + w::AbstractArray{mk_float, 5}; + coords, atol, rtol, x) + n_ion_species = coords.s r = coords.r z = coords.z + vperp = coords.vperp + vpa = coords.vpa begin_z_region() @@ -617,7 +531,7 @@ function distributed_dot_s_r_z_vperp_vpa(x::AbstractArray{mk_float, 5}, if ir == rend || iz == zend continue end - local_dot += x[ivpa,ivperp,iz,ir,is] * y[ivpa,ivperp,iz,ir,is] + local_dot += v[ivpa,ivperp,iz,ir,is] * w[ivpa,ivperp,iz,ir,is] / (rtol * abs(x[ivpa,ivperp,iz,ir,is]) + atol)^2 end _block_synchronize() @@ -625,6 +539,7 @@ function distributed_dot_s_r_z_vperp_vpa(x::AbstractArray{mk_float, 5}, if block_rank[] == 0 global_dot = MPI.Allreduce(block_dot, +, comm_inter_block[]) + global_dot = global_dot / (n_ion_species * r.n_global * z.n_global * vperp.n_global * vpa.n_global) else global_dot = nothing end @@ -701,7 +616,8 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol # Solve (approximately?): # J δx = residual0 - epsilon = 1.0e-8 + tol = max(rtol, atol) + epsilon = 1.0e-8 / tol inv_epsilon = 1.0 / epsilon function approximate_Jacobian_vector_product!(v) @@ -723,7 +639,7 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol approximate_Jacobian_vector_product!(v) # Now we actually set 'w' as the first Krylov vector, and normalise it. 
parallel_map((residual0, v) -> -residual0 - v, w, residual0, v) - beta = distributed_norm(w, coords) + beta = distributed_norm(w) parallel_map((w) -> w/beta, selectdim(V,ndims(V),1), w) # Set tolerance for GMRES iteration to rtol times the initial residual, unless this is @@ -746,7 +662,7 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol # Gram-Schmidt orthogonalization for j ∈ 1:i parallel_map((V) -> V, v, selectdim(V,ndims(V),j)) - w_dot_Vj = distributed_dot(w, v, coords) + w_dot_Vj = distributed_dot(w, v) if serial_solve H[j,i] = w_dot_Vj else @@ -757,7 +673,7 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol end parallel_map((w, V) -> w - H[j,i] * V, w, w, selectdim(V,ndims(V),j)) end - norm_w = distributed_norm(w, coords) + norm_w = distributed_norm(w) if serial_solve H[i+1,i] = norm_w else @@ -831,7 +747,7 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol # Note residual0 has already had the left_preconditioner!() applied to it. 
parallel_map((residual0, v) -> -residual0 - v, v, residual0, v) - beta = distributed_norm(v, coords) + beta = distributed_norm(v) for i ∈ 2:length(y) parallel_map(() -> 0.0, selectdim(V,ndims(V),i)) end diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 5924cde68..560193a10 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -2763,6 +2763,9 @@ function implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments, advect_o icut_upper_z = scratch_dummy.int_buffer_rs_2 zero = 1.0e-14 + rtol = nl_solver_params.rtol + atol = nl_solver_params.atol + begin_s_r_z_region() @loop_s_r_z is ir iz begin @views hard_force_moment_constraints!(f_old[:,:,iz,ir,is], moments, vpa) @@ -2936,12 +2939,6 @@ function implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments, advect_o apply_bc!(residual) - begin_s_r_z_region() - @loop_s_r_z is ir iz begin - @views moment_constraints_on_residual!(residual[:,:,iz,ir,is], - f_new[:,:,iz,ir,is], moments, vpa) - end - return nothing end From e3afc227671dbbeec571bc7927075547852d93ec Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 20 May 2024 17:15:44 +0100 Subject: [PATCH 51/75] Optimise parallel_map() computations in nonlinear_solvers The compiler is not able to fully resolve the previous complicated generator expressions into efficient code, so write simpler versions for fixed numbers of arguments. Also add specialised function for the calculation of delta_x from V.y. 
--- moment_kinetics/src/nonlinear_solvers.jl | 145 ++++++++++++++++++++--- 1 file changed, 129 insertions(+), 16 deletions(-) diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 22d9ba964..40b27974b 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -193,6 +193,7 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, distributed_norm = get_distributed_norm(coords, rtol, atol, x) distributed_dot = get_distributed_dot(coords, rtol, atol, x) parallel_map = get_parallel_map(coords) + parallel_delta_x_calc = get_parallel_delta_x_calc(coords) residual_func!(residual, x) residual_norm = distributed_norm(residual) @@ -232,6 +233,7 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, distributed_norm=distributed_norm, distributed_dot=distributed_dot, parallel_map=parallel_map, + parallel_delta_x_calc=parallel_delta_x_calc, serial_solve=nl_solver_params.serial_solve) linear_counter += linear_its @@ -567,39 +569,151 @@ function get_parallel_map(coords) end end -function parallel_map_z(func, result::AbstractArray{mk_float, 1}, - args::AbstractArray{mk_float, 1}...) +# Separate versions for different numbers of arguments as generator expressions result in +# slow code + +function parallel_map_z(func, result::AbstractArray{mk_float, 1}) begin_z_region() @loop_z iz begin - result[iz] = func((x[iz] for x ∈ args)...) + result[iz] = func() end return nothing end +function parallel_map_z(func, result::AbstractArray{mk_float, 1}, x1) + + begin_z_region() + + @loop_z iz begin + result[iz] = func(x1[iz]) + end + + return nothing +end +function parallel_map_z(func, result::AbstractArray{mk_float, 1}, x1, x2) + + begin_z_region() -function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, - args::AbstractArray{mk_float, 1}...) 
+ @loop_z iz begin + result[iz] = func(x1[iz], x2[iz]) + end + + return nothing +end + +function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}) # No parallelism needed when the implicit solve is over vpa - assume that this will be # called inside a parallelised s_r_z_vperp loop. - if length(args) == 0 - for i ∈ eachindex(result) - result[i] = func() - end + for i ∈ eachindex(result) + result[i] = func() + end + return nothing +end +function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, x1) + # No parallelism needed when the implicit solve is over vpa - assume that this will be + # called inside a parallelised s_r_z_vperp loop. + for i ∈ eachindex(result) + result[i] = func(x1[i]) + end + return nothing +end +function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, x1, x2) + # No parallelism needed when the implicit solve is over vpa - assume that this will be + # called inside a parallelised s_r_z_vperp loop. + for i ∈ eachindex(result) + result[i] = func(x1[i], x2[i]) + end + return nothing +end + +function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}) + + begin_s_r_z_vperp_vpa_region() + + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + result[ivpa,ivperp,iz,ir,is] = func() + end + + return nothing +end +function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}, x1) + + begin_s_r_z_vperp_vpa_region() + + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + result[ivpa,ivperp,iz,ir,is] = func(x1[ivpa,ivperp,iz,ir,is]) + end + + return nothing +end +function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}, x1, x2) + + begin_s_r_z_vperp_vpa_region() + + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + result[ivpa,ivperp,iz,ir,is] = func(x1[ivpa,ivperp,iz,ir,is], x2[ivpa,ivperp,iz,ir,is]) + end + + return nothing +end + +""" + get_parallel_delta_x_calc(coords) + +Get a parallelised function that calculates the update `delta_x` from the `V` matrix and +the 
minimum residual coefficients `y`. +""" +function get_parallel_delta_x_calc(coords) + dims = keys(coords) + if dims == (:z,) + return parallel_delta_x_calc_z + elseif dims == (:vpa,) + return parallel_delta_x_calc_vpa + elseif dims == (:s, :r, :z, :vperp, :vpa) + return parallel_delta_x_calc_s_r_z_vperp_vpa else - map!(func, result, args...) + error("dims=$dims is not supported yet. Need to write another " + * "`parallel_delta_x_calc_*()` function in nonlinear_solvers.jl") + end +end + +function parallel_delta_x_calc_z(delta_x::AbstractArray{mk_float, 1}, V, y) + + begin_z_region() + + ny = length(y) + @loop_z iz begin + for iy ∈ 1:ny + delta_x[iz] += y[iy] * V[iz,iy] + end + end + + return nothing +end + +function parallel_delta_x_calc_vpa(delta_x::AbstractArray{mk_float, 1}, V, y) + # No parallelism needed when the implicit solve is over vpa - assume that this will be + # called inside a parallelised s_r_z_vperp loop. + ny = length(y) + for ivpa ∈ eachindex(delta_x) + for iy ∈ 1:ny + delta_x[ivpa] += y[iy] * V[ivpa,iy] + end end return nothing end -function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}, - args::AbstractArray{mk_float, 5}...) +function parallel_delta_x_calc_s_r_z_vperp_vpa(delta_x::AbstractArray{mk_float, 5}, V, y) begin_s_r_z_vperp_vpa_region() + ny = length(y) @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin - result[ivpa,ivperp,iz,ir,is] = func((x[ivpa,ivperp,iz,ir,is] for x ∈ args)...) + for iy ∈ 1:ny + delta_x[ivpa,ivperp,iz,ir,is] += y[iy] * V[ivpa,ivperp,iz,ir,is,iy] + end end return nothing @@ -612,7 +726,7 @@ at each step of the outer Newton iteration (in `newton_solve!()`). 
function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol, atol, restart, max_restarts, left_preconditioner, right_preconditioner, H, V, rhs_delta, initial_guess, distributed_norm, distributed_dot, - parallel_map, serial_solve) + parallel_map, parallel_delta_x_calc, serial_solve) # Solve (approximately?): # J δx = residual0 @@ -730,8 +844,7 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol # The following is the `parallel_map()` version of # delta_x .= delta_x .+ sum(y[i] .* V[:,i] for i ∈ 1:length(y)) # slightly abusing splatting to get the sum into a lambda-function. - parallel_map((delta_x, V...) -> delta_x + sum(this_y * this_V for (this_y, this_V) ∈ zip(y, V)), - delta_x, delta_x, (selectdim(V,ndims(V),i) for i ∈ 1:length(y))...) + parallel_delta_x_calc(delta_x, V, y) right_preconditioner(delta_x) if residual < tol || restart_counter > max_restarts From b02e70d712263f6e7ee427fe07cd1bf6b32a8510 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 21 May 2024 09:48:25 +0100 Subject: [PATCH 52/75] Variants of hard_force_moment_constraints!() that loop over spatial grid Added for convenience. --- moment_kinetics/src/moment_constraints.jl | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/moment_kinetics/src/moment_constraints.jl b/moment_kinetics/src/moment_constraints.jl index ae7c8b156..f8c0a2274 100644 --- a/moment_kinetics/src/moment_constraints.jl +++ b/moment_kinetics/src/moment_constraints.jl @@ -7,6 +7,7 @@ module moment_constraints using ..communication: _block_synchronize using ..looping +using ..type_definitions: mk_float using ..velocity_moments: integrate_over_vspace, update_qpar! export hard_force_moment_constraints!, hard_force_moment_constraints_neutral! 
@@ -85,6 +86,16 @@ function hard_force_moment_constraints!(f, moments, vpa) return A, B, C end +function hard_force_moment_constraints!(f::AbstractArray{mk_float,5}, moments, vpa) + A = moments.ion.constraints_A_coefficient + B = moments.ion.constraints_B_coefficient + C = moments.ion.constraints_C_coefficient + begin_s_r_z_region() + @loop_s_r_z is ir iz begin + A[iz,ir,is], B[iz,ir,is], C[iz,ir,is] = + hard_force_moment_constraints!(@view(f[:,:,iz,ir,is]), moments, vpa) + end +end """ hard_force_moment_constraints_neutral!(f, moments, vz) @@ -139,6 +150,16 @@ function hard_force_moment_constraints_neutral!(f, moments, vz) return A, B, C end +function hard_force_moment_constraints_neutral!(f::AbstractArray{mk_float,6}, moments, vz) + A = moments.neutral.constraints_A_coefficient + B = moments.neutral.constraints_B_coefficient + C = moments.neutral.constraints_C_coefficient + begin_sn_r_z_region() + @loop_sn_r_z isn ir iz begin + A[iz,ir,isn], B[iz,ir,isn], C[iz,ir,isn] = + hard_force_moment_constraints_neutral!(@view(f[:,:,:,iz,ir,is]), moments, vz) + end +end """ moment_constraints_on_residual!(residual, f, moments, vpa) From 3365c3d2e37ee37f2021ca8aded07cf72ba1745e Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 21 May 2024 11:22:39 +0100 Subject: [PATCH 53/75] Separate 'advance flags' for ion and neutral numerical dissipation Allows operator splitting into implicit and explicit parts. 
--- moment_kinetics/src/input_structs.jl | 3 +- moment_kinetics/src/time_advance.jl | 44 ++++++++++++++++------------ 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index 99a338156..f81d28a30 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -107,7 +107,8 @@ mutable struct advance_info krook_collisions_ii::Bool explicit_weakform_fp_collisions::Bool external_source::Bool - numerical_dissipation::Bool + ion_numerical_dissipation::Bool + neutral_numerical_dissipation::Bool source_terms::Bool continuity::Bool force_balance::Bool diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 560193a10..684d0cc4c 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -757,7 +757,8 @@ function setup_advance_flags(moments, composition, t_params, collisions, advance_ionization_source = false advance_krook_collisions_ii = false advance_external_source = false - advance_numerical_dissipation = false + advance_ion_numerical_dissipation = false + advance_neutral_numerical_dissipation = false advance_sources = false advance_continuity = false advance_force_balance = false @@ -844,7 +845,8 @@ function setup_advance_flags(moments, composition, t_params, collisions, end advance_external_source = external_source_settings.ion.active && !t_params.implicit_ion_advance advance_neutral_external_source = external_source_settings.neutral.active - advance_numerical_dissipation = !(t_params.implicit_ion_advance || t_params.implicit_vpa_advection) + advance_ion_numerical_dissipation = !(t_params.implicit_ion_advance || t_params.implicit_vpa_advection) + advance_neutral_numerical_dissipation = true # if evolving the density, must advance the continuity equation, # in addition to including sources arising from the use of a modified distribution # function in the kinetic equation @@ -903,12 
+905,13 @@ function setup_advance_flags(moments, composition, t_params, collisions, advance_neutral_ionization_1V, advance_ionization_source, advance_krook_collisions_ii, explicit_weakform_fp_collisions, - advance_external_source, advance_numerical_dissipation, - advance_sources, advance_continuity, advance_force_balance, - advance_energy, advance_neutral_external_source, - advance_neutral_sources, advance_neutral_continuity, - advance_neutral_force_balance, advance_neutral_energy, - manufactured_solns_test, r_diffusion, vpa_diffusion, vperp_diffusion, vz_diffusion) + advance_external_source, advance_ion_numerical_dissipation, + advance_neutral_numerical_dissipation, advance_sources, + advance_continuity, advance_force_balance, advance_energy, + advance_neutral_external_source, advance_neutral_sources, + advance_neutral_continuity, advance_neutral_force_balance, + advance_neutral_energy, manufactured_solns_test, r_diffusion, + vpa_diffusion, vperp_diffusion, vz_diffusion) end """ @@ -936,7 +939,8 @@ function setup_implicit_advance_flags(moments, composition, t_params, collisions advance_ionization_source = false advance_krook_collisions_ii = false advance_external_source = false - advance_numerical_dissipation = false + advance_ion_numerical_dissipation = false + advance_neutral_numerical_dissipation = false advance_sources = false advance_continuity = false advance_force_balance = false @@ -991,12 +995,12 @@ function setup_implicit_advance_flags(moments, composition, t_params, collisions advance_ionization_source = collisions.ionization > 0.0 && collisions.constant_ionization_rate advance_krook_collisions_ii = collisions.krook.nuii0 > 0.0 advance_external_source = external_source_settings.ion.active - advance_numerical_dissipation = true + advance_ion_numerical_dissipation = true advance_sources = moments.evolve_density || moments.evolve_upar || moments.evolve_ppar explicit_weakform_fp_collisions = collisions.fkpl.nuii > 0.0 && vperp.n > 1 elseif 
t_params.implicit_vpa_advection advance_vpa_advection = true - advance_numerical_dissipation = true + advance_ion_numerical_dissipation = true end # *_diffusion flags are set regardless of whether diffusion is included in explicit or # implicit part of timestep, because they are used for boundary conditions, not to @@ -1021,13 +1025,13 @@ function setup_implicit_advance_flags(moments, composition, t_params, collisions advance_neutral_ionization_1V, advance_ionization_source, advance_krook_collisions_ii, explicit_weakform_fp_collisions, - advance_external_source, advance_numerical_dissipation, - advance_sources, advance_continuity, advance_force_balance, - advance_energy, advance_neutral_external_source, - advance_neutral_sources, advance_neutral_continuity, - advance_neutral_force_balance, advance_neutral_energy, - manufactured_solns_test, r_diffusion, vpa_diffusion, - vperp_diffusion, vz_diffusion) + advance_external_source, advance_ion_numerical_dissipation, + advance_neutral_numerical_dissipation, advance_sources, + advance_continuity, advance_force_balance, advance_energy, + advance_neutral_external_source, advance_neutral_sources, + advance_neutral_continuity, advance_neutral_force_balance, + advance_neutral_energy, manufactured_solns_test, r_diffusion, + vpa_diffusion, vperp_diffusion, vz_diffusion) end function setup_dummy_and_buffer_arrays(nr,nz,nvpa,nvperp,nvz,nvr,nvzeta,nspecies_ion,nspecies_neutral) @@ -2604,7 +2608,7 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments, end # add numerical dissipation - if advance.numerical_dissipation + if advance.ion_numerical_dissipation vpa_dissipation!(fvec_out.pdf, fvec_in.pdf, vpa, vpa_spectral, dt, num_diss_params.ion.vpa_dissipation_coefficient) vperp_dissipation!(fvec_out.pdf, fvec_in.pdf, vperp, vperp_spectral, dt, @@ -2613,6 +2617,8 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments, num_diss_params.ion.z_dissipation_coefficient, scratch_dummy) 
r_dissipation!(fvec_out.pdf, fvec_in.pdf, r, r_spectral, dt, num_diss_params.ion.r_dissipation_coefficient, scratch_dummy) + end + if advance.neutral_numerical_dissipation vz_dissipation_neutral!(fvec_out.pdf_neutral, fvec_in.pdf_neutral, vz, vz_spectral, dt, num_diss_params.neutral.vz_dissipation_coefficient) z_dissipation_neutral!(fvec_out.pdf_neutral, fvec_in.pdf_neutral, z, z_spectral, From ee3eec9dce9e7fc67fc3063e1b871deeab8b30dd Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 21 May 2024 14:54:02 +0100 Subject: [PATCH 54/75] Fudge factor to avoid unlikely but endless timestep-failure loop --- moment_kinetics/src/runge_kutta.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index 087df7dff..917f4f3d2 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -1059,7 +1059,9 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er # error norm here t_params.failure_caused_by[end] += 1 end - elseif (error_norm > 1.0 || isnan(error_norm)) && current_dt > t_params.minimum_dt + elseif (error_norm > 1.0 || isnan(error_norm)) && current_dt > t_params.minimum_dt * (1.0 + 1.0e-13) + # (1.0 + 1.0e-13) fudge factor accounts for possible rounding errors when + # t+dt=next_output_time. # Use current_dt instead of t_params.dt[] here because we are about to write to # the shared-memory variable t_params.dt[] below, and we do not want to add an # extra _block_synchronize() call after reading it here. 
From bbfaace95d4db058bd64e739d7b4337bf09bd75d Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 22 May 2024 14:29:36 +0100 Subject: [PATCH 55/75] Prevent timestep increase when number of nonlinear iterations was large If the number of nonlinear iterations was large on the previous step (greater than half of the maximum allowed value for some call to `newton_solve!()` during the step) then it is likely that the nonlinear solve will fail on the next step if the timestep is allowed to increase, so prevent any timestep increase in this case (the timestep is allowed to decrease, if the other conditions caused that). --- .../src/makie_post_processing.jl | 5 ++++ moment_kinetics/src/nonlinear_solvers.jl | 29 ++++++++++++++++--- moment_kinetics/src/runge_kutta.jl | 26 ++++++++++++----- moment_kinetics/src/time_advance.jl | 25 ++++++++++++---- 4 files changed, 68 insertions(+), 17 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index fe500dea5..546a6a8db 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -7370,6 +7370,11 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) plot_1d(time, @view limit_caused_by_per_output[counter,:]; label=prefix * "max timestep", ax=ax) + # High nonlinear iterations count + counter += 1 + plot_1d(time, @view limit_caused_by_per_output[counter,:]; + label=prefix * "high nl iterations", ax=ax) + # Accuracy limit counters counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 40b27974b..33f4f8581 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -25,7 +25,7 @@ Useful references: """ module nonlinear_solvers 
-export setup_nonlinear_solve, newton_solve! +export setup_nonlinear_solve, reset_nonlinear_per_stage_counters, newton_solve! using ..array_allocation: allocate_float, allocate_shared_float using ..communication @@ -42,6 +42,7 @@ using SparseArrays struct nl_solver_info{TH,TV,Tlig,Tprecon} rtol::mk_float atol::mk_float + nonlinear_max_iterations::mk_int linear_rtol::mk_float linear_atol::mk_float linear_restart::mk_int @@ -54,6 +55,7 @@ struct nl_solver_info{TH,TV,Tlig,Tprecon} linear_iterations::Ref{mk_int} stage_counter::Ref{mk_int} serial_solve::Bool + max_nonlinear_iterations_this_step::Ref{mk_int} preconditioner_update_interval::mk_int preconditioners::Tprecon end @@ -74,6 +76,7 @@ function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol input_dict, "nonlinear_solver"; rtol=default_rtol, atol=default_atol, + nonlinear_max_iterations=20, linear_rtol=1.0e-3, linear_atol=1.0e-15, linear_restart=10, @@ -116,11 +119,27 @@ function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol linear_initial_guess = zeros(linear_restart) return nl_solver_info(nl_solver_input.rtol, nl_solver_input.atol, + nl_solver_input.nonlinear_max_iterations, nl_solver_input.linear_rtol, nl_solver_input.linear_atol, linear_restart, nl_solver_input.linear_max_restarts, H, V, linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), - serial_solve, nl_solver_input.preconditioner_update_interval, - preconditioners) + serial_solve, Ref(0), + nl_solver_input.preconditioner_update_interval, preconditioners) +end + +""" + reset_nonlinear_per_stage_counters(nl_solver_params::Union{nl_solver_info,Nothing}) + +Reset the counters that hold per-step totals or maximums in `nl_solver_params`. 
+""" +function reset_nonlinear_per_stage_counters(nl_solver_params::Union{nl_solver_info,Nothing}) + if nl_solver_params === nothing + return nothing + end + + nl_solver_params.max_nonlinear_iterations_this_step[] = 0 + + return nothing end """ @@ -295,7 +314,7 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, close_linear_counter = linear_counter end - if counter > 100000 + if counter > nl_solver_params.nonlinear_max_iterations println("maximum iteration limit reached") success = false break @@ -304,6 +323,8 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params.n_solves[] += 1 nl_solver_params.nonlinear_iterations[] += counter nl_solver_params.linear_iterations[] += linear_counter + nl_solver_params.max_nonlinear_iterations_this_step[] = + max(counter, nl_solver_params.max_nonlinear_iterations_this_step[]) # println("Newton iterations: ", counter) # println("Final residual: ", residual_norm) # println("Total linear iterations: ", linear_counter) diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index 917f4f3d2..ec53bf40e 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -956,13 +956,14 @@ end """ adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, - total_points, current_dt, error_norm_method) + total_points, current_dt, error_norm_method, + success, nl_max_its_fraction) Use the calculated `CFL_limits` and `error_norms` to update the timestep in `t_params`. 
""" function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, error_norms, total_points, current_dt, error_norm_method, - success) + success, nl_max_its_fraction) # Get global minimum of CFL limits CFL_limit = nothing this_limit_caused_by = nothing @@ -972,9 +973,9 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er CFL_limit_caused_by = argmin(CFL_limits) CFL_limit = CFL_limits[CFL_limit_caused_by] # Reserve first four entries of t_params.limit_caused_by for max_increase_factor, - # max_increase_factor_near_fail, minimum_dt and maximum_dt limits, then the next - # `n_variables` for RK accuracy limits. - this_limit_caused_by = CFL_limit_caused_by + 4 + t_params.n_variables + # max_increase_factor_near_fail, minimum_dt, maximum_dt limits and + # high_nl_iterations, then the next `n_variables` for RK accuracy limits. + this_limit_caused_by = CFL_limit_caused_by + 5 + t_params.n_variables end if error_norm_method == "Linf" @@ -1131,8 +1132,8 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er else # Reserve first four entries of t_params.limit_caused_by for # max_increase_factor, max_increase_factor_near_fail, minimum_dt and - # maximum_dt limits. - this_limit_caused_by = 4 + max_error_variable_index + # maximum_dt limits, high_nl_iterations. + this_limit_caused_by = 5 + max_error_variable_index end # Limit so timestep cannot increase by a large factor, which might lead to @@ -1174,6 +1175,17 @@ function adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, er this_limit_caused_by = 4 end + if nl_max_its_fraction > 0.5 && t_params.previous_dt[] > 0.0 + # The last step took many nonlinear iterations, so do not allow the + # timestep to increase. + # If t_params.previous_dt[]==0.0, then the previous step failed so + # timestep will not be increasing, so do not need this check. 
+ if t_params.dt[] > t_params.previous_dt[] + t_params.dt[] = t_params.previous_dt[] + this_limit_caused_by = 5 + end + end + t_params.limit_caused_by[this_limit_caused_by] += 1 if (t_params.step_counter[] % 1000 == 0) && global_rank[] == 0 diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 684d0cc4c..982cf2000 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -394,8 +394,8 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop # timestepping. # # Entries for limit by max_increase_factor, max_increase_factor_near_last_fail, - # minimum_dt and maximum_dt. - push!(t_params.limit_caused_by, 0, 0, 0, 0) + # minimum_dt, maximum_dt and high_nl_iterations. + push!(t_params.limit_caused_by, 0, 0, 0, 0, 0) # ion pdf push!(t_params.limit_caused_by, 0) # RK accuracy @@ -1924,7 +1924,8 @@ end fields, composition, collisions, geometry, external_source_settings, spectral_objects, advect_objects, gyroavs, num_diss_params, advance, - scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, success) + scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, + success, nl_max_its_fraction) Check the error estimate for the embedded RK method and adjust the timestep if appropriate. 
@@ -1934,7 +1935,7 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen external_source_settings, spectral_objects, advect_objects, gyroavs, num_diss_params, advance, scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, - success) + success, nl_max_its_fraction) #error_norm_method = "Linf" error_norm_method = "L2" @@ -2141,7 +2142,7 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen adaptive_timestep_update_t_params!(t_params, scratch, t, CFL_limits, error_norms, total_points, current_dt, error_norm_method, - success) + success, nl_max_its_fraction) if t_params.previous_dt[] == 0.0 # Re-update remaining velocity moments that are calculable from the evolved @@ -2374,15 +2375,27 @@ function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa end if t_params.adaptive + nl_max_its_fraction = 0.0 + for p ∈ nl_solver_params + if p !== nothing + nl_max_its_fraction = + max(p.max_nonlinear_iterations_this_step[] / p.nonlinear_max_iterations, + nl_max_its_fraction) + end + end adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, moments, fields, composition, collisions, geometry, external_source_settings, spectral_objects, advect_objects, gyroavs, num_diss_params, advance, - scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, success) + scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, success, + nl_max_its_fraction) elseif !success error("Implicit part of timestep failed") end + reset_nonlinear_per_stage_counters(nl_solver_params.ion_advance) + reset_nonlinear_per_stage_counters(nl_solver_params.vpa_advection) + istage = n_rk_stages+1 # update the pdf.norm and moments arrays as needed From 68768202856441d10cd7f3471a7a6494e05a3df5 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 22 May 2024 16:52:59 +0100 Subject: [PATCH 56/75] Allow using Chebyshev discretization in vpa advection preconditioner ... as long as there is no vpa diffusion active. 
--- moment_kinetics/src/vpa_advection.jl | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index df2b778ca..81b0b54ff 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -11,6 +11,7 @@ using ..communication using ..looping using ..moment_constraints: hard_force_moment_constraints!, moment_constraints_on_residual! +using ..moment_kinetics_structs: weak_discretization_info using ..nonlinear_solvers: newton_solve! using ..array_allocation: allocate_float @@ -163,10 +164,16 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v for i ∈ 1:vpa.n advection_matrix[i,i] += 1.0 end - # This allocates a new matrix - to avoid this would need to pre-allocate a - # suitable buffer somewhere and use `mul!()`. - advection_matrix = vpa_spectral.mass_matrix * advection_matrix - @. advection_matrix -= dt * vpa_dissipation_coefficient * vpa_spectral.K_matrix + + if isa(vpa_spectral, weak_discretization_info) + # This allocates a new matrix - to avoid this would need to pre-allocate a + # suitable buffer somewhere and use `mul!()`. + advection_matrix = vpa_spectral.mass_matrix * advection_matrix + @. advection_matrix -= dt * vpa_dissipation_coefficient * vpa_spectral.K_matrix + elseif vpa_dissipation_coefficient > 0.0 + error("Non-weak-form schemes cannot precondition diffusion") + end + # hacky (?) 
Dirichlet boundary conditions this_f_out[1] = 0.0 this_f_out[end] = 0.0 @@ -202,8 +209,10 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v end function preconditioner(x) - # Multiply by mass matrix, storing result in vpa.scratch - mul!(vpa.scratch, vpa_spectral.mass_matrix, x) + if isa(vpa_spectral, weak_discretization_info) + # Multiply by mass matrix, storing result in vpa.scratch + mul!(vpa.scratch, vpa_spectral.mass_matrix, x) + end # Handle boundary conditions enforce_v_boundary_condition_local!(vpa.scratch, vpa_bc, speed, vpa_diffusion, From ec934dd6670e92868af52bcb5746cec21606c20a Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 22 May 2024 20:19:05 +0100 Subject: [PATCH 57/75] Check for duplicated entries in `advance` and `advance_implicit` --- moment_kinetics/src/time_advance.jl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 982cf2000..720dc0080 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -458,6 +458,16 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop external_source_settings, num_diss_params, manufactured_solns_input, r, z, vperp, vpa, vzeta, vr, vz) + # Check that no flags that shouldn't be are set in both advance and advance_implicit + for field ∈ fieldnames(advance_info) + if field ∈ (:r_diffusion, :vpa_diffusion, :vperp_diffusion, :vz_diffusion) + # These are meant to be set in both structs + continue + end + if getfield(advance, field) && getfield(advance_implicit, field) + error("$field is set to `true` in both `advance` and `advance_implicit`") + end + end # Set up parameters for Jacobian-free Newton-Krylov solver used for implicit part of # timesteps. 
From d49a11f1989b971c1fb971585d2b43dd372db34d Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 22 May 2024 20:27:13 +0100 Subject: [PATCH 58/75] Update derived moments and their derivatives in implicit solves for pdf --- moment_kinetics/src/time_advance.jl | 114 +++++------------------- moment_kinetics/src/velocity_moments.jl | 84 +++++++++++++++++ moment_kinetics/src/vpa_advection.jl | 27 ++++-- 3 files changed, 125 insertions(+), 100 deletions(-) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 720dc0080..0ada40e49 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -16,7 +16,7 @@ using ..debugging using ..file_io: write_data_to_ascii, write_all_moments_data_to_binary, write_all_dfns_data_to_binary, debug_dump using ..looping using ..moment_kinetics_structs: scratch_pdf -using ..velocity_moments: update_moments!, update_moments_neutral!, reset_moments_status! +using ..velocity_moments: update_moments!, update_moments_neutral!, reset_moments_status!, update_derived_moments!, update_derived_moments_neutral! using ..velocity_moments: update_density!, update_upar!, update_ppar!, update_pperp!, update_qpar!, update_vth! using ..velocity_moments: update_neutral_density!, update_neutral_qz! using ..velocity_moments: update_neutral_uzeta!, update_neutral_uz!, update_neutral_ur! 
@@ -2168,89 +2168,6 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen return nothing end -""" -update velocity moments that are calculable from the evolved ion pdf -""" -function update_derived_moments!(new_scratch, moments, vpa, vperp, z, r, composition, - r_spectral, geometry, gyroavs, scratch_dummy, z_advect, diagnostic_moments) - - if composition.gyrokinetic_ions - ff = scratch_dummy.buffer_vpavperpzrs_1 - # fill buffer with ring-averaged F (gyroaverage at fixed position) - gyroaverage_pdf!(ff,new_scratch.pdf,gyroavs,vpa,vperp,z,r,composition) - else - ff = new_scratch.pdf - end - - if !moments.evolve_density - update_density!(new_scratch.density, moments.ion.dens_updated, - ff, vpa, vperp, z, r, composition) - end - if !moments.evolve_upar - update_upar!(new_scratch.upar, moments.ion.upar_updated, new_scratch.density, - new_scratch.ppar, ff, vpa, vperp, z, r, composition, - moments.evolve_density, moments.evolve_ppar) - end - if !moments.evolve_ppar - # update_ppar! 
calculates (p_parallel/m_s N_e c_s^2) + (n_s/N_e)*(upar_s/c_s)^2 = (1/√π)∫d(vpa/c_s) (vpa/c_s)^2 * (√π f_s c_s / N_e) - update_ppar!(new_scratch.ppar, moments.ion.ppar_updated, new_scratch.density, - new_scratch.upar, ff, vpa, vperp, z, r, composition, - moments.evolve_density, moments.evolve_upar) - end - update_pperp!(new_scratch.pperp, ff, vpa, vperp, z, r, composition) - - # if diagnostic time step/RK stage - # update the diagnostic chodura condition - if diagnostic_moments - update_chodura!(moments,ff,vpa,vperp,z,r,r_spectral,composition,geometry,scratch_dummy,z_advect) - end - # update the thermal speed - begin_s_r_z_region() - try #below block causes DomainError if ppar < 0 or density, so exit cleanly if possible - update_vth!(moments.ion.vth, new_scratch.ppar, new_scratch.pperp, new_scratch.density, vperp, z, r, composition) - catch e - if global_size[] > 1 - println("ERROR: error calculating vth in time_advance.jl") - println(e) - display(stacktrace(catch_backtrace())) - flush(stdout) - flush(stderr) - MPI.Abort(comm_world, 1) - end - rethrow(e) - end - # update the parallel heat flux - update_qpar!(moments.ion.qpar, moments.ion.qpar_updated, new_scratch.density, - new_scratch.upar, moments.ion.vth, ff, vpa, vperp, z, r, - composition, moments.evolve_density, moments.evolve_upar, - moments.evolve_ppar) - # add further moments to be computed here - -end - -""" -update velocity moments that are calculable from the evolved neutral pdf -""" -function update_derived_moments_neutral!(new_scratch, moments, vz, vr, vzeta, z, r, - composition) - if !moments.evolve_density - update_neutral_density!(new_scratch.density_neutral, moments.neutral.dens_updated, - new_scratch.pdf_neutral, vz, vr, vzeta, z, r, composition) - end - if !moments.evolve_upar - update_neutral_uz!(new_scratch.uz_neutral, moments.neutral.uz_updated, - new_scratch.density_neutral, new_scratch.pz_neutral, - new_scratch.pdf_neutral, vz, vr, vzeta, z, r, composition, - moments.evolve_density, 
moments.evolve_ppar) - end - if !moments.evolve_ppar - update_neutral_pz!(new_scratch.pz_neutral, moments.neutral.pz_updated, - new_scratch.density_neutral, new_scratch.uz_neutral, - new_scratch.pdf_neutral, vz, vr, vzeta, z, r, composition, - moments.evolve_density, moments.evolve_upar) - end -end - """ """ function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa, vperp, @@ -2330,8 +2247,8 @@ function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa geometry, scratch_dummy, manufactured_source_list, external_source_settings, num_diss_params, - nl_solver_params, advance_implicit, fp_arrays, - istage) + gyroavs, nl_solver_params, advance_implicit, + fp_arrays, istage) success = MPI.Allreduce(success, &, comm_world) if !success # Break out of the istage loop, as passing `success = false` to the @@ -2725,7 +2642,8 @@ function backward_euler!(fvec_out, fvec_in, pdf, fields, moments, advect_objects vzeta, vpa, vperp, gyrophase, z, r, t, dt, spectral_objects, composition, collisions, geometry, scratch_dummy, manufactured_source_list, external_source_settings, - num_diss_params, nl_solver_params, advance, fp_arrays, istage) + num_diss_params, gyroavs, nl_solver_params, advance, fp_arrays, + istage) vpa_spectral, vperp_spectral, r_spectral, z_spectral = spectral_objects.vpa_spectral, spectral_objects.vperp_spectral, spectral_objects.r_spectral, spectral_objects.z_spectral vz_spectral, vr_spectral, vzeta_spectral = spectral_objects.vz_spectral, spectral_objects.vr_spectral, spectral_objects.vzeta_spectral @@ -2739,18 +2657,20 @@ function backward_euler!(fvec_out, fvec_in, pdf, fields, moments, advect_objects composition, collisions, geometry, scratch_dummy, manufactured_source_list, external_source_settings, num_diss_params, - nl_solver_params.ion_advance, advance, fp_arrays, - istage) + gyroavs, nl_solver_params.ion_advance, advance, + fp_arrays, istage) if !success return success end elseif advance.vpa_advection success = 
implicit_vpa_advection!(fvec_out.pdf, fvec_in, fields, moments, - vpa_advect, vpa, vperp, z, r, dt, t, - vpa_spectral, composition, collisions, + z_advect, vpa_advect, vpa, vperp, z, r, dt, t, + r_spectral, z_spectral, vpa_spectral, + composition, collisions, external_source_settings.ion, geometry, nl_solver_params.vpa_advection, - advance.vpa_diffusion, num_diss_params) + advance.vpa_diffusion, num_diss_params, gyroavs, + scratch_dummy) if !success return success end @@ -2773,7 +2693,7 @@ function implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments, advect_o vz, vr, vzeta, vpa, vperp, gyrophase, z, r, t, dt, spectral_objects, composition, collisions, geometry, scratch_dummy, manufactured_source_list, - external_source_settings, num_diss_params, + external_source_settings, num_diss_params, gyroavs, nl_solver_params, advance, fp_arrays, istage) vpa_spectral, vperp_spectral, r_spectral, z_spectral = spectral_objects.vpa_spectral, spectral_objects.vperp_spectral, spectral_objects.r_spectral, spectral_objects.z_spectral @@ -2948,6 +2868,14 @@ function implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments, advect_o fvec_out.pdf_neutral, fvec_out.density_neutral, fvec_out.uz_neutral, fvec_out.pz_neutral) + # Ensure moments are consistent with f_new + update_derived_moments!(new_scratch, moments, vpa, vperp, z, r, composition, + r_spectral, geometry, gyroavs, scratch_dummy, z_advect, + false) + calculate_ion_moment_derivatives!(moments, new_scratch, scratch_dummy, z, + z_spectral, + num_diss_params.ion.moment_dissipation_coefficient) + euler_time_advance!(residual_scratch, new_scratch, pdf, fields, moments, advect_objects, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, t, dt, spectral_objects, composition, collisions, geometry, diff --git a/moment_kinetics/src/velocity_moments.jl b/moment_kinetics/src/velocity_moments.jl index 1316080aa..5b211e437 100644 --- a/moment_kinetics/src/velocity_moments.jl +++ b/moment_kinetics/src/velocity_moments.jl @@ -1440,6 
+1440,90 @@ function calculate_neutral_moment_derivatives!(moments, scratch, scratch_dummy, end end +""" +update velocity moments that are calculable from the evolved ion pdf +""" +function update_derived_moments!(new_scratch, moments, vpa, vperp, z, r, composition, + r_spectral, geometry, gyroavs, scratch_dummy, z_advect, diagnostic_moments) + + if composition.gyrokinetic_ions + ff = scratch_dummy.buffer_vpavperpzrs_1 + # fill buffer with ring-averaged F (gyroaverage at fixed position) + gyroaverage_pdf!(ff,new_scratch.pdf,gyroavs,vpa,vperp,z,r,composition) + else + ff = new_scratch.pdf + end + + if !moments.evolve_density + update_density!(new_scratch.density, moments.ion.dens_updated, + ff, vpa, vperp, z, r, composition) + end + if !moments.evolve_upar + update_upar!(new_scratch.upar, moments.ion.upar_updated, new_scratch.density, + new_scratch.ppar, ff, vpa, vperp, z, r, composition, + moments.evolve_density, moments.evolve_ppar) + end + if !moments.evolve_ppar + # update_ppar! calculates (p_parallel/m_s N_e c_s^2) + (n_s/N_e)*(upar_s/c_s)^2 = (1/√π)∫d(vpa/c_s) (vpa/c_s)^2 * (√π f_s c_s / N_e) + update_ppar!(new_scratch.ppar, moments.ion.ppar_updated, new_scratch.density, + new_scratch.upar, ff, vpa, vperp, z, r, composition, + moments.evolve_density, moments.evolve_upar) + end + update_pperp!(new_scratch.pperp, ff, vpa, vperp, z, r, composition) + + # if diagnostic time step/RK stage + # update the diagnostic chodura condition + if diagnostic_moments + update_chodura!(moments,ff,vpa,vperp,z,r,r_spectral,composition,geometry,scratch_dummy,z_advect) + end + # update the thermal speed + begin_s_r_z_region() + try #below block causes DomainError if ppar < 0 or density, so exit cleanly if possible + update_vth!(moments.ion.vth, new_scratch.ppar, new_scratch.pperp, new_scratch.density, vperp, z, r, composition) + catch e + if global_size[] > 1 + println("ERROR: error calculating vth in time_advance.jl") + println(e) + display(stacktrace(catch_backtrace())) + 
flush(stdout) + flush(stderr) + MPI.Abort(comm_world, 1) + end + rethrow(e) + end + # update the parallel heat flux + update_qpar!(moments.ion.qpar, moments.ion.qpar_updated, new_scratch.density, + new_scratch.upar, moments.ion.vth, ff, vpa, vperp, z, r, + composition, moments.evolve_density, moments.evolve_upar, + moments.evolve_ppar) + # add further moments to be computed here + +end + +""" +update velocity moments that are calculable from the evolved neutral pdf +""" +function update_derived_moments_neutral!(new_scratch, moments, vz, vr, vzeta, z, r, + composition) + + if !moments.evolve_density + update_neutral_density!(new_scratch.density_neutral, moments.neutral.dens_updated, + new_scratch.pdf_neutral, vz, vr, vzeta, z, r, composition) + end + if !moments.evolve_upar + update_neutral_uz!(new_scratch.uz_neutral, moments.neutral.uz_updated, + new_scratch.density_neutral, new_scratch.pz_neutral, + new_scratch.pdf_neutral, vz, vr, vzeta, z, r, composition, + moments.evolve_density, moments.evolve_ppar) + end + if !moments.evolve_ppar + update_neutral_pz!(new_scratch.pz_neutral, moments.neutral.pz_updated, + new_scratch.density_neutral, new_scratch.uz_neutral, + new_scratch.pdf_neutral, vz, vr, vzeta, z, r, composition, + moments.evolve_density, moments.evolve_upar) + end +end + """ computes the integral over vpa of the integrand, using the input vpa_wgts """ diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index 81b0b54ff..07174ab10 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -11,8 +11,9 @@ using ..communication using ..looping using ..moment_constraints: hard_force_moment_constraints!, moment_constraints_on_residual! -using ..moment_kinetics_structs: weak_discretization_info +using ..moment_kinetics_structs: scratch_pdf, weak_discretization_info using ..nonlinear_solvers: newton_solve! +using ..velocity_moments: update_derived_moments!, calculate_ion_moment_derivatives! 
using ..array_allocation: allocate_float using ..boundary_conditions: vpagrid_to_dzdt @@ -43,20 +44,32 @@ end """ """ -function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, vperp, z, - r, dt, t, vpa_spectral, composition, collisions, +function implicit_vpa_advection!(f_out, fvec_in, fields, moments, z_advect, vpa_advect, + vpa, vperp, z, r, dt, t, r_spectral, z_spectral, + vpa_spectral, composition, collisions, ion_source_settings, geometry, nl_solver_params, - vpa_diffusion, num_diss_params) + vpa_diffusion, num_diss_params, gyroavs, scratch_dummy) if vperp.n > 1 && (moments.evolve_density || moments.evolve_upar || moments.evolve_ppar) error("Moment constraints in implicit_vpa_advection!() do not support 2V runs yet") end # calculate the advection speed corresponding to current f - update_speed_vpa!(advect, fields, fvec_in, moments, vpa, vperp, z, r, composition, + update_speed_vpa!(vpa_advect, fields, fvec_in, moments, vpa, vperp, z, r, composition, collisions, ion_source_settings, t, geometry) + # Ensure moments are consistent with f_new + new_scratch = scratch_pdf(f_out, fvec_in.density, fvec_in.upar, fvec_in.ppar, + fvec_in.pperp, fvec_in.temp_z_s, fvec_in.pdf_neutral, + fvec_in.density_neutral, fvec_in.uz_neutral, + fvec_in.pz_neutral) + update_derived_moments!(new_scratch, moments, vpa, vperp, z, r, composition, + r_spectral, geometry, gyroavs, scratch_dummy, z_advect, false) + calculate_ion_moment_derivatives!(moments, new_scratch, scratch_dummy, z, + z_spectral, + num_diss_params.ion.moment_dissipation_coefficient) begin_s_r_z_vperp_region() + coords = (vpa=vpa,) vpa_bc = vpa.bc minval = num_diss_params.ion.force_minimum_pdf_value @@ -66,7 +79,7 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v @loop_r_z_vperp ir iz ivperp begin f_old_no_bc = @view fvec_in.pdf[:,ivperp,iz,ir,is] this_f_out = @view f_out[:,ivperp,iz,ir,is] - speed = @view advect[is].speed[:,ivperp,iz,ir] + speed = @view 
vpa_advect[is].speed[:,ivperp,iz,ir] if z.irank == 0 && iz == 1 @. vpa.scratch = vpagrid_to_dzdt(vpa.grid, moments.ion.vth[iz,ir,is], @@ -252,7 +265,7 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, advect, vpa, v function residual_func!(residual, f_new) apply_bc!(f_new) residual .= f_old - advance_f_local!(residual, f_new, advect[is], ivperp, iz, ir, vpa, dt, + advance_f_local!(residual, f_new, vpa_advect[is], ivperp, iz, ir, vpa, dt, vpa_spectral) if vpa_diffusion From a4738bc7da247ec157657400bd1e208863681e8c Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 26 May 2024 15:28:02 +0100 Subject: [PATCH 59/75] Plot CFL conditions for implicit terms, but dotted --- .../src/makie_post_processing.jl | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index 546a6a8db..d459571cd 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -7306,12 +7306,17 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) else time = ri.time end + CFL_vars = String[] - if !ri.t_input["implicit_ion_advance"] - push!(CFL_vars, "minimum_CFL_ion_z") + implicit_CFL_vars = String[] + + push!(CFL_vars, "minimum_CFL_ion_z") + if ri.t_input["implicit_ion_advance"] + push!(implicit_CFL_vars, "minimum_CFL_ion_z") end - if !(ri.t_input["implicit_ion_advance"] || ri.t_input["implicit_vpa_advection"]) - push!(CFL_vars, "minimum_CFL_ion_vpa") + push!(CFL_vars, "minimum_CFL_ion_vpa") + if (ri.t_input["implicit_ion_advance"] || ri.t_input["implicit_vpa_advection"]) + push!(implicit_CFL_vars, "minimum_CFL_ion_vpa") end if ri.n_neutral_species > 0 push!(CFL_vars, "minimum_CFL_neutral_z", "minimum_CFL_neutral_vz") @@ -7320,14 +7325,22 @@ function 
timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) var = get_variable(ri, varname) maxval = NaNMath.min(maxval, NaNMath.maximum(var)) if occursin("neutral", varname) - linestyle = :dash + if varname ∈ implicit_CFL_vars + linestyle = :dashdot + else + linestyle = :dash + end else - linestyle = nothing + if varname ∈ implicit_CFL_vars + linestyle = :dot + else + linestyle = nothing + end end plot_1d(time, var; ax=ax, label=prefix*varname, linestyle=linestyle) end end - ylims!(ax, 0.0, 4.0 * maxval) + ylims!(ax, 0.0, 10.0 * maxval) put_legend_right(CFL_fig, ax) limits_fig, ax = get_1d_ax(; xlabel="time", ylabel="number of limits per factor per output", From f0a0bc138745a3c60f27d626c35b61cfbdc2274c Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 26 May 2024 17:43:01 +0100 Subject: [PATCH 60/75] Impose moment constraints on residual for implicit ion advance --- moment_kinetics/src/time_advance.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 0ada40e49..17f774e4f 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -2896,6 +2896,12 @@ function implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments, advect_o apply_bc!(residual) + begin_s_r_z_region() + @loop_s_r_z is ir iz begin + @views moment_constraints_on_residual!(residual[:,:,iz,ir,is], + f_new[:,:,iz,ir,is], moments, vpa) + end + return nothing end From 9b8bcef4ab29f7f304ba5d7394b625dfab898aea Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 26 May 2024 17:44:27 +0100 Subject: [PATCH 61/75] Comment out preconditioner in implicit_vpa_advection!() The preconditioner does not seem to work at the moment, but unpreconditioned advance does. Needs more work in future if we want to use this option. 
--- moment_kinetics/src/vpa_advection.jl | 229 +++++++++++++-------------- 1 file changed, 114 insertions(+), 115 deletions(-) diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index 07174ab10..bd70b3503 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -140,122 +140,121 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, z_advect, vpa_ f_old = vpa.scratch7 .= f_old_no_bc apply_bc!(f_old) - if nl_solver_params.stage_counter[] % nl_solver_params.preconditioner_update_interval == 0 - advection_matrix = allocate_float(vpa.n, vpa.n) - advection_matrix .= 0.0 - for i ∈ 1:vpa.nelement_local - imin = vpa.imin[i] - (i != 1) - imax = vpa.imax[i] - if i == 1 - advection_matrix[imin,imin:imax] .+= vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] - else - if speed[imin] < 0.0 - advection_matrix[imin,imin:imax] .+= vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] - elseif speed[imin] > 0.0 - # Do nothing - else - advection_matrix[imin,imin:imax] .+= 0.5 .* vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] - end - end - advection_matrix[imin+1:imax-1,imin:imax] .+= vpa_spectral.lobatto.Dmat[2:end-1,:] ./ vpa.element_scale[i] - if i == vpa.nelement_local - advection_matrix[imax,imin:imax] .+= vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i] - else - if speed[imax] < 0.0 - # Do nothing - elseif speed[imax] > 0.0 - advection_matrix[imax,imin:imax] .+= vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i] - else - advection_matrix[imax,imin:imax] .+= 0.5 .* vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i] - end - end - end - # Multiply by advection speed - for i ∈ 1:vpa.n - advection_matrix[i,:] .*= dt * speed[i] - end - for i ∈ 1:vpa.n - advection_matrix[i,i] += 1.0 - end - - if isa(vpa_spectral, weak_discretization_info) - # This allocates a new matrix - to avoid this would need to pre-allocate a - # suitable buffer somewhere and use `mul!()`. 
- advection_matrix = vpa_spectral.mass_matrix * advection_matrix - @. advection_matrix -= dt * vpa_dissipation_coefficient * vpa_spectral.K_matrix - elseif vpa_dissipation_coefficient > 0.0 - error("Non-weak-form schemes cannot precondition diffusion") - end - - # hacky (?) Dirichlet boundary conditions - this_f_out[1] = 0.0 - this_f_out[end] = 0.0 - advection_matrix[1,:] .= 0.0 - advection_matrix[1,1] = 1.0 - advection_matrix[end,:] .= 0.0 - advection_matrix[end,end] = 1.0 - - if z.bc == "wall" - if z.irank == 0 && iz == 1 - # Set equal df/dt equal to f on points that should be set to zero for - # boundary condition. The vector that the inverse of the advection matrix - # acts on should have zeros there already. - advection_matrix[icut_lower_z:end,icut_lower_z:end] .= 0.0 - for i ∈ icut_lower_z:vpa.n - advection_matrix[i,i] = 1.0 - end - end - if z.irank == z.nrank - 1 && iz == z.n - # Set equal df/dt equal to f on points that should be set to zero for - # boundary condition. The vector that the inverse of the advection matrix - # acts on should have zeros there already. - # I comes from LinearAlgebra and represents identity matrix - advection_matrix[1:icut_upper_z,1:icut_upper_z] .= 0.0 - for i ∈ 1:icut_upper_z - advection_matrix[i,i] = 1.0 - end - end - end - - advection_matrix = sparse(advection_matrix) - nl_solver_params.preconditioners[ivperp,iz,ir,is] = lu(advection_matrix) - end - - function preconditioner(x) - if isa(vpa_spectral, weak_discretization_info) - # Multiply by mass matrix, storing result in vpa.scratch - mul!(vpa.scratch, vpa_spectral.mass_matrix, x) - end - - # Handle boundary conditions - enforce_v_boundary_condition_local!(vpa.scratch, vpa_bc, speed, vpa_diffusion, - vpa, vpa_spectral) - - if z.bc == "wall" - # Wall boundary conditions. 
Note that as density, upar, ppar do not - # change in this implicit step, f_new, f_old, and residual should all - # be zero at exactly the same set of grid points, so it is reasonable - # to zero-out `residual` to impose the boundary condition. We impose - # this after subtracting f_old in case rounding errors, etc. mean that - # at some point f_old had a different boundary condition cut-off - # index. - if z.irank == 0 && iz == 1 - vpa.scratch[icut_lower_z:end] .= 0.0 -# println("at icut_lower_z ", f_new[icut_lower_z], " ", f_old[icut_lower_z]) - end - # absolute velocity at right boundary - if z.irank == z.nrank - 1 && iz == z.n - vpa.scratch[1:icut_upper_z] .= 0.0 - end - end - - # Do LU application on vpa.scratch, storing result in x - ldiv!(x, nl_solver_params.preconditioners[ivperp,iz,ir,is], vpa.scratch) - return nothing - end - #left_preconditioner = preconditioner - right_preconditioner = identity + #if nl_solver_params.stage_counter[] % nl_solver_params.preconditioner_update_interval == 0 + # advection_matrix = allocate_float(vpa.n, vpa.n) + # advection_matrix .= 0.0 + # for i ∈ 1:vpa.nelement_local + # imin = vpa.imin[i] - (i != 1) + # imax = vpa.imax[i] + # if i == 1 + # advection_matrix[imin,imin:imax] .+= vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] + # else + # if speed[imin] < 0.0 + # advection_matrix[imin,imin:imax] .+= vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] + # elseif speed[imin] > 0.0 + # # Do nothing + # else + # advection_matrix[imin,imin:imax] .+= 0.5 .* vpa_spectral.lobatto.Dmat[1,:] ./ vpa.element_scale[i] + # end + # end + # advection_matrix[imin+1:imax-1,imin:imax] .+= vpa_spectral.lobatto.Dmat[2:end-1,:] ./ vpa.element_scale[i] + # if i == vpa.nelement_local + # advection_matrix[imax,imin:imax] .+= vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i] + # else + # if speed[imax] < 0.0 + # # Do nothing + # elseif speed[imax] > 0.0 + # advection_matrix[imax,imin:imax] .+= vpa_spectral.lobatto.Dmat[end,:] ./ 
vpa.element_scale[i] + # else + # advection_matrix[imax,imin:imax] .+= 0.5 .* vpa_spectral.lobatto.Dmat[end,:] ./ vpa.element_scale[i] + # end + # end + # end + # # Multiply by advection speed + # for i ∈ 1:vpa.n + # advection_matrix[i,:] .*= dt * speed[i] + # end + # for i ∈ 1:vpa.n + # advection_matrix[i,i] += 1.0 + # end + + # if isa(vpa_spectral, weak_discretization_info) + # # This allocates a new matrix - to avoid this would need to pre-allocate a + # # suitable buffer somewhere and use `mul!()`. + # advection_matrix = vpa_spectral.mass_matrix * advection_matrix + # @. advection_matrix -= dt * vpa_dissipation_coefficient * vpa_spectral.K_matrix + # elseif vpa_dissipation_coefficient > 0.0 + # error("Non-weak-form schemes cannot precondition diffusion") + # end + + # # hacky (?) Dirichlet boundary conditions + # this_f_out[1] = 0.0 + # this_f_out[end] = 0.0 + # advection_matrix[1,:] .= 0.0 + # advection_matrix[1,1] = 1.0 + # advection_matrix[end,:] .= 0.0 + # advection_matrix[end,end] = 1.0 + + # if z.bc == "wall" + # if z.irank == 0 && iz == 1 + # # Set equal df/dt equal to f on points that should be set to zero for + # # boundary condition. The vector that the inverse of the advection matrix + # # acts on should have zeros there already. + # advection_matrix[icut_lower_z:end,icut_lower_z:end] .= 0.0 + # for i ∈ icut_lower_z:vpa.n + # advection_matrix[i,i] = 1.0 + # end + # end + # if z.irank == z.nrank - 1 && iz == z.n + # # Set equal df/dt equal to f on points that should be set to zero for + # # boundary condition. The vector that the inverse of the advection matrix + # # acts on should have zeros there already. 
+ # # I comes from LinearAlgebra and represents identity matrix + # advection_matrix[1:icut_upper_z,1:icut_upper_z] .= 0.0 + # for i ∈ 1:icut_upper_z + # advection_matrix[i,i] = 1.0 + # end + # end + # end + + # advection_matrix = sparse(advection_matrix) + # nl_solver_params.preconditioners[ivperp,iz,ir,is] = lu(advection_matrix) + #end + + #function preconditioner(x) + # if isa(vpa_spectral, weak_discretization_info) + # # Multiply by mass matrix, storing result in vpa.scratch + # mul!(vpa.scratch, vpa_spectral.mass_matrix, x) + # end + + # # Handle boundary conditions + # enforce_v_boundary_condition_local!(vpa.scratch, vpa_bc, speed, vpa_diffusion, + # vpa, vpa_spectral) + + # if z.bc == "wall" + # # Wall boundary conditions. Note that as density, upar, ppar do not + # # change in this implicit step, f_new, f_old, and residual should all + # # be zero at exactly the same set of grid points, so it is reasonable + # # to zero-out `residual` to impose the boundary condition. We impose + # # this after subtracting f_old in case rounding errors, etc. mean that + # # at some point f_old had a different boundary condition cut-off + # # index. 
+ # if z.irank == 0 && iz == 1 + # vpa.scratch[icut_lower_z:end] .= 0.0 +# # println("at icut_lower_z ", f_new[icut_lower_z], " ", f_old[icut_lower_z]) + # end + # # absolute velocity at right boundary + # if z.irank == z.nrank - 1 && iz == z.n + # vpa.scratch[1:icut_upper_z] .= 0.0 + # end + # end + + # # Do LU application on vpa.scratch, storing result in x + # ldiv!(x, nl_solver_params.preconditioners[ivperp,iz,ir,is], vpa.scratch) + # return nothing + #end left_preconditioner = identity + right_preconditioner = identity #right_preconditioner = preconditioner # Define a function whose input is `f_new`, so that when it's output From c82fe0573041a3e98757b99af317253e33588b49 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 26 May 2024 18:34:01 +0100 Subject: [PATCH 62/75] Collect nonlinear solver diagnostics from all processes, where necessary --- moment_kinetics/src/file_io.jl | 1 + moment_kinetics/src/nonlinear_solvers.jl | 35 ++++++++++++++++++++++-- moment_kinetics/src/time_advance.jl | 4 +++ 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl index e63ae81cd..1aea757ce 100644 --- a/moment_kinetics/src/file_io.jl +++ b/moment_kinetics/src/file_io.jl @@ -1423,6 +1423,7 @@ function write_all_moments_data_to_binary(moments, fields, t, n_ion_species, n_neutral_species, io_or_file_info_moments, t_idx, time_for_run, t_params, nl_solver_params, r, z) + @serial_region begin # Only read/write from first process in each 'block' diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 33f4f8581..1337e1900 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -25,7 +25,8 @@ Useful references: """ module nonlinear_solvers -export setup_nonlinear_solve, reset_nonlinear_per_stage_counters, newton_solve! 
+export setup_nonlinear_solve, gather_nonlinear_solver_counters!, + reset_nonlinear_per_stage_counters, newton_solve! using ..array_allocation: allocate_float, allocate_shared_float using ..communication @@ -53,6 +54,9 @@ struct nl_solver_info{TH,TV,Tlig,Tprecon} n_solves::Ref{mk_int} nonlinear_iterations::Ref{mk_int} linear_iterations::Ref{mk_int} + global_n_solves::Ref{mk_int} + global_nonlinear_iterations::Ref{mk_int} + global_linear_iterations::Ref{mk_int} stage_counter::Ref{mk_int} serial_solve::Bool max_nonlinear_iterations_this_step::Ref{mk_int} @@ -122,8 +126,8 @@ function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol nl_solver_input.nonlinear_max_iterations, nl_solver_input.linear_rtol, nl_solver_input.linear_atol, linear_restart, nl_solver_input.linear_max_restarts, H, V, - linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), - serial_solve, Ref(0), + linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), + Ref(0), Ref(0), serial_solve, Ref(0), nl_solver_input.preconditioner_update_interval, preconditioners) end @@ -142,6 +146,31 @@ function reset_nonlinear_per_stage_counters(nl_solver_params::Union{nl_solver_in return nothing end +""" + gather_nonlinear_solver_counters!(nl_solver_params) + +Where necessary, gather the iteration counters for the nonlinear solvers. + +Where each solve runs in parallel using all processes, this is unnecessary as the count on +each process already represents the global count. Where each solve uses only a subset of +processes, the counters from different solves need to be added together to get the global +total. 
+""" +function gather_nonlinear_solver_counters!(nl_solver_params) + if nl_solver_params.ion_advance !== nothing + # Solve runs in parallel on all processes, so no need to collect here + nl_solver_params.ion_advance.global_n_solves[] = nl_solver_params.ion_advance.n_solves[] + nl_solver_params.ion_advance.global_nonlinear_iterations[] = nl_solver_params.ion_advance.nonlinear_iterations[] + nl_solver_params.ion_advance.global_linear_iterations[] = nl_solver_params.ion_advance.linear_iterations[] + end + if nl_solver_params.vpa_advection !== nothing + # Solves are run in serial on separate processes, so need a global Allreduce + nl_solver_params.vpa_advection.global_n_solves[] = MPI.Allreduce(nl_solver_params.vpa_advection.n_solves[], +, comm_world) + nl_solver_params.vpa_advection.global_nonlinear_iterations[] = MPI.Allreduce(nl_solver_params.vpa_advection.nonlinear_iterations[], +, comm_world) + nl_solver_params.vpa_advection.global_linear_iterations[] = MPI.Allreduce(nl_solver_params.vpa_advection.linear_iterations[], +, comm_world) + end +end + """ newton_solve!(x, rhs_func!, residual, delta_x, rhs_delta, w, nl_solver_params; left_preconditioner=nothing, right_preconditioner=nothing, coords) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 17f774e4f..43873a6dd 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -1418,6 +1418,10 @@ function time_advance!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzet finish_now = true end + # Do MPI communication to add up counters from different processes, where + # necessary. 
+ gather_nonlinear_solver_counters!(nl_solver_params) + time_for_run = to_minutes(now() - start_time) end # write moments data to file From c92e7610a9ebdcb915930d8aea51c691cd3e1d1b Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 26 May 2024 21:54:10 +0100 Subject: [PATCH 63/75] Increase 'epsilon' for approximate Jacobian calculation A test run using implicit vpa advection fails using `epsilon=1.0e-8`, but runs using `epsilon=1.0e-6`, so increase the hard-coded value to 1.0e-6. Maybe at some point this should be a user-settable parameter. --- moment_kinetics/src/nonlinear_solvers.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 1337e1900..ee11c45d0 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -781,7 +781,7 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol # J δx = residual0 tol = max(rtol, atol) - epsilon = 1.0e-8 / tol + epsilon = 1.0e-6 / tol inv_epsilon = 1.0 / epsilon function approximate_Jacobian_vector_product!(v) From 208e700ccbf1111843308cf97a622b42e951aef4 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 26 May 2024 22:27:36 +0100 Subject: [PATCH 64/75] Decrease range of step sizes for line search in nonlinear solves Decreasing the step size scale factor below 1.0e-2 rarely results in a decreasing residual if one has not already been found, so set 1.0e-2 as the minimum to avoid wasted residual calculations.
--- moment_kinetics/src/nonlinear_solvers.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index ee11c45d0..7219c1530 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -307,7 +307,7 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, # Do a line search between x and x+delta_x to try to find an update that does # decrease residual_norm s = 0.5 - while s > 1.0e-5 + while s > 1.0e-2 parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) residual_func!(residual, x) residual_norm = distributed_norm(residual) From baffe03b56207bf69094a568e0a171135b2b0ea1 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 29 May 2024 13:21:46 +0100 Subject: [PATCH 65/75] Remove extra status printouts ...these were only introduced for debugging. --- moment_kinetics/src/time_advance.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 43873a6dd..915e539d0 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -2217,9 +2217,6 @@ function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa # success is set to false if an iteration failed to converge in an implicit solve success = true for istage ∈ 1:n_rk_stages - if global_rank[] == 0 - println("ion step ", t_params.step_counter[], ".", istage, " ", t) - end if t_params.rk_coefs_implicit !== nothing update_solution_vector!(scratch_implicit[istage], scratch[istage], moments, composition, vpa, vperp, z, r) From 5b3800c28263abbd455bb3c8c069f762b581d2ed Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 30 May 2024 15:44:59 +0100 Subject: [PATCH 66/75] Fix call to neutral_ionization_collisions_3V!() The arguments had not been fully updated when ion and neutral ionization collisions were split into separate 
functions. --- moment_kinetics/src/ionization.jl | 1 - moment_kinetics/src/time_advance.jl | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/moment_kinetics/src/ionization.jl b/moment_kinetics/src/ionization.jl index 63779c3ff..babdd5d9b 100644 --- a/moment_kinetics/src/ionization.jl +++ b/moment_kinetics/src/ionization.jl @@ -232,7 +232,6 @@ function neutral_ionization_collisions_3V!(f_neutral_out, fvec_in, composition, # neutral "ionization" (depopulation) rate = - f_n n_e R_ion #NB: used quasineutrality to replace electron density n_e with ion density #NEEDS GENERALISATION TO n_ion_species > 1 (missing species charge: Sum_i Z_i n_i = n_e) - # for ion species we need gyroaveraged neutral pdf, which is not stored in fvec (scratch[istage]) begin_sn_r_z_vzeta_vr_vz_region() @loop_sn isn begin for is ∈ 1:composition.n_ion_species diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 915e539d0..6b4bccc6a 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -2524,9 +2524,8 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments, z, r, vz_spectral, moments, composition, collisions, dt) elseif advance.neutral_ionization_collisions - neutral_ionization_collisions_3V!(fvec_out.pdf_neutral, pdf.neutral.buffer, fvec_in, - composition, vz, vr, vzeta, vpa, vperp, z, r, - collisions, dt) + neutral_ionization_collisions_3V!(fvec_out.pdf_neutral, fvec_in, composition, vz, + vr, vzeta, vpa, vperp, z, r, collisions, dt) end if advance.ionization_source constant_ionization_source!(fvec_out.pdf, fvec_in, vpa, vperp, z, r, moments, From 0a98a7aa5d48f9ac3de4c5a9e091bd8b5cb5c1d9 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 1 Jun 2024 19:52:45 +0100 Subject: [PATCH 67/75] Use nonlinear_max_iterations=100 to avoid warning messages from tests --- moment_kinetics/test/nonlinear_solver_tests.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/moment_kinetics/test/nonlinear_solver_tests.jl b/moment_kinetics/test/nonlinear_solver_tests.jl index c592082ff..b32304f3c 100644 --- a/moment_kinetics/test/nonlinear_solver_tests.jl +++ b/moment_kinetics/test/nonlinear_solver_tests.jl @@ -190,7 +190,8 @@ function nonlinear_test() Dict{String,Any}("rtol" => 0.0, "atol" => atol, "linear_restart" => restart, - "linear_max_restarts" => max_restarts)), + "linear_max_restarts" => max_restarts, + "nonlinear_max_iterations" => 100)), coords) newton_solve!(x, rhs_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params; From ec542b18e82b6f4ab8b21ab53ca9744e0c16d7da Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 3 Jun 2024 17:28:57 +0100 Subject: [PATCH 68/75] Fix for plotting of timestep diagnostics --- .../makie_post_processing/src/makie_post_processing.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index d459571cd..c64e74a5d 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -7311,11 +7311,11 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) implicit_CFL_vars = String[] push!(CFL_vars, "minimum_CFL_ion_z") - if ri.t_input["implicit_ion_advance"] + if occursin("ARK", ri.t_input["type"]) && ri.t_input["implicit_ion_advance"] push!(implicit_CFL_vars, "minimum_CFL_ion_z") end push!(CFL_vars, "minimum_CFL_ion_vpa") - if (ri.t_input["implicit_ion_advance"] || ri.t_input["implicit_vpa_advection"]) + if occursin("ARK", ri.t_input["type"]) && (ri.t_input["implicit_ion_advance"] || ri.t_input["implicit_vpa_advection"]) push!(implicit_CFL_vars, "minimum_CFL_ion_vpa") end if ri.n_neutral_species > 0 @@ -7435,14 +7435,14 @@ function timestep_diagnostics(run_info; plot_prefix=nothing, it=nothing) end end - 
if !ri.t_input["implicit_ion_advance"] + if !(occursin("ARK", ri.t_input["type"]) && ri.t_input["implicit_ion_advance"]) # Ion z advection counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; label=prefix * "ion z advect", ax=ax, linestyle=:dot) end - if !(ri.t_input["implicit_ion_advance"] || ri.t_input["implicit_vpa_advection"]) + if !(occursin("ARK", ri.t_input["type"]) && (ri.t_input["implicit_ion_advance"] || ri.t_input["implicit_vpa_advection"])) # Ion vpa advection counter += 1 plot_1d(time, @view limit_caused_by_per_output[counter,:]; From 2033d7f48c94dcd5a0db35ce32b8b863110b83cc Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 4 Jun 2024 16:54:21 +0100 Subject: [PATCH 69/75] Make nonlinear solver tests work in parallel --- .../test/nonlinear_solver_tests.jl | 119 +++++++++++++----- 1 file changed, 90 insertions(+), 29 deletions(-) diff --git a/moment_kinetics/test/nonlinear_solver_tests.jl b/moment_kinetics/test/nonlinear_solver_tests.jl index b32304f3c..1e63de1be 100644 --- a/moment_kinetics/test/nonlinear_solver_tests.jl +++ b/moment_kinetics/test/nonlinear_solver_tests.jl @@ -2,7 +2,7 @@ module NonlinearSolverTests include("setup.jl") -using moment_kinetics.array_allocation: allocate_shared_float +using moment_kinetics.array_allocation: allocate_float, allocate_shared_float using moment_kinetics.communication using moment_kinetics.coordinates: coordinate using moment_kinetics.input_structs: advection_input @@ -15,7 +15,7 @@ using MPI function linear_test() println(" - linear test") - @testset "linear test $coord_names" for coord_names ∈ ((:z,), (:vpa,)) + @testset "linear test $coord_names" for (coord_names, serial_solve) ∈ (((:z,), false), ((:vpa,), true)) # Test represents constant-coefficient diffusion, in 1D steady state, with a # central finite-difference discretisation of the second derivative. 
# @@ -67,28 +67,48 @@ function linear_test() coords = NamedTuple(c => the_coord for c ∈ coord_names) function rhs_func!(residual, x) - begin_serial_region() - @serial_region begin + if serial_solve residual .= A * x - b + else + begin_serial_region() + @serial_region begin + residual .= A * x - b + end end return nothing end - x = allocate_shared_float(n) - residual = allocate_shared_float(n) - delta_x = allocate_shared_float(n) - rhs_delta = allocate_shared_float(n) - v = allocate_shared_float(n) - w = allocate_shared_float(n) + if serial_solve + x = allocate_float(n) + residual = allocate_float(n) + delta_x = allocate_float(n) + rhs_delta = allocate_float(n) + v = allocate_float(n) + w = allocate_float(n) - begin_serial_region() - @serial_region begin x .= 0.0 residual .= 0.0 delta_x .= 0.0 rhs_delta .= 0.0 v .= 0.0 w .= 0.0 + else + x = allocate_shared_float(n) + residual = allocate_shared_float(n) + delta_x = allocate_shared_float(n) + rhs_delta = allocate_shared_float(n) + v = allocate_shared_float(n) + w = allocate_shared_float(n) + + begin_serial_region() + @serial_region begin + x .= 0.0 + residual .= 0.0 + delta_x .= 0.0 + rhs_delta .= 0.0 + v .= 0.0 + w .= 0.0 + end end nl_solver_params = setup_nonlinear_solve( @@ -97,23 +117,29 @@ function linear_test() "atol" => atol, "linear_restart" => restart, "linear_max_restarts" => max_restarts)), - coords) + coords; serial_solve=serial_solve) newton_solve!(x, rhs_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params; coords) - begin_serial_region() - @serial_region begin + if serial_solve x_direct = A \ b @test isapprox(x, x_direct; atol=100.0*atol) + else + begin_serial_region() + @serial_region begin + x_direct = A \ b + + @test isapprox(x, x_direct; atol=100.0*atol) + end end end end function nonlinear_test() println(" - non-linear test") - @testset "non-linear test" for coord_names ∈ ((:z,), (:vpa,)) + @testset "non-linear test" for (coord_names, serial_solve) ∈ (((:z,), false), ((:vpa,), true)) # Test 
represents constant-coefficient diffusion, in 1D steady state, with a # central finite-difference discretisation of the second derivative. # @@ -152,8 +178,7 @@ function nonlinear_test() coords = NamedTuple(c => the_coord for c ∈ coord_names) function rhs_func!(residual, x) - begin_serial_region() - @serial_region begin + if serial_solve i = 1 D = abs(x[i])^2.5 residual[i] = D * (- 2.0 * x[i] + x[i+1]) - b[i] @@ -164,25 +189,57 @@ function nonlinear_test() i = n D = abs(x[i])^2.5 residual[i] = D * (x[i-1] - 2.0 * x[i]) - b[i] + else + begin_serial_region() + @serial_region begin + i = 1 + D = abs(x[i])^2.5 + residual[i] = D * (- 2.0 * x[i] + x[i+1]) - b[i] + for i ∈ 2:n-1 + D = abs(x[i])^2.5 + residual[i] = D * (x[i-1] - 2.0 * x[i] + x[i+1]) - b[i] + end + i = n + D = abs(x[i])^2.5 + residual[i] = D * (x[i-1] - 2.0 * x[i]) - b[i] + end end return nothing end - x = allocate_shared_float(n) - residual = allocate_shared_float(n) - delta_x = allocate_shared_float(n) - rhs_delta = allocate_shared_float(n) - v = allocate_shared_float(n) - w = allocate_shared_float(n) + if serial_solve + x = allocate_float(n) + residual = allocate_float(n) + delta_x = allocate_float(n) + rhs_delta = allocate_float(n) + v = allocate_float(n) + w = allocate_float(n) + else + x = allocate_shared_float(n) + residual = allocate_shared_float(n) + delta_x = allocate_shared_float(n) + rhs_delta = allocate_shared_float(n) + v = allocate_shared_float(n) + w = allocate_shared_float(n) + end - begin_serial_region() - @serial_region begin + if serial_solve x .= 1.0 residual .= 0.0 delta_x .= 0.0 rhs_delta .= 0.0 v .= 0.0 w .= 0.0 + else + begin_serial_region() + @serial_region begin + x .= 1.0 + residual .= 0.0 + delta_x .= 0.0 + rhs_delta .= 0.0 + v .= 0.0 + w .= 0.0 + end end nl_solver_params = setup_nonlinear_solve( @@ -192,16 +249,20 @@ function nonlinear_test() "linear_restart" => restart, "linear_max_restarts" => max_restarts, "nonlinear_max_iterations" => 100)), - coords) + coords; 
serial_solve=serial_solve) newton_solve!(x, rhs_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params; coords) rhs_func!(residual, x) - begin_serial_region() - @serial_region begin + if serial_solve @test isapprox(residual, zeros(n); atol=4.0*atol) + else + begin_serial_region() + @serial_region begin + @test isapprox(residual, zeros(n); atol=4.0*atol) + end end end end From 4d84e9bfaed703b272a71d885d06f269f8dab254 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 5 Jun 2024 23:02:21 +0100 Subject: [PATCH 70/75] Calculate low-order solution rather than error for adaptive RK schemes This will allow us to apply boundary conditions and constraints to the low-order solution, which is easier than applying boundary conditions to the error. --- moment_kinetics/src/runge_kutta.jl | 107 ++++++++++++++-------------- moment_kinetics/src/time_advance.jl | 20 +++--- util/calculate_rk_coeffs.jl | 56 ++++++++------- util/test-rk-timestep.jl | 34 ++++----- 4 files changed, 113 insertions(+), 104 deletions(-) diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index ec53bf40e..d2767a90f 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -43,7 +43,7 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat 0 0 7296//2197 553475//233928 6591//1520 2197//836 2197//836; 0 0 0 -845//4104 -77//40 -56//55 -1 ; 0 0 0 0 -11//40 34//55 8//11 ; - 0 0 0 0 0 2//55 -1 ] + 0 0 0 0 0 2//55 0 ] n_rk_stages = 6 rk_order = 5 adaptive = true @@ -78,7 +78,7 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat 0 0 0 0 0 0 1//6 5//6 0 0 6//5; 0 0 0 0 0 0 0 1//6 5//6 0 -9//5; 0 0 0 0 0 0 0 0 1//6 1//2 9//5; - 0 0 0 0 0 0 0 0 0 1//10 -1 ] + 0 0 0 0 0 0 0 0 0 1//10 0 ] n_rk_stages = 10 rk_order = 4 adaptive = true @@ -91,22 +91,22 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat elseif type == "Fekete6(4)" # Fekete 6-stage 4th-order 
SSPRK (see comments in util/calculate_rk_coeffs.jl. # Note Fekete et al. recommend the 10-stage method rather than this one. - #rk_coeffs = mk_float[0.6447024483081 0.2386994475333264 0.5474858792272213 0.3762853856474131 0.0 -0.18132326703443313 -0.0017300417984673078; - # 0.3552975516919 0.4295138541066736 -6.461498003318411e-14 -1.1871059690804486e-13 0.0 2.9254376698872875e-14 -0.18902907903375094 ; - # 0.0 0.33178669836 0.25530138316744333 -3.352873534367973e-14 0.0 0.2059808002676668 0.2504712436879622 ; - # 0.0 0.0 0.1972127376054 0.3518900216285391 0.0 0.4792670116241715 -0.9397479180374522 ; - # 0.0 0.0 0.0 0.2718245927242 0.5641843457422999 9.986456106503283e-14 1.1993626679930305 ; - # 0.0 0.0 0.0 0.0 0.4358156542577 0.3416567872695656 -0.5310335716309745 ; - # 0.0 0.0 0.0 0.0 0.0 0.1544186678729 0.2117066988196524 ] + #rk_coefs = mk_float[0.6447024483081 0.23869944753332645 0.5474858792272213 0.3762853856474131 -6.304828384656085e-17 -0.1813232670344333 -1.0017300417984671 ; + # 0.3552975516919 0.4295138541066736 -6.460461358323626e-14 -1.1868936325049587e-13 3.608184516786869e-18 2.9392365006883485e-14 -0.18902907903375094; + # 0.0 0.33178669836 0.25530138316744333 -3.3545605887402925e-14 -1.0929532856876731e-17 0.20598080026766677 0.2504712436879622 ; + # 0.0 0.0 0.1972127376054 0.3518900216285391 7.036963218665071e-17 0.47926701162417157 -0.939747918037452 ; + # 0.0 0.0 0.0 0.2718245927242 0.5641843457422999 9.97599117309567e-14 1.1993626679930303 ; + # 0.0 0.0 0.0 0.0 0.4358156542577 0.34165678726956566 -0.5310335716309745 ; + # 0.0 0.0 0.0 0.0 0.0 0.1544186678729 1.2117066988196523 ] # Might as well set to 0 the entries that look like they should be 0 apart from # rounding errors. 
- rk_coefs = mk_float[0.6447024483081 0.2386994475333264 0.5474858792272213 0.3762853856474131 0.0 -0.18132326703443313 -0.0017300417984673078; - 0.3552975516919 0.4295138541066736 0.0 0.0 0.0 0.0 -0.18902907903375094 ; - 0.0 0.33178669836 0.25530138316744333 0.0 0.0 0.2059808002676668 0.2504712436879622 ; - 0.0 0.0 0.1972127376054 0.3518900216285391 0.0 0.4792670116241715 -0.9397479180374522 ; - 0.0 0.0 0.0 0.2718245927242 0.5641843457422999 0.0 1.1993626679930305 ; - 0.0 0.0 0.0 0.0 0.4358156542577 0.3416567872695656 -0.5310335716309745 ; - 0.0 0.0 0.0 0.0 0.0 0.1544186678729 0.2117066988196524 ] + rk_coefs = mk_float[0.6447024483081 0.23869944753332645 0.5474858792272213 0.3762853856474131 0.0 -0.1813232670344333 -0.0017300417984673633 ; + 0.3552975516919 0.4295138541066736 0.0 0.0 0.0 0.0 -0.18902907903375094 ; + 0.0 0.33178669836 0.25530138316744333 0.0 0.0 0.20598080026766677 0.2504712436879622 ; + 0.0 0.0 0.1972127376054 0.3518900216285391 0.0 0.47926701162417157 -0.939747918037452 ; + 0.0 0.0 0.0 0.2718245927242 0.5641843457422999 0.0 1.1993626679930303 ; + 0.0 0.0 0.0 0.0 0.4358156542577 0.34165678726956566 -0.5310335716309745 ; + 0.0 0.0 0.0 0.0 0.0 0.1544186678729 1.2117066988196523 ] n_rk_stages = 6 rk_order = 4 adaptive = true @@ -122,7 +122,7 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat # the addition of a truncation error estimate. 
rk_coefs = mk_float[1//2 0 2//3 0 -1//2; 0 1//2 1//6 1//2 1 ; - 1//2 1//2 1//6 1//2 -1//2] + 1//2 1//2 1//6 1//2 1//2] n_rk_stages = 4 rk_order = 3 adaptive = true @@ -138,7 +138,7 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat 1//3 2//3 0 0 3//16; 0 1//3 2//3 0 0 ; 0 0 1//3 1//2 3//16; - 0 0 0 1//4 -1//4 ] + 0 0 0 1//4 3//4 ] n_rk_stages = 4 rk_order = 2 adaptive = true @@ -158,7 +158,7 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat 0 0 0 1723805262919//4571918432560 -25876943084012456170678693260966349907000//5680618296291396887153903820876753773137 48844701120434798505257977500359380914524723337647143148599611849985789938064739097674693920//866184382793087165443239722235018033562332770654938221330767289744734883576318512798157591 -10439201328524415451101705838850752098522244583730511249035532393853115936152097267231153987129654894580734726923948255514483695320//517179273612462951325391504144113415822567772911339018044947272294653098377564085323877210889868378806161859809936951216035085879 -31792106067067045818112260837108131508726364490393328197498519784851127422528513551342159965127955715285413010230702025548328167661456142039693351348408082614337262676//66396651076396923064500150569526548836429014778004208000488845377413771691299301838459832995249234003843920958668478089089157501642918765520740904300173384489982847721; 0 0 0 0 -1428733748635//8843423958496 45167606322154409493844473095934856651372//7158213738657143615497509498481377893535 -1615099588584428850432566812602172779484150467417635643452543010181911434924813//328599571910176348051406456035570993083111686686639606428802520754094008735795 -4034687914592733378429707314152809369441595752859107808382503309002756873129991192828401438988938141477//34345686420160340136137214349452622351601924975531611734364160909677419579508067395379029901259679245150; 0 0 0 0 0 11565764226357//8513123442827 
-25809210976654570172323689//32261698729236896469921844 31493257163953445767383915813738522959//6188068127939952207906691591618012272168; - 0 0 0 0 0 0 247//2000 -29//2470] + 0 0 0 0 0 0 247//2000 2441//2470] rk_coefs_implicit = mk_float[1 247//2000 989824741407403//8373961392408000 -12030115115653867750152716618470210758833187291724517144940796943//609397470172413665797699364172762331707483442215210917782455180000 19147420183138843105007498522596849881542399740447062535321023604019750121955062857549476278907294114841964587966287560253879//12007584994347792994563601588111658591339510060926761187770418557513264354280247507713480290732437248602134590507625905403895000 -295295570231675996693315167776780747152571140671795256274243785364473032721842828048391983319277155679830435409182467618416827429676745188054309175736589897273331671973890410913992022974171287068414179//499867332324141489065693705797015061555470198594087676411730863263595532332547843750402423821657395601179538355441513058033586857409796246413426575022629481281490691873892436155364434077826968844800000 103607762222490505420853358572242884724507274765889279862093763910463346807234121781702833150902639990159657168588480823754779677586383897844592436246137396292078029780455443152811826226940327856890549206867400186099307876348070477253089682709554337188513182823809//15846395315500828948293599357908804356336729750211600389043278392178552174355500826515963831257047380331363827784152454858099313066343648754036690892717493195241218181604351117843847364556563440810162192678265267613927619728210569124978171156934334521762174000000 
-1756980287380270661909883701400719407931186906833821727133555062583564555413167571820171278734011901786553078780566040820580897865510288379254954688057860520817526060415179421086733406546370539399123710022886030886680529570815674651094279571160030365101231973109840311106887692153830760503248639774555086269823425452980789521983//764963488550517879725510262063916101037233331401188397489604157279695577477734266224122577157658358028576751524525825054206223633871270548345475822148138066329151854365003292044714970534007563767771990433272164479396483915551518580938634921015384398041756998160126222238252494195303993824192549425661544752671543862047544000000 -4497481478618828357902975315180513548695118296897040369787574112992144752316100046645078788641717287509607925626450106980120209292833281864629468608251949921103739384618553805425857887380202308238545540652588351850372157487811335028912567342994696086555165285879240439813676160036107905425561960935961331946051968043421127130927398396885118470213706720569254204159456782778143//85667583282859962523560967955988970758408179807734176212400794868306945215956544912219494461344717671530920637373943592208889153603776199991843466607331897567616281864342294469275103539091139157245713414377377488777874997374657030932201779245537617802264426134698892108090494279600240047261858101034422636741160199870807835675708144120974805421541262250025919709003849520000000; 0 247//2000 1640162795743102475350151093//1935977423638871147963949261 -669520662365112558836337514177765438740501906951800//3434894046659157185836586522283340462482027689855253 3946917245375300099117028390196313070898965018251827990344948235886761188759982607841249401044014800//8731687803837463597851852204375440308270070845281864536173093106185812052200062608725343371330114879 
-205400344379039456236889403418786715948678188681075725573409873825666119392008156332013686312028931603996403696782121273525274842696005527984325836413553777161305//89363383017861228225303962341868491699836686835444580475507268771312849840752079176917079323598733222511329293960320776917936050196860239819503153329236587045136 30687505472431132535398127411347221742615518129027192717809570222837123760440722268522243759249777774158885828261806787110459137168616203275440545863270163627249417471268646467826870981867030746658885027755639865911983778862//1133170665462309851631677012105217332859851084790774917074074021640837762020794312552555302543238883393727692629571220902396774550231187040084644756043645934216506528391671558626974497474800652364423277959876832348439939747 -6283525646833647454002892632282410246801087091050293278650123969383191268291122461341921080069123066751425002628983740840429970668280064678807240424673718814438405438302314529802406401591711043517178423219353227983220474197585457883457827139444416354631995314601988204663707433738723579//636073193374428113494435816226054457408386696306335042416362660226889698324156499448503155209934236569286275656030656961509515881881917957453625064846593883601537604025882329382852749987796068169486806444822362282067831972375349383462186775277444140348881039842682407991538426680968962 
-5111399490648784623505405906880770861485683214132379586079815455918757228788561547090828993593721487763199937324034708336117675901305459316990878813671055951639549111107993366169146620054742699199566402315746577078472396338683439114520127236419852170565949886333950618992185026169588984547653272289747956394678487224897575893472996553//23744424443337318051143623666623532897092042087880269588186878183995047296072436550183424467565368665201724952138208425897034422691189645477159617431842320555598909677789228593651627069755898900855393242632756026860305068214302197202618797165584108476518318651533651241430264302060599862244313590743654489782747825300717170452010145820; 0 0 247//2000 -2972678418645402786341338364//12838151569953855298061689287 -14177902844174227269025142717297051203475652491333500//11971105784073717652462253600880425542713752681105087 -100148767553896799794460313449068596317708839495767869203993116264461768408264073121941920171989860225//16896980983343158542755332581698141314998069944341701280946967597382042773067583208093579816862254704 8120587802228218371525098119582199057503340092685175907611832343053507998800789197285606422127482797666378809500665961580494440814493887170697877672294810170691010//77414698954417744943691534221904770957339164270679599444552137006106151217420120158205166736460436330696537591532832275880234436981438762980759441700502891030827 -773431832654955771723705915527504690565625557946561612427689605734312849813109402940104692058813867875775302143309398169386342513814875087912054850684653412041387124455620214615519952957068564604440450567357722605//27692724047854711512168597950120181690998527229673939994235410614905537717037560897782708599532214026735280560758431733216205801827405703523808488247547552157893603286263272072905320930368075417471639053737763218 
-1303404354434948517428356388978807213640989814097796284872851954122942850939247373256252247771849635395296815861618930858082321997548690453745717952318803556064569049629447371161223768908840614968332185016381909654224705552772116843629887797345246826110167236923//1860770177301487311499340868274363903273885901513548551661642580585076571165583837547959561673448191237064888114034470178211592939044370653201543317609208350146901593536799945407827170076630727738853296085239724937440199422317066971204713970176240621839897829164; @@ -184,7 +184,7 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat 1767732205903//2027836641118 104089650763298376775517283594005205634//126974577376103704518428054341484614367 402649974715726257110381944227129692139788970635058928062257578119864655690737612569045525//59412386975052518972493036127105554419479894359642287241033487141332226598183812678018467 14364246105178652475593341035586171289599000967514280222073651706570666894470602049892875371222508981379602016199819025473080098//7260819418426986996892762999677651302312472577052165412779166595718930645183114458002425346612148056054055237577670620367624747 -592205887440399614308635936680843507009892739451509439144512297133172235516402369571203494065187197645330486644003505743504146737687284998414220824119624619321360424932079759986667400688521//924270765882147767919737706090852676802697317279651422141773866515771469294827079989551846085654584104864692146726487859057882031810152027557213168580198324425357042783589702576114965490700; 0 788022342437//10882634858940 -1296455210574762780005510449066529142309392831329604//156723304220428565752393692055114250106676689860589 -99673073274892040508947928240745592597410069006658308475742716315333750923451055334466588//36786590455936249721221951824053931878290276425173065614254196079782580911623834181388539 
1168350732453084113921674966113208169458618576839040713544125474188024763720251037296265456175894768135827904089264712977423236877161701237//1613695686286088606494807246788767988268387847524889706258074110760741418740305745483456939972126277819165534580240908885870858409423997925; 0 0 10755448449292//10357097424841 -2001365002799665343288696//31172749426290664269423183 -82906599394091874511049578920146826651351968733949252229257729//3202722135068822020151439455680372156253901385805112449655312300; - 0 0 0 1767732205903//4055673282236 -189157537172543652255956//2412892370833855116699825] + 0 0 0 1767732205903//4055673282236 2223734833661311464443869//2412892370833855116699825] rk_coefs_implicit = mk_float[1 1767732205903//4055673282236 -687399076962262115744047//111840222463218881928454996 -1336358764442892755444059056266228090581151350208390996588040500539220614055//27246512666133488484210277154496662561525905554882950399821293911874378720636 285200943189932567138799198875705878963855729946875982791861565057705737765121043333532449390223175098959890725739//3197693884094627611809027664829775477382158476704304256365487708404699034948521210742881159797768179105035102983218 349906153740531530807086677680742934068250971081202838415859967794240267241728221444895340256117278121084468557470142822774574506555702945441070832822061777207585555333938753//13983881081032778505142889772097359302066194326597505729957247063227579945322705391794876798087456494699396393914333627777048370428560157989974144628536010494017605716344421200; 0 1767732205903//4055673282236 -37790740941101883580610910860591527247//131693372088198837795836211504917271540 -1855100908631287514752342850534478233437898769154169242982744248//696821012189129201178283337188250633903779865345141334346955981 -13864927788667680976894941132472814614486628394925655027069496971358505136846849603294768//21721826807945957279896846558408676329589122800041876081057713354971012403885490761907523 
3279440794571605320352156805403819083072967867568566199921530274858816688510719259249438840491831638733833253082509394728011722335821499666//12387161495243534653846823539531835409482524701416597408671153384101508978692879955207371459557771903678231566977186207593669409183236242425; 0 0 1767732205903//4055673282236 252818125219158362337262316052985694732//212256555723208031266376853558764309283 91446741129505469383144040723161877446754697117568//638848912416080950914803563396305008955201041811429 -64974104611006292878697003397814574642940098342186996399489269962607827562729361752864//16409007790583358079608534742015990629506873866141219637841130600794261103005884731491225; @@ -413,16 +413,16 @@ function rk_update_variable!(scratch, scratch_implicit, var_symbol::Symbol, t_pa end """ -Calculate the estimated truncation error for the variable named `var_symbol`, for adaptive -timestepping methods. +Calculate a lower-order approximation for the variable named `var_symbol`, which can be +used to calculate an error estimate for adaptive timestepping methods. -The calculated error is stored in `var_symbol` in `scratch[2]` (as this entry should not -be needed again after the error is calculated). +The lower-order approximation is stored in `var_symbol` in `scratch[2]` (as this entry +should not be needed again after the lower-order approximation is calculated). 
""" -function rk_error_variable!(scratch, scratch_implicit, var_symbol::Symbol, t_params; - neutrals=false) +function rk_loworder_solution!(scratch, scratch_implicit, var_symbol::Symbol, t_params; + neutrals=false) if !t_params.adaptive - error("rk_error_variable!() should only be called when using adaptive " + error("rk_lowerder_solution!() should only be called when using adaptive " * "timestepping") end if t_params.low_storage @@ -446,32 +446,33 @@ function rk_error_variable!(scratch, scratch_implicit, var_symbol::Symbol, t_par end end - error_coefs = @view t_params.rk_coefs[:,end] + loworder_coefs = @view t_params.rk_coefs[:,end] if t_params.rk_coefs_implicit === nothing - error_coefs_implicit = nothing + loworder_coefs_implicit = nothing else - error_coefs_implicit = @view t_params.rk_coefs_implicit[:,end] + loworder_coefs_implicit = @view t_params.rk_coefs_implicit[:,end] end # The second element of `scratch` is not needed any more for the RK update, so we can - # overwrite it with the error estimate. + # overwrite it with the lower-order approximation. 
output = getfield(scratch[2], var_symbol) if neutrals if t_params.low_storage - rk_update_loop_neutrals_low_storage!(error_coefs, error_coefs_implicit, + rk_update_loop_neutrals_low_storage!(loworder_coefs, loworder_coefs_implicit, var_arrays..., var_arrays_implicit...; output=output) else - rk_update_loop_neutrals!(error_coefs, error_coefs_implicit, var_arrays, + rk_update_loop_neutrals!(loworder_coefs, loworder_coefs_implicit, var_arrays, var_arrays_implicit; output=output) end else if t_params.low_storage - rk_update_loop_low_storage!(error_coefs, error_coefs_implicit, var_arrays..., - var_arrays_implicit...; output=output) + rk_update_loop_low_storage!(loworder_coefs, loworder_coefs_implicit, + var_arrays..., var_arrays_implicit...; + output=output) else - rk_update_loop!(error_coefs, error_coefs_implicit, var_arrays, + rk_update_loop!(loworder_coefs, loworder_coefs_implicit, var_arrays, var_arrays_implicit; output=output) end end @@ -806,13 +807,13 @@ be known at compile time, allowing this function to be efficient. """ function local_error_norm end -function local_error_norm(error::MPISharedArray{mk_float,2}, +function local_error_norm(f_loworder::MPISharedArray{mk_float,2}, f::MPISharedArray{mk_float,2}, rtol, atol; method="Linf", skip_r_inner=false, skip_z_lower=false, error_sum_zero=0.0) if method == "Linf" f_max = -Inf @loop_r_z ir iz begin - error_norm = abs(error[iz,ir]) / (rtol*abs(f[iz,ir]) + atol) + error_norm = abs(f_loworder[iz,ir] - f[iz,ir]) / (rtol*abs(f[iz,ir]) + atol) f_max = max(f_max, error_norm) end return f_max @@ -822,12 +823,12 @@ function local_error_norm(error::MPISharedArray{mk_float,2}, if (skip_r_inner && ir == 1) || (skip_z_lower && iz == 1) continue end - error_norm = (error[iz,ir] / (rtol*abs(f[iz,ir]) + atol))^2 + error_norm = ((f_loworder[iz,ir] - f[iz,ir]) / (rtol*abs(f[iz,ir]) + atol))^2 L2sum += error_norm end # Will sum results from different processes in shared memory block after returning # from this function. 
- nz, nr = size(error) + nz, nr = size(f_loworder) if skip_r_inner nr -= 1 end @@ -839,7 +840,7 @@ function local_error_norm(error::MPISharedArray{mk_float,2}, error("Unrecognized method '$method'") end end -function local_error_norm(error::MPISharedArray{mk_float,3}, +function local_error_norm(f_loworder::MPISharedArray{mk_float,3}, f::MPISharedArray{mk_float,3}, rtol, atol, neutral=false; method="Linf", skip_r_inner=false, skip_z_lower=false, error_sum_zero=0.0) @@ -847,12 +848,12 @@ function local_error_norm(error::MPISharedArray{mk_float,3}, f_max = -Inf if neutral @loop_sn_r_z isn ir iz begin - error_norm = abs(error[iz,ir,isn]) / (rtol*abs(f[iz,ir,isn]) + atol) + error_norm = abs(f_loworder[iz,ir,isn] - f[iz,ir,isn]) / (rtol*abs(f[iz,ir,isn]) + atol) f_max = max(f_max, error_norm) end else @loop_s_r_z is ir iz begin - error_norm = abs(error[iz,ir,is]) / (rtol*abs(f[iz,ir,is]) + atol) + error_norm = abs(f_loworder[iz,ir,is] - f[iz,ir,is]) / (rtol*abs(f[iz,ir,is]) + atol) f_max = max(f_max, error_norm) end end @@ -864,7 +865,7 @@ function local_error_norm(error::MPISharedArray{mk_float,3}, if (skip_r_inner && ir == 1) || (skip_z_lower && iz == 1) continue end - error_norm = (error[iz,ir,isn] / (rtol*abs(f[iz,ir,isn]) + atol))^2 + error_norm = ((f_loworder[iz,ir,isn] - f[iz,ir,isn]) / (rtol*abs(f[iz,ir,isn]) + atol))^2 L2sum += error_norm end else @@ -872,13 +873,13 @@ function local_error_norm(error::MPISharedArray{mk_float,3}, if (skip_r_inner && ir == 1) || (skip_z_lower && iz == 1) continue end - error_norm = (error[iz,ir,is] / (rtol*abs(f[iz,ir,is]) + atol))^2 + error_norm = ((f_loworder[iz,ir,is] - f[iz,ir,is]) / (rtol*abs(f[iz,ir,is]) + atol))^2 L2sum += error_norm end end # Will sum results from different processes in shared memory block after returning # from this function. 
- nz, nr, nspecies = size(error) + nz, nr, nspecies = size(f_loworder) if skip_r_inner nr -= 1 end @@ -890,13 +891,13 @@ function local_error_norm(error::MPISharedArray{mk_float,3}, error("Unrecognized method '$method'") end end -function local_error_norm(error::MPISharedArray{mk_float,5}, +function local_error_norm(f_loworder::MPISharedArray{mk_float,5}, f::MPISharedArray{mk_float,5}, rtol, atol; method="Linf", skip_r_inner=false, skip_z_lower=false, error_sum_zero=0.0) if method == "Linf" f_max = -Inf @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin - error_norm = abs(error[ivpa,ivperp,iz,ir,is]) / + error_norm = abs(f_loworder[ivpa,ivperp,iz,ir,is] - f[ivpa,ivperp,iz,ir,is]) / (rtol*abs(f[ivpa,ivperp,iz,ir,is]) + atol) f_max = max(f_max, error_norm) end @@ -907,13 +908,13 @@ function local_error_norm(error::MPISharedArray{mk_float,5}, if (skip_r_inner && ir == 1) || (skip_z_lower && iz == 1) continue end - error_norm = (error[ivpa,ivperp,iz,ir,is] / + error_norm = ((f_loworder[ivpa,ivperp,iz,ir,is] - f[ivpa,ivperp,iz,ir,is]) / (rtol*abs(f[ivpa,ivperp,iz,ir,is]) + atol))^2 L2sum += error_norm end # Will sum results from different processes in shared memory block after returning # from this function. 
- nvpa, nvperp, nz, nr, nspecies = size(error) + nvpa, nvperp, nz, nr, nspecies = size(f_loworder) if skip_r_inner nr -= 1 end @@ -925,13 +926,13 @@ function local_error_norm(error::MPISharedArray{mk_float,5}, error("Unrecognized method '$method'") end end -function local_error_norm(error::MPISharedArray{mk_float,6}, +function local_error_norm(f_loworder::MPISharedArray{mk_float,6}, f::MPISharedArray{mk_float,6}, rtol, atol; method="Linf", skip_r_inner=false, skip_z_lower=false, error_sum_zero=0.0) if method == "Linf" f_max = -Inf @loop_sn_r_z_vzeta_vr_vz isn ir iz ivzeta ivr ivz begin - error_norm = abs(error[ivz,ivr,ivzeta,iz,ir,isn]) / + error_norm = abs(f_loworder[ivz,ivr,ivzeta,iz,ir,isn] - f[ivz,ivr,ivzeta,iz,ir,isn]) / (rtol*abs(f[ivz,ivr,ivzeta,iz,ir,isn]) + atol) f_max = max(f_max, error_norm) end @@ -942,7 +943,7 @@ function local_error_norm(error::MPISharedArray{mk_float,6}, if (skip_r_inner && ir == 1) || (skip_z_lower && iz == 1) continue end - error_norm = (error[ivz,ivr,ivzeta,iz,ir,isn] / + error_norm = ((f_loworder[ivz,ivr,ivzeta,iz,ir,isn] - f_loworder[ivz,ivr,ivzeta,iz,ir,isn]) / (rtol*abs(f[ivz,ivr,ivzeta,iz,ir,isn]) + atol))^2 L2sum += error_norm end diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 6b4bccc6a..c36976f93 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -66,7 +66,7 @@ using ..gyroaverages: init_gyro_operators, gyroaverage_pdf! using ..manufactured_solns: manufactured_sources using ..advection: advection_info using ..runge_kutta: rk_update_evolved_moments!, rk_update_evolved_moments_neutral!, - rk_update_variable!, rk_error_variable!, + rk_update_variable!, rk_loworder_solution!, setup_runge_kutta_coefficients!, local_error_norm, adaptive_timestep_update_t_params! 
using ..utils: to_minutes, get_minimum_CFL_z, get_minimum_CFL_vpa, @@ -2021,8 +2021,8 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen skip_z_lower = z.irank != 0 # Calculate error for ion distribution functions - # Note rk_error_variable!() stores the calculated error in `scratch[2]`. - rk_error_variable!(scratch, scratch_implicit, :pdf, t_params) + # Note we store the calculated error in `scratch[2]`. + rk_loworder_solution!(scratch, scratch_implicit, :pdf, t_params) ion_pdf_error = local_error_norm(scratch[2].pdf, scratch[t_params.n_rk_stages+1].pdf, t_params.rtol, t_params.atol; method=error_norm_method, skip_r_inner=skip_r_inner, @@ -2035,7 +2035,7 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen # Calculate error for ion moments, if necessary if moments.evolve_density begin_s_r_z_region() - rk_error_variable!(scratch, scratch_implicit, :density, t_params) + rk_loworder_solution!(scratch, scratch_implicit, :density, t_params) ion_n_err = local_error_norm(scratch[2].density, scratch[t_params.n_rk_stages+1].density, t_params.rtol, t_params.atol; @@ -2047,7 +2047,7 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen end if moments.evolve_upar begin_s_r_z_region() - rk_error_variable!(scratch, scratch_implicit, :upar, t_params) + rk_loworder_solution!(scratch, scratch_implicit, :upar, t_params) ion_u_err = local_error_norm(scratch[2].upar, scratch[t_params.n_rk_stages+1].upar, t_params.rtol, t_params.atol; method=error_norm_method, @@ -2058,7 +2058,7 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen end if moments.evolve_ppar begin_s_r_z_region() - rk_error_variable!(scratch, scratch_implicit, :ppar, t_params) + rk_loworder_solution!(scratch, scratch_implicit, :ppar, t_params) ion_p_err = local_error_norm(scratch[2].ppar, scratch[t_params.n_rk_stages+1].ppar, t_params.rtol, t_params.atol; method=error_norm_method, @@ -2101,7 
+2101,7 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen push!(CFL_limits, t_params.CFL_prefactor * neutral_vz_CFL) # Calculate error for neutral distribution functions - rk_error_variable!(scratch, scratch_implicit, :pdf_neutral, t_params; neutrals=true) + rk_loworder_solution!(scratch, scratch_implicit, :pdf_neutral, t_params; neutrals=true) neut_pdf_error = local_error_norm(scratch[2].pdf_neutral, scratch[end].pdf_neutral, t_params.rtol, t_params.atol; method=error_norm_method, @@ -2116,7 +2116,7 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen # Calculate error for neutral moments, if necessary if moments.evolve_density begin_sn_r_z_region() - rk_error_variable!(scratch, scratch_implicit, :density_neutral, t_params; neutrals=true) + rk_loworder_solution!(scratch, scratch_implicit, :density_neutral, t_params; neutrals=true) neut_n_err = local_error_norm(scratch[2].density_neutral, scratch[end].density_neutral, t_params.rtol, t_params.atol, true; method=error_norm_method, @@ -2128,7 +2128,7 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen end if moments.evolve_upar begin_sn_r_z_region() - rk_error_variable!(scratch, scratch_implicit, :uz_neutral, t_params; neutrals=true) + rk_loworder_solution!(scratch, scratch_implicit, :uz_neutral, t_params; neutrals=true) neut_u_err = local_error_norm(scratch[2].uz_neutral, scratch[t_params.n_rk_stages+1].uz_neutral, t_params.rtol, t_params.atol, true; @@ -2141,7 +2141,7 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen end if moments.evolve_ppar begin_sn_r_z_region() - rk_error_variable!(scratch, scratch_implicit, :pz_neutral, t_params; neutrals=true) + rk_loworder_solution!(scratch, scratch_implicit, :pz_neutral, t_params; neutrals=true) neut_p_err = local_error_norm(scratch[2].pz_neutral, scratch[t_params.n_rk_stages+1].pz_neutral, t_params.rtol, t_params.atol, true; diff --git 
a/util/calculate_rk_coeffs.jl b/util/calculate_rk_coeffs.jl index c2803edd6..f4f3c366d 100644 --- a/util/calculate_rk_coeffs.jl +++ b/util/calculate_rk_coeffs.jl @@ -192,8 +192,9 @@ function convert_butcher_tableau_for_moment_kinetics(a, b, sum(b_implicit[1,i] * k_implicit[i] for i ∈ 1:n_rk_stages)) if adaptive - y_err = sum((b[2,i] - b[1,i]) * k[i] for i ∈ 1:n_rk_stages) + - sum((b_implicit[2,i] - b_implicit[1,i]) * k_implicit[i] for i ∈ 1:n_rk_stages) + y_loworder = yn + + sum(b[2,i] * k[i] for i ∈ 1:n_rk_stages) + + sum(b_implicit[2,i] * k_implicit[i] for i ∈ 1:n_rk_stages) end # Define expressions for y_tilde[i] using the rk_coefs as used in moment_kinetics @@ -312,7 +313,7 @@ function convert_butcher_tableau_for_moment_kinetics(a, b, if adaptive i = n_rk_stages + 1 lhs = Symbolics.coeff(y_rk_coefs_err, yn) - rhs = Symbolics.coeff(y_err, yn) + rhs = Symbolics.coeff(y_loworder, yn) if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 push!(rk_coefs_equations, rk_coefs[1,i] ~ 0) else @@ -320,7 +321,7 @@ function convert_butcher_tableau_for_moment_kinetics(a, b, end for j ∈ 1:n_rk_stages lhs = Symbolics.coeff(y_rk_coefs_err, k[j]) - rhs = Symbolics.coeff(y_err, k[j]) + rhs = Symbolics.coeff(y_loworder, k[j]) if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 push!(rk_coefs_equations, rk_coefs[j+1,i] ~ 0) else @@ -330,7 +331,7 @@ function convert_butcher_tableau_for_moment_kinetics(a, b, i = n_rk_stages + 2 for j ∈ 1:n_rk_stages lhs = Symbolics.coeff(y_rk_coefs_err, k_implicit[j]) - rhs = Symbolics.coeff(y_err, k_implicit[j]) + rhs = Symbolics.coeff(y_loworder, k_implicit[j]) if isa(lhs, Number) && lhs == 0 && isa(rhs, Number) && rhs == 0 push!(rk_coefs_equations, rk_coefs_implicit[j,i] ~ 0) else @@ -458,11 +459,11 @@ function convert_butcher_tableau_for_moment_kinetics(a::Matrix{Rational{Int64}}, end function convert_rk_coefs_to_butcher_tableau(rk_coefs::AbstractArray{T,N}, + adaptive, rk_coefs_implicit=zeros(T, size(rk_coefs, 1) - 1, 
size(rk_coefs, 2) + 1), implicit_coefficient_is_zero=nothing ) where {T,N} using_rationals = eltype(rk_coefs) <: Rational || eltype(rk_coefs_implicit) <: Rational - adaptive = (abs(sum(rk_coefs[:,end]) + sum(rk_coefs_implicit[:,end])) < 1.0e-13) low_storage = size(rk_coefs, 1) == 3 if adaptive n_rk_stages = size(rk_coefs, 2) - 1 @@ -510,13 +511,13 @@ function convert_rk_coefs_to_butcher_tableau(rk_coefs::AbstractArray{T,N}, if adaptive if low_storage i = n_rk_stages + 1 - y_err = rk_coefs[1,i]*y_tilde[1] + rk_coefs[2,i]*y_tilde[n_rk_stages] + rk_coefs[3,i]*y_tilde[n_rk_stages+1] + - rk_coefs_implicit[1,i+1]*y[1] + rk_coefs_implicit[2,i+1]*y[n_rk_stages-1] + rk_coefs_implicit[3,i+1]*y[n_rk_stages] + y_loworder = rk_coefs[1,i]*y_tilde[1] + rk_coefs[2,i]*y_tilde[n_rk_stages] + rk_coefs[3,i]*y_tilde[n_rk_stages+1] + + rk_coefs_implicit[1,i+1]*y[1] + rk_coefs_implicit[2,i+1]*y[n_rk_stages-1] + rk_coefs_implicit[3,i+1]*y[n_rk_stages] else - y_err = sum(rk_coefs[j,n_rk_stages+1]*y_tilde[j] for j ∈ 1:n_rk_stages+1) + - sum(rk_coefs_implicit[j,n_rk_stages+2]*y[j] for j ∈ 1:n_rk_stages) + y_loworder = sum(rk_coefs[j,n_rk_stages+1]*y_tilde[j] for j ∈ 1:n_rk_stages+1) + + sum(rk_coefs_implicit[j,n_rk_stages+2]*y[j] for j ∈ 1:n_rk_stages) end - y_err = simplify(expand(y_err)) + y_loworder = simplify(expand(y_loworder)) end # Set up equations to solve for each y_tilde[i] and y[i] in terms of k[i] and @@ -545,17 +546,13 @@ function convert_rk_coefs_to_butcher_tableau(rk_coefs::AbstractArray{T,N}, b_implicit[1, j] = Symbolics.coeff(y_tilde_k_expressions[n_rk_stages+1], k_implicit[j]) end if adaptive - error_coeffs = zeros(T, n_rk_stages) - error_coeffs_implicit = zeros(T, n_rk_stages) - y_k_err = substitute(y_err, Dict(y_tilde[i] => y_tilde_k_expressions[i] for i ∈ 1:n_rk_stages+1)) - y_k_err = substitute(y_k_err, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages)) - y_k_err = simplify(expand(y_k_err)) + y_k_loworder = substitute(y_loworder, Dict(y_tilde[i] => 
y_tilde_k_expressions[i] for i ∈ 1:n_rk_stages+1)) + y_k_loworder = substitute(y_k_loworder, Dict(y[i] => y_k_expressions[i] for i ∈ 1:n_rk_stages)) + y_k_loworder = simplify(expand(y_k_loworder)) for j ∈ 1:n_rk_stages - error_coeffs[j] = Symbolics.coeff(y_k_err, k[j]) - error_coeffs_implicit[j] = Symbolics.coeff(y_k_err, k_implicit[j]) + b[2,j] = Symbolics.coeff(y_k_loworder, k[j]) + b_implicit[2,j] = Symbolics.coeff(y_k_loworder, k_implicit[j]) end - @. b[2,:] = error_coeffs + b[1,:] - @. b_implicit[2,:] = error_coeffs_implicit + b_implicit[1,:] end a = zeros(T, n_rk_stages, n_rk_stages) @@ -615,10 +612,13 @@ function convert_and_check_butcher_tableau(name, a, b, if size(b, 1) > 1 # Adaptive timestep error_sum = sum(rk_coefs[:,end]) + sum(rk_coefs_implicit[:,end]) - if abs(error_sum) > 1.0e-13 - error("Sum of error coefficients should be 0. Got ", error_sum, " ≈ ", Float64(error_sum)) + if abs(error_sum - 1) > 1.0e-13 + error("Sum of loworder coefficients should be 1. Got ", error_sum, " ≈ ", Float64(error_sum)) end check_end -= 1 + adaptive = true + else + adaptive = false end for i ∈ 1:check_end if low_storage @@ -654,7 +654,7 @@ function convert_and_check_butcher_tableau(name, a, b, # Consistency check: converting back should give the original a, b. 
a_check, b_check, a_check_implicit, b_check_implicit = - convert_rk_coefs_to_butcher_tableau(rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero) + convert_rk_coefs_to_butcher_tableau(rk_coefs, adaptive, rk_coefs_implicit, implicit_coefficient_is_zero) if eltype(a) == Rational if a_check != a @@ -704,7 +704,7 @@ function convert_and_check_butcher_tableau(name, a, b, end end -function convert_and_check_rk_coefs(name, rk_coefs, +function convert_and_check_rk_coefs(name, rk_coefs, adaptive=false, rk_coefs_implicit=zeros(eltype(rk_coefs), size(rk_coefs, 1), size(rk_coefs, 2) + 1), @@ -717,7 +717,7 @@ function convert_and_check_rk_coefs(name, rk_coefs, if imex print("rk_coefs_implicit="); display(rk_coefs_implicit) end - a, b, a_implicit, b_implicit = convert_rk_coefs_to_butcher_tableau(rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero) + a, b, a_implicit, b_implicit = convert_rk_coefs_to_butcher_tableau(rk_coefs, adaptive, rk_coefs_implicit, implicit_coefficient_is_zero) print("a="); display(a) print("b="); display(b) if imex @@ -909,6 +909,12 @@ convert_and_check_butcher_tableau( construct_fekete_3rd_order(4)... 
) +convert_and_check_butcher_tableau( + "Fekete 4(3) not low-storage", + construct_fekete_3rd_order(4)...; + low_storage=false + ) + """ construct_fekete_2nd_order(nstage) diff --git a/util/test-rk-timestep.jl b/util/test-rk-timestep.jl index 1ce3b7832..dd77ab06f 100644 --- a/util/test-rk-timestep.jl +++ b/util/test-rk-timestep.jl @@ -52,7 +52,8 @@ function rk_advance_explicit(rk_coefs, y0, dt, nsteps) #k4 = 6*(yscratch[5] - yscratch[1]) - k1 - 2*k2 - 2*k3 #println("kcheck = ", k1, " ", k2, " ", k3, " ", k4) if adaptive - error[it+1] = sum(rk_coefs[i, n_rk_stages+1]*yscratch[i] for i ∈ 1:n_rk_stages+1) + loworder = sum(rk_coefs[i, n_rk_stages+1]*yscratch[i] for i ∈ 1:n_rk_stages+1) + error[it+1] = loworder - yscratch[end] end yscratch[1] = yscratch[end] result[it+1] = yscratch[end] @@ -101,8 +102,9 @@ function rk_advance(rk_coefs, y0, dt, nsteps, rk_coefs_implicit=nothing, implici #k4 = 6*(yscratch[5] - yscratch[1]) - k1 - 2*k2 - 2*k3 #println("kcheck = ", k1, " ", k2, " ", k3, " ", k4) if adaptive - error[it+1] = sum(rk_coefs[i, n_rk_stages+1]*yscratch[i] for i ∈ 1:n_rk_stages+1) + - sum(rk_coefs_implicit[i, n_rk_stages+2]*yscratch_implicit[i] for i ∈ 1:n_rk_stages) + loworder = sum(rk_coefs[i, n_rk_stages+1]*yscratch[i] for i ∈ 1:n_rk_stages+1) + + sum(rk_coefs_implicit[i, n_rk_stages+2]*yscratch_implicit[i] for i ∈ 1:n_rk_stages) + error[it+1] = loworder - yscratch[end] end yscratch[1] = yscratch[end] result[it+1] = yscratch[end] @@ -244,7 +246,7 @@ methods = Dict( a = Float64[0 0 0 0; 1//2 0 0 0; 0 1//2 0 0; 0 0 1 0], b = Float64[1//6 1//3 1//3 1//6]), - "RKF45" => (rk_coefs = Float64[3//4 5//8 10469//2197 115//324 121//240 641//1980 11//36; 1//4 3//32 17328//2197 95//54 33//10 232//165 4//3; 0 9//32 -32896//2197 -95744//29241 -1408//285 -512//171 -512//171; 0 0 7296//2197 553475//233928 6591//1520 2197//836 2197//836; 0 0 0 -845//4104 -77//40 -56//55 -1; 0 0 0 0 -11//40 34//55 8//11; 0 0 0 0 0 2//55 -1], + "RKF45" => (rk_coefs = Float64[3//4 5//8 10469//2197 
115//324 121//240 641//1980 11//36; 1//4 3//32 17328//2197 95//54 33//10 232//165 4//3; 0 9//32 -32896//2197 -95744//29241 -1408//285 -512//171 -512//171; 0 0 7296//2197 553475//233928 6591//1520 2197//836 2197//836; 0 0 0 -845//4104 -77//40 -56//55 -1; 0 0 0 0 -11//40 34//55 8//11; 0 0 0 0 0 2//55 0], a = Float64[0 0 0 0 0 0; 1//4 0 0 0 0 0; 3//32 9//32 0 0 0 0; 1932//2197 -7200//2197 7296//2197 0 0 0; 439//216 -8 3680//513 -845//4104 0 0; -8//27 2 -3544//2565 1859//4104 -11//40 0], b = Float64[16//135 0 6656//12825 28561//56430 -9//50 2//55; 25//216 0 1408//2565 2197//4104 -1//5 0]), @@ -268,13 +270,13 @@ methods = Dict( a = Float64[0 0 0 0; 1//2 0 0 0; 1//2 1//2 0 0; 1//6 1//6 1//6 0], b = Float64[1//6 1//6 1//6 1//2]), - "mk ssprk2" => (rk_coefs = Float64[0.0 0.5 0.0; - 1.0 0.0 0.0; - 0.0 0.5 0.0], + "mk ssprk2" => (rk_coefs = Float64[0.0 0.5; + 1.0 0.0; + 0.0 0.5], a = Float64[0.0 0.0; 1.0 0.0], b = Float64[0.5 0.5; 0.5 0.5]), - "Fekete 43" => (rk_coefs = Float64[1//2 0 2//3 0 -1//2; 1//2 1//2 0 0 0; 0 1//2 1//6 0 0; 0 0 1//6 1//2 1; 0 0 0 1//2 -1//2], + "Fekete 43" => (rk_coefs = Float64[1//2 0 2//3 0 -1//2; 1//2 1//2 0 0 0; 0 1//2 1//6 0 0; 0 0 1//6 1//2 1; 0 0 0 1//2 1//2], a = Float64[0 0 0 0; 1//2 0 0 0; 1//2 1//2 0 0; 1//6 1//6 1//6 0], b = Float64[1//6 1//6 1//6 1//2; 1//4 1//4 1//4 1//4]), @@ -282,19 +284,19 @@ methods = Dict( a = Float64[0 0 0 0; 1//2 0 0 0; 1//2 1//2 0 0; 1//6 1//6 1//6 0], b = Float64[1//6 1//6 1//6 1//2]), - "Fekete 42" => (rk_coefs = Float64[2//3 0 0 1//4 -1//8; 1//3 2//3 0 0 3//16; 0 1//3 2//3 0 0; 0 0 1//3 1//2 3//16; 0 0 0 1//4 -1//4], + "Fekete 42" => (rk_coefs = Float64[2//3 0 0 1//4 -1//8; 1//3 2//3 0 0 3//16; 0 1//3 2//3 0 0; 0 0 1//3 1//2 3//16; 0 0 0 1//4 3//4], a = Float64[0 0 0 0; 1//3 0 0 0; 1//3 1//3 0 0; 1//3 1//3 1//3 0], b = Float64[1//4 1//4 1//4 1//4; 5//16 1//4 1//4 3//16]), - "Fekete 10,4" => (rk_coefs = Float64[5//6 0 0 0 3//5 0 0 0 0 -1//2 -1//5; 1//6 5//6 0 0 0 0 0 0 0 0 6//5; 0 1//6 5//6 0 0 0 0 0 0 0 0; 0 
0 1//6 5//6 0 0 0 0 0 0 -9//5; 0 0 0 1//6 1//3 0 0 0 0 0 9//5; 0 0 0 0 1//15 5//6 0 0 0 9//10 0; 0 0 0 0 0 1//6 5//6 0 0 0 -6//5; 0 0 0 0 0 0 1//6 5//6 0 0 6//5; 0 0 0 0 0 0 0 1//6 5//6 0 -9//5; 0 0 0 0 0 0 0 0 1//6 1//2 9//5; 0 0 0 0 0 0 0 0 0 1//10 -1], + "Fekete 10,4" => (rk_coefs = Float64[5//6 0 0 0 3//5 0 0 0 0 -1//2 -1//5; 1//6 5//6 0 0 0 0 0 0 0 0 6//5; 0 1//6 5//6 0 0 0 0 0 0 0 0; 0 0 1//6 5//6 0 0 0 0 0 0 -9//5; 0 0 0 1//6 1//3 0 0 0 0 0 9//5; 0 0 0 0 1//15 5//6 0 0 0 9//10 0; 0 0 0 0 0 1//6 5//6 0 0 0 -6//5; 0 0 0 0 0 0 1//6 5//6 0 0 6//5; 0 0 0 0 0 0 0 1//6 5//6 0 -9//5; 0 0 0 0 0 0 0 0 1//6 1//2 9//5; 0 0 0 0 0 0 0 0 0 1//10 0], a = Float64[0 0 0 0 0 0 0 0 0 0; 1//6 0 0 0 0 0 0 0 0 0; 1//6 1//6 0 0 0 0 0 0 0 0; 1//6 1//6 1//6 0 0 0 0 0 0 0; 1//6 1//6 1//6 1//6 0 0 0 0 0 0; 1//15 1//15 1//15 1//15 1//15 0 0 0 0 0; 1//15 1//15 1//15 1//15 1//15 1//6 0 0 0 0; 1//15 1//15 1//15 1//15 1//15 1//6 1//6 0 0 0; 1//15 1//15 1//15 1//15 1//15 1//6 1//6 1//6 0 0; 1//15 1//15 1//15 1//15 1//15 1//6 1//6 1//6 1//6 0], b = Float64[1//10 1//10 1//10 1//10 1//10 1//10 1//10 1//10 1//10 1//10; 1//5 0 0 3//10 0 0 1//5 0 3//10 0]), - "Fekete 6,4" => (rk_coefs = [0.6447024483081 0.2386994475333264 0.5474858792272213 0.3762853856474131 0.0 -0.18132326703443313 -0.0017300417984673078; 0.3552975516919 0.4295138541066736 -6.461498003318411e-14 -1.1871059690804486e-13 0.0 2.9254376698872875e-14 -0.18902907903375094; 0.0 0.33178669836 0.25530138316744333 -3.352873534367973e-14 0.0 0.2059808002676668 0.2504712436879622; 0.0 0.0 0.1972127376054 0.3518900216285391 0.0 0.4792670116241715 -0.9397479180374522; 0.0 0.0 0.0 0.2718245927242 0.5641843457422999 9.986456106503283e-14 1.1993626679930305; 0.0 0.0 0.0 0.0 0.4358156542577 0.3416567872695656 -0.5310335716309745; 0.0 0.0 0.0 0.0 0.0 0.1544186678729 0.2117066988196524], + "Fekete 6,4" => (rk_coefs = [0.6447024483081 0.23869944753332645 0.5474858792272213 0.3762853856474131 -6.304828384656085e-17 -0.1813232670344333 
-0.0017300417984673633; 0.3552975516919 0.4295138541066736 -6.460461358323626e-14 -1.1868936325049587e-13 3.608184516786869e-18 2.9392365006883485e-14 -0.18902907903375094; -0.0 0.33178669836 0.25530138316744333 -3.3545605887402925e-14 -1.0929532856876731e-17 0.20598080026766677 0.2504712436879622; -0.0 -0.0 0.1972127376054 0.3518900216285391 7.036963218665071e-17 0.47926701162417157 -0.939747918037452; -0.0 -0.0 -0.0 0.2718245927242 0.5641843457422999 9.97599117309567e-14 1.1993626679930303; -0.0 -0.0 -0.0 -0.0 0.4358156542577 0.34165678726956566 -0.5310335716309745; -0.0 -0.0 -0.0 -0.0 -0.0 0.1544186678729 1.2117066988196523], a = [0.0 0.0 0.0 0.0 0.0 0.0; 0.3552975516919 0.0 0.0 0.0 0.0 0.0; 0.2704882223931 0.33178669836 0.0 0.0 0.0 0.0; 0.1223997401356 0.1501381660925 0.1972127376054 0.0 0.0 0.0; 0.0763425067155 0.093643368364 0.123004466581 0.2718245927242 0.0 0.0; 0.0763425067155 0.093643368364 0.123004466581 0.2718245927242 0.4358156542577 0.0], b = [0.1522491819555 0.1867521364225 0.1555370561501 0.1348455085546 0.2161974490441 0.1544186678729; 0.1210663237182 0.230884400455 0.0853424972752 0.3450614904457 0.0305351538213 0.1871101342844]), - "KennedyCarpenterARK437" => (rk_coefs = Float64[1259//2000 5290646302898597//8373961392408000 8423019873483076625875775070651744355694385798609904937007939962921//150521175132586175452031742950672295931748410227157096692266429460000 2994513382955822485521046980912234681324963183189745389014355133307701665733190659992515397312853494866472292886256590592494915387//2965873493603904869657209592263579672060858985048910013379293383705776295507221134405229631810912000404727243855383598634762065000 
30539970275135679633584632348554217845550139791434491473433065741424255490119246194598789137339313385221967019548261328863657459815753917023734449603158284970462973658597410429650430702665200563167161539643//13704862650330987205714124331836761942666326434854101824180425078097998709961464232104783253918380815197539403091139963512106850869604383687916916407395432488294630299106508922071626689111782004817881600000 -19040678234218442267791957547085949877261833393218769774490211844357442495720928310803217673452945114307858653570770743722530691547161857714592037194448292737108135956064173232077902035051756906315290249323391199215452093288761543466155072487019283153040605640582545353//434460620365086227275365613595785689037684119561551447866399563678359364964304766160588180361574468026545002066358107854844508866339943817889423954205635510933928478885046494597924763194047299856692216836659998842171053550088349173699526518609668649583153524558000000 288731630022162319869438999191684090646040643450899591816164044389835222770755887620657396004427344292026826164251174295940084430575464251867753391273329991428766006962036541371088185016838939694742825762143476610215112259310775986858990019786106138952556466469818422670975238869429118972933807274018940951589368081351548344612945911//20973003965589548708434314855006387742137826247026382293972477180137413647707040377066768697931519202069488796547924545511172033369848624623987910615835501364546356391125295257989950347130885375821004661709022933531613399512675984933594553629478794041110851618556180635106168633352649598677887127603362572483995718065757513848000000 
84926642764971243894475134943962931810444673911735634915100592292957077949431848662551468317687921438318511523760815574702789503485398620028309816648298294834153366303807932815889792557943196419058598328094000505490239045609005350465572648576161106088544859912831113322833512972633007203117374432262835343768993715225884506359744564475587764680843945362752279138159903160761204959//260972014540685732500941228716594401253697451754294212135043621433819057442875621317591319960743124933373694568320156829732345991594970230575152480441468737290148399986074743051568390414584640252689524964664950956646999867002330201896464020174989429698298196815004391658613009073755531263975373728451196159059154355539770936746765576373862915582488531900995626740195393587760000000; 247//1000 -989824741407403//1034184231962388 12030115115653867750152716618470210758833187291724517144940796943//75260587566293087726015871475336147965874205113578548346133214730 -38294840366277686210014997045193699763084799480894125070642047208039500243910125715098952557814588229683929175932575120507758//2965873493603904869657209592263579672060858985048910013379293383705776295507221134405229631810912000404727243855383598634762065 295295570231675996693315167776780747152571140671795256274243785364473032721842828048391983319277155679830435409182467618416827429676745188054309175736589897273331671973890410913992022974171287068414179//61733615542031473899613172665931360102100569526369828036848761613054048243069658703174699341974688356745672986897026862667147976890109836432058182015294740938264100446425715865187507608611630652332800 
-103607762222490505420853358572242884724507274765889279862093763910463346807234121781702833150902639990159657168588480823754779677586383897844592436246137396292078029780455443152811826226940327856890549206867400186099307876348070477253089682709554337188513182823809//1957029821464352375114259520701737338007586124151132648046844881434051193532904352074721533160245351470923432731342828174975265163693440621123531325250610409612290445428137363053715149522735584940055030795765760550320061036434005286934804137881390313437628489000 1756980287380270661909883701400719407931186906833821727133555062583564555413167571820171278734011901786553078780566040820580897865510288379254954688057860520817526060415179421086733406546370539399123710022886030886680529570815674651094279571160030365101231973109840311106887692153830760503248639774555086269823425452980789521983//94472990835988958146100517364893638478098316428046767089966113424042403818500181878679138278970807216529228813278939394194468618783101912720666264035295051191650254014077906567522298860949934125319840818509112313205465763570612544745921412745399973158156989272775588446424183033120043237287779854069200776954935666962871684000 4497481478618828357902975315180513548695118296897040369787574112992144752316100046645078788641717287509607925626450106980120209292833281864629468608251949921103739384618553805425857887380202308238545540652588351850372157487811335028912567342994696086555165285879240439813676160036107905425561960935961331946051968043421127130927398396885118470213706720569254204159456782778143//10579946535433205371659779542564637888663410206255170762231498166235907734170633296659107565976072632434068698715682033637797810470066360698992668126005489349600610810246273366955475287077755685919845606675606119864067562175770143320126919736823895798579656627635313175349176043530629645836839475477751195637533284684044767705949955798940388469560345887878201084061975415720000; 0 2694949928731//7487940209513 
8998324425985185229752164921843556928418//6879911577066686708082765415963101392005 -1113749691734484577393721387476449364293695591499248096021365233437237262190376983790684968//614723064777942345300673611647494298409492630748950625564212151899381047684846341907434565 3232749502455777147099949130361796695276166220371216781206693407480970206448819780667318974467270737609589333827843458744644740168302532466186611141754331//189582997729714290713265545572615912994127195399317804061465277614709498987796247842084387314678472277548705650701196377776893051375514851341440850875200 -4870194067553746005392433554393767454323970473452014417195456308395019908797901907202628017224416587814584976946420122287071436281684631404642172500643426697814174404515180399709158516683162105836131515362388110129683638667528993//21343197949358935830336366563982013835567480844791522290459133182821516998314822953834142105584655472803157904439421128859575015047441788494355858675117408374452309707414448855197609419898595868573273357109375195601907874661750 1133640531577714301094463790230570404584859296215495398405151332049144471525210024993808359315202761280905403079519602164500478641718437221313878517301376096168046994391334699315934152212733388857744580032179121377464302776620604944135207945388255432571250499946633194518861052173//15269743319892007455140684408468757761408687980991560114683247369534160706810144792488732725360073361204871896366796514755108037806954494436799836307013539397248756798832458082570495407212986994247502838542705261851137819990892696985793851053928235797745833864984886152117549000 
2893818794357948608396224652970468062472459880296424811982545951332742248161861507594854114831155818553085130655146195415611877204165404047936261527497967416640034320042470107097161887686354604209343668647869967015394820299208885831731185499722116534661350732567244113640955295285049989870219686790318194239544599451563105257133//1710045024558522147077712925318040863324335720844158150953699472338859250480805706497840549035944636590651925322772241392509045721953834946328464956986918939857303509645058517346927758905551129327891825404013534181813392627174511472243186476068520859982949529412273255989549686977234401171431731307352748128089844902467651170000; 0 0 -952945855348//12294611323341 -22073573743301541610712262679571236673//102497573414903811687310385575877661106 -1721410795387108339409971682021950898460420032371617016873094528434128927392613849754588339//87137142052407093121638255236903751954999399574863919133666611538546309140586170055319680 16313517457306668499040607903231659478959795579778899115572743239581171711329724761584530868990247993612666499584567250035429887343299192678690030002344049//62416302275874709124142209806047948969179330019834923487644841352294781949250993050168150304800088551996355377414194373918898008246028356490083758823900 -3522114925398586688095375690918293852014330258883220414302055832172071373253315487145747264610273932349849350380058343538480611104076920529217826992141264292169120822384426292339080424329190906960608882147//40700967645824834858910387658090999284501829591792512374361650786103602925853405179284205837319813016693834232729849885086983845902959918661378045151742047433271863508697358129908893348972361198906999600 
-2964615015014179805295172754296611663749294757017026824055697955567711725761450449626912955847752550279125632083736465384517105390439711240732035285328566095702816892096481374832505284217977117233570295284209999214909138342054284123934619995605076214529//1519355101116665488160320486425332596410766015376101261032729360600325388596256808357121832346062378794250401199007985315934407267758119256854060437954663211883904942752463071332521831780921316532009162049372788856501274262524564791159730973728017956000; 0 0 0 1723805262919//4571918432560 -25876943084012456170678693260966349907000//5680618296291396887153903820876753773137 48844701120434798505257977500359380914524723337647143148599611849985789938064739097674693920//866184382793087165443239722235018033562332770654938221330767289744734883576318512798157591 -10439201328524415451101705838850752098522244583730511249035532393853115936152097267231153987129654894580734726923948255514483695320//517179273612462951325391504144113415822567772911339018044947272294653098377564085323877210889868378806161859809936951216035085879 -31792106067067045818112260837108131508726364490393328197498519784851127422528513551342159965127955715285413010230702025548328167661456142039693351348408082614337262676//66396651076396923064500150569526548836429014778004208000488845377413771691299301838459832995249234003843920958668478089089157501642918765520740904300173384489982847721; 0 0 0 0 -1428733748635//8843423958496 45167606322154409493844473095934856651372//7158213738657143615497509498481377893535 -1615099588584428850432566812602172779484150467417635643452543010181911434924813//328599571910176348051406456035570993083111686686639606428802520754094008735795 -4034687914592733378429707314152809369441595752859107808382503309002756873129991192828401438988938141477//34345686420160340136137214349452622351601924975531611734364160909677419579508067395379029901259679245150; 0 0 0 0 0 11565764226357//8513123442827 
-25809210976654570172323689//32261698729236896469921844 31493257163953445767383915813738522959//6188068127939952207906691591618012272168; 0 0 0 0 0 0 247//2000 -29//2470], + "KennedyCarpenterARK437" => (rk_coefs = Float64[1259//2000 5290646302898597//8373961392408000 8423019873483076625875775070651744355694385798609904937007939962921//150521175132586175452031742950672295931748410227157096692266429460000 2994513382955822485521046980912234681324963183189745389014355133307701665733190659992515397312853494866472292886256590592494915387//2965873493603904869657209592263579672060858985048910013379293383705776295507221134405229631810912000404727243855383598634762065000 30539970275135679633584632348554217845550139791434491473433065741424255490119246194598789137339313385221967019548261328863657459815753917023734449603158284970462973658597410429650430702665200563167161539643//13704862650330987205714124331836761942666326434854101824180425078097998709961464232104783253918380815197539403091139963512106850869604383687916916407395432488294630299106508922071626689111782004817881600000 -19040678234218442267791957547085949877261833393218769774490211844357442495720928310803217673452945114307858653570770743722530691547161857714592037194448292737108135956064173232077902035051756906315290249323391199215452093288761543466155072487019283153040605640582545353//434460620365086227275365613595785689037684119561551447866399563678359364964304766160588180361574468026545002066358107854844508866339943817889423954205635510933928478885046494597924763194047299856692216836659998842171053550088349173699526518609668649583153524558000000 
288731630022162319869438999191684090646040643450899591816164044389835222770755887620657396004427344292026826164251174295940084430575464251867753391273329991428766006962036541371088185016838939694742825762143476610215112259310775986858990019786106138952556466469818422670975238869429118972933807274018940951589368081351548344612945911//20973003965589548708434314855006387742137826247026382293972477180137413647707040377066768697931519202069488796547924545511172033369848624623987910615835501364546356391125295257989950347130885375821004661709022933531613399512675984933594553629478794041110851618556180635106168633352649598677887127603362572483995718065757513848000000 84926642764971243894475134943962931810444673911735634915100592292957077949431848662551468317687921438318511523760815574702789503485398620028309816648298294834153366303807932815889792557943196419058598328094000505490239045609005350465572648576161106088544859912831113322833512972633007203117374432262835343768993715225884506359744564475587764680843945362752279138159903160761204959//260972014540685732500941228716594401253697451754294212135043621433819057442875621317591319960743124933373694568320156829732345991594970230575152480441468737290148399986074743051568390414584640252689524964664950956646999867002330201896464020174989429698298196815004391658613009073755531263975373728451196159059154355539770936746765576373862915582488531900995626740195393587760000000; 247//1000 -989824741407403//1034184231962388 12030115115653867750152716618470210758833187291724517144940796943//75260587566293087726015871475336147965874205113578548346133214730 -38294840366277686210014997045193699763084799480894125070642047208039500243910125715098952557814588229683929175932575120507758//2965873493603904869657209592263579672060858985048910013379293383705776295507221134405229631810912000404727243855383598634762065 
295295570231675996693315167776780747152571140671795256274243785364473032721842828048391983319277155679830435409182467618416827429676745188054309175736589897273331671973890410913992022974171287068414179//61733615542031473899613172665931360102100569526369828036848761613054048243069658703174699341974688356745672986897026862667147976890109836432058182015294740938264100446425715865187507608611630652332800 -103607762222490505420853358572242884724507274765889279862093763910463346807234121781702833150902639990159657168588480823754779677586383897844592436246137396292078029780455443152811826226940327856890549206867400186099307876348070477253089682709554337188513182823809//1957029821464352375114259520701737338007586124151132648046844881434051193532904352074721533160245351470923432731342828174975265163693440621123531325250610409612290445428137363053715149522735584940055030795765760550320061036434005286934804137881390313437628489000 1756980287380270661909883701400719407931186906833821727133555062583564555413167571820171278734011901786553078780566040820580897865510288379254954688057860520817526060415179421086733406546370539399123710022886030886680529570815674651094279571160030365101231973109840311106887692153830760503248639774555086269823425452980789521983//94472990835988958146100517364893638478098316428046767089966113424042403818500181878679138278970807216529228813278939394194468618783101912720666264035295051191650254014077906567522298860949934125319840818509112313205465763570612544745921412745399973158156989272775588446424183033120043237287779854069200776954935666962871684000 
4497481478618828357902975315180513548695118296897040369787574112992144752316100046645078788641717287509607925626450106980120209292833281864629468608251949921103739384618553805425857887380202308238545540652588351850372157487811335028912567342994696086555165285879240439813676160036107905425561960935961331946051968043421127130927398396885118470213706720569254204159456782778143//10579946535433205371659779542564637888663410206255170762231498166235907734170633296659107565976072632434068698715682033637797810470066360698992668126005489349600610810246273366955475287077755685919845606675606119864067562175770143320126919736823895798579656627635313175349176043530629645836839475477751195637533284684044767705949955798940388469560345887878201084061975415720000; 0 2694949928731//7487940209513 8998324425985185229752164921843556928418//6879911577066686708082765415963101392005 -1113749691734484577393721387476449364293695591499248096021365233437237262190376983790684968//614723064777942345300673611647494298409492630748950625564212151899381047684846341907434565 3232749502455777147099949130361796695276166220371216781206693407480970206448819780667318974467270737609589333827843458744644740168302532466186611141754331//189582997729714290713265545572615912994127195399317804061465277614709498987796247842084387314678472277548705650701196377776893051375514851341440850875200 -4870194067553746005392433554393767454323970473452014417195456308395019908797901907202628017224416587814584976946420122287071436281684631404642172500643426697814174404515180399709158516683162105836131515362388110129683638667528993//21343197949358935830336366563982013835567480844791522290459133182821516998314822953834142105584655472803157904439421128859575015047441788494355858675117408374452309707414448855197609419898595868573273357109375195601907874661750 
1133640531577714301094463790230570404584859296215495398405151332049144471525210024993808359315202761280905403079519602164500478641718437221313878517301376096168046994391334699315934152212733388857744580032179121377464302776620604944135207945388255432571250499946633194518861052173//15269743319892007455140684408468757761408687980991560114683247369534160706810144792488732725360073361204871896366796514755108037806954494436799836307013539397248756798832458082570495407212986994247502838542705261851137819990892696985793851053928235797745833864984886152117549000 2893818794357948608396224652970468062472459880296424811982545951332742248161861507594854114831155818553085130655146195415611877204165404047936261527497967416640034320042470107097161887686354604209343668647869967015394820299208885831731185499722116534661350732567244113640955295285049989870219686790318194239544599451563105257133//1710045024558522147077712925318040863324335720844158150953699472338859250480805706497840549035944636590651925322772241392509045721953834946328464956986918939857303509645058517346927758905551129327891825404013534181813392627174511472243186476068520859982949529412273255989549686977234401171431731307352748128089844902467651170000; 0 0 -952945855348//12294611323341 -22073573743301541610712262679571236673//102497573414903811687310385575877661106 -1721410795387108339409971682021950898460420032371617016873094528434128927392613849754588339//87137142052407093121638255236903751954999399574863919133666611538546309140586170055319680 16313517457306668499040607903231659478959795579778899115572743239581171711329724761584530868990247993612666499584567250035429887343299192678690030002344049//62416302275874709124142209806047948969179330019834923487644841352294781949250993050168150304800088551996355377414194373918898008246028356490083758823900 
-3522114925398586688095375690918293852014330258883220414302055832172071373253315487145747264610273932349849350380058343538480611104076920529217826992141264292169120822384426292339080424329190906960608882147//40700967645824834858910387658090999284501829591792512374361650786103602925853405179284205837319813016693834232729849885086983845902959918661378045151742047433271863508697358129908893348972361198906999600 -2964615015014179805295172754296611663749294757017026824055697955567711725761450449626912955847752550279125632083736465384517105390439711240732035285328566095702816892096481374832505284217977117233570295284209999214909138342054284123934619995605076214529//1519355101116665488160320486425332596410766015376101261032729360600325388596256808357121832346062378794250401199007985315934407267758119256854060437954663211883904942752463071332521831780921316532009162049372788856501274262524564791159730973728017956000; 0 0 0 1723805262919//4571918432560 -25876943084012456170678693260966349907000//5680618296291396887153903820876753773137 48844701120434798505257977500359380914524723337647143148599611849985789938064739097674693920//866184382793087165443239722235018033562332770654938221330767289744734883576318512798157591 -10439201328524415451101705838850752098522244583730511249035532393853115936152097267231153987129654894580734726923948255514483695320//517179273612462951325391504144113415822567772911339018044947272294653098377564085323877210889868378806161859809936951216035085879 -31792106067067045818112260837108131508726364490393328197498519784851127422528513551342159965127955715285413010230702025548328167661456142039693351348408082614337262676//66396651076396923064500150569526548836429014778004208000488845377413771691299301838459832995249234003843920958668478089089157501642918765520740904300173384489982847721; 0 0 0 0 -1428733748635//8843423958496 45167606322154409493844473095934856651372//7158213738657143615497509498481377893535 
-1615099588584428850432566812602172779484150467417635643452543010181911434924813//328599571910176348051406456035570993083111686686639606428802520754094008735795 -4034687914592733378429707314152809369441595752859107808382503309002756873129991192828401438988938141477//34345686420160340136137214349452622351601924975531611734364160909677419579508067395379029901259679245150; 0 0 0 0 0 11565764226357//8513123442827 -25809210976654570172323689//32261698729236896469921844 31493257163953445767383915813738522959//6188068127939952207906691591618012272168; 0 0 0 0 0 0 247//2000 2441//2470], rk_coefs_implicit = Float64[1 247//2000 989824741407403//8373961392408000 -12030115115653867750152716618470210758833187291724517144940796943//609397470172413665797699364172762331707483442215210917782455180000 19147420183138843105007498522596849881542399740447062535321023604019750121955062857549476278907294114841964587966287560253879//12007584994347792994563601588111658591339510060926761187770418557513264354280247507713480290732437248602134590507625905403895000 -295295570231675996693315167776780747152571140671795256274243785364473032721842828048391983319277155679830435409182467618416827429676745188054309175736589897273331671973890410913992022974171287068414179//499867332324141489065693705797015061555470198594087676411730863263595532332547843750402423821657395601179538355441513058033586857409796246413426575022629481281490691873892436155364434077826968844800000 103607762222490505420853358572242884724507274765889279862093763910463346807234121781702833150902639990159657168588480823754779677586383897844592436246137396292078029780455443152811826226940327856890549206867400186099307876348070477253089682709554337188513182823809//15846395315500828948293599357908804356336729750211600389043278392178552174355500826515963831257047380331363827784152454858099313066343648754036690892717493195241218181604351117843847364556563440810162192678265267613927619728210569124978171156934334521762174000000 
-1756980287380270661909883701400719407931186906833821727133555062583564555413167571820171278734011901786553078780566040820580897865510288379254954688057860520817526060415179421086733406546370539399123710022886030886680529570815674651094279571160030365101231973109840311106887692153830760503248639774555086269823425452980789521983//764963488550517879725510262063916101037233331401188397489604157279695577477734266224122577157658358028576751524525825054206223633871270548345475822148138066329151854365003292044714970534007563767771990433272164479396483915551518580938634921015384398041756998160126222238252494195303993824192549425661544752671543862047544000000 -4497481478618828357902975315180513548695118296897040369787574112992144752316100046645078788641717287509607925626450106980120209292833281864629468608251949921103739384618553805425857887380202308238545540652588351850372157487811335028912567342994696086555165285879240439813676160036107905425561960935961331946051968043421127130927398396885118470213706720569254204159456782778143//85667583282859962523560967955988970758408179807734176212400794868306945215956544912219494461344717671530920637373943592208889153603776199991843466607331897567616281864342294469275103539091139157245713414377377488777874997374657030932201779245537617802264426134698892108090494279600240047261858101034422636741160199870807835675708144120974805421541262250025919709003849520000000; 0 247//2000 1640162795743102475350151093//1935977423638871147963949261 -669520662365112558836337514177765438740501906951800//3434894046659157185836586522283340462482027689855253 3946917245375300099117028390196313070898965018251827990344948235886761188759982607841249401044014800//8731687803837463597851852204375440308270070845281864536173093106185812052200062608725343371330114879 
-205400344379039456236889403418786715948678188681075725573409873825666119392008156332013686312028931603996403696782121273525274842696005527984325836413553777161305//89363383017861228225303962341868491699836686835444580475507268771312849840752079176917079323598733222511329293960320776917936050196860239819503153329236587045136 30687505472431132535398127411347221742615518129027192717809570222837123760440722268522243759249777774158885828261806787110459137168616203275440545863270163627249417471268646467826870981867030746658885027755639865911983778862//1133170665462309851631677012105217332859851084790774917074074021640837762020794312552555302543238883393727692629571220902396774550231187040084644756043645934216506528391671558626974497474800652364423277959876832348439939747 -6283525646833647454002892632282410246801087091050293278650123969383191268291122461341921080069123066751425002628983740840429970668280064678807240424673718814438405438302314529802406401591711043517178423219353227983220474197585457883457827139444416354631995314601988204663707433738723579//636073193374428113494435816226054457408386696306335042416362660226889698324156499448503155209934236569286275656030656961509515881881917957453625064846593883601537604025882329382852749987796068169486806444822362282067831972375349383462186775277444140348881039842682407991538426680968962 
-5111399490648784623505405906880770861485683214132379586079815455918757228788561547090828993593721487763199937324034708336117675901305459316990878813671055951639549111107993366169146620054742699199566402315746577078472396338683439114520127236419852170565949886333950618992185026169588984547653272289747956394678487224897575893472996553//23744424443337318051143623666623532897092042087880269588186878183995047296072436550183424467565368665201724952138208425897034422691189645477159617431842320555598909677789228593651627069755898900855393242632756026860305068214302197202618797165584108476518318651533651241430264302060599862244313590743654489782747825300717170452010145820; 0 0 247//2000 -2972678418645402786341338364//12838151569953855298061689287 -14177902844174227269025142717297051203475652491333500//11971105784073717652462253600880425542713752681105087 -100148767553896799794460313449068596317708839495767869203993116264461768408264073121941920171989860225//16896980983343158542755332581698141314998069944341701280946967597382042773067583208093579816862254704 8120587802228218371525098119582199057503340092685175907611832343053507998800789197285606422127482797666378809500665961580494440814493887170697877672294810170691010//77414698954417744943691534221904770957339164270679599444552137006106151217420120158205166736460436330696537591532832275880234436981438762980759441700502891030827 -773431832654955771723705915527504690565625557946561612427689605734312849813109402940104692058813867875775302143309398169386342513814875087912054850684653412041387124455620214615519952957068564604440450567357722605//27692724047854711512168597950120181690998527229673939994235410614905537717037560897782708599532214026735280560758431733216205801827405703523808488247547552157893603286263272072905320930368075417471639053737763218 
-1303404354434948517428356388978807213640989814097796284872851954122942850939247373256252247771849635395296815861618930858082321997548690453745717952318803556064569049629447371161223768908840614968332185016381909654224705552772116843629887797345246826110167236923//1860770177301487311499340868274363903273885901513548551661642580585076571165583837547959561673448191237064888114034470178211592939044370653201543317609208350146901593536799945407827170076630727738853296085239724937440199422317066971204713970176240621839897829164; 0 0 0 247//2000 538260754703221409274894839//225772174112649072819826640 53717436927136847537872396533404737469775216052354000//7927277355078818621920036006972820247636839819935349 -1772830339659539491048538392985299701647037422231103660763419873677962619701225032539957600128007666810800//15721979495129036484463918711394582943772656542660620350548585102102545643714934169988943899054128147359 23300798642481300915550244810330608079712179111290003575082920574543585477524931007510835130377963055801601626441651388811757948939347006283700//721721356793285693541681937242605189550706405472415761047580279465244653002939927725991664966944137182650520997040379092802938837804294847283 7898475573277855254317959379657908427305225639249224806096111567367259892230833277244398214475361481979103570897581069354516256084225361210253104267762350814757343118967653292590//10295136532244819865134216418581425896312845076778105967882093451812315373150919833368240666957561175670654409595846292949450822030580980616104277250108095432715677497120580742213; 0 0 0 0 247//2000 109149106916529224225613938235//31652606811075124885796735456 -1471402139169815526549951787477624798736224552941321200//56440086799582050350922834838054057420026494337598863 5412580004078613161687981458637008030233366457113868199883389551037522158772900//455213269145024092215621169612878312797315768577504391565569423807316058315843 
3820395190970937465086284368007694267392689261092114936550556797062450867925288189416331303649552143230//14273858093118406255740963176533941671573423886510381570104226394395061792966349825149357548274393367193; 0 0 0 0 0 247//2000 3702251939282354375344210899//4399061605898178118260737311 7335745460336671146051037364344432719375//4167718263865310401834285721625989602073 11934751738672605596266458500552561610251142174626625//266467933294598164559149808395021381193202692438810102; 0 0 0 0 0 0 247//2000 1753//2000 0], implicit_coefficient_is_zero = Bool[1, 0, 0, 0, 0, 0, 0], a = Float64[0 0 0 0 0 0 0; 247//1000 0 0 0 0 0 0; 247//4000 2694949928731//7487940209513 0 0 0 0 0; 464650059369//8764239774964 878889893998//2444806327765 -952945855348//12294611323341 0 0 0 0; 476636172619//8159180917465 -1271469283451//7793814740893 -859560642026//4356155882851 1723805262919//4571918432560 0 0 0; 6338158500785//11769362343261 -4970555480458//10924838743837 3326578051521//2647936831840 -880713585975//1841400956686 -1428733748635//8843423958496 0 0; 760814592956//3276306540349 760814592956//3276306540349 -47223648122716//6934462133451 71187472546993//9669769126921 -13330509492149//9695768672337 11565764226357//8513123442827 0], @@ -305,8 +307,8 @@ methods = Dict( # The 5th order KennedyCarpenter548 method seems to be missing the 8'th row of a_implicit # coefficients in the Kennedy&Carpenter2019 paper, so this is not correct. 
-# "KennedyCarpenterARK548" => (rk_coefs=Rational{BigInt}[1//3 170659747039489//317619014645556 1059761918771198374975975493583149778060443922616025115820976221//1221160207236679352040231791426238051865028458046356668183805968 6026949071202260192816151438453311527906342956798869364622169520394998781714920581981427863823607816039034882834095728827//611556923858701481314679349666018470018187199735628661499066355092054710782598046711683141284138645465506265796134149584 574302772466800635565942302880362733547172772643551904790822110826901330458802177285016647695347313333655288473150923245477469999353398479172681461629222234982048270948664718039299655881093933502451//150834125743572692057315590407717781072772261784442486315710184301611482636191181393913904998344549476475522992537979253944606756561784457508281554611374619511508513230391440260514965717678431926080 11804841191205866409738141418797344154150165781776069482717283225649753549804892986029880618638519186729029736737972278367957024018786254197094466036484982024080815664456754857338763116916399671315927783397636251797109720480473939029956507226170091189075837293613095202295448304114799//316529629859882632343675363951000175954808867120997357850157402897970774856378239676060399245608650982082301165291347152499432091243523124311078123020152833536134440536363380623223806386341610919656626785819858198938684429370281223424126948673612559884172118728379442353474712938880 
205479529932329908716282413893618829637818370089241804929941483282179075703631150515643272392025819491441486340246313289022355398613147479287030479051759027065895455940962205315837210089822115315763871475524927356818199319782481010864996104849201569232761201059997695433556990185146963462480490411870245772241162665421012480742396686320042543732181343578393//5260860690207262979589344526020288103568995000362022999215204542913782791438284170690968045317071607104586535740950151469853326262915929889090860121924832019578588270175179588839072445427432012552661380649474236988741686278155158173159184181471487357026228329769096613600034623019687639534746341002493095888508157629508644724676106792267171767323953113600 -31073886185247651667067360120004278123539463314544688434634859477262281424368417682864989098625362184258655551942455231016080227541207487450456834716812464333109464818293826368695129979874437319388225475567363925740863990100114319609773199401453239914173351840343142392793986601320935346482664846634007108244761611421925874975284788092831794173603812123632127765067692113096713202915904166386621637429972396638482166795168390506001//205364533394638024075238227463140747921299540559472003068986564247853675492701798914552117976132500817408280838093109002246273402713204252739854979840561553732661829119263569894232347484336103156662483556486877970127505583089385295967632167610081341996276964887677919554714672505647564062872298807872882874244750782276807503913044362965995487634343580478340804213310079872837580199084484018436699646596793275992180669063759904768 
1355106737355152460804424095630272213643929537644340902552944500194072220292762643530899993169071944828216401904819834895492706473204035420495272675010429289871147187217913280587665812877578699953287633363827598340626753544810744419524708014536091930419289921773711543518130861012168892746775975065634473740761885966889604878495107361010682453148097856546629634991209230617564181332375433303760587947068414808055196175764331527277384554187746358610192408611064850683460450152684230908509819835540315417629041//40682939169547015068145909076369356849344912507687127704642447159647492810337548269748338234153453270350117883220210855423331000788143399130922229954914310978098076329357245823099756637699294796313351066995366854135786629191377508136276798811699324277682691177669468459035819933886457267269032009956138595704540849884086612898962324823390614836495694603671787505773677235190081814562655593388519100675525221353268195703733221510379155250399221985970770068746195650662071392042888445769873162504653283328000; 4//9 -33777256972339//35291001627284 70523709557357325353926362451393185076417954760652143178918227//135684467470742150226692421269582005762780939782928518687089552 259173275379895127458760576309330523532337951147970777869503405464585562881462085641713656867342514214174126194545550287//67950769317633497923853261074002052224243022192847629055451817232450523420288671856853682364904293940611807310681572176 34483675781191648110786485447949476841349802351981788326801039637143030060467265358101399325091272262829110785552388913826586855287191388294264020004018018771784951430241000078153131805692088161919//16759347304841410228590621156413086785863584642715831812856687144623498070687909043768211666482727719608391443615331028216067417395753828612031283845708291056834279247821271140057218413075381325120 
37972639708348074764863510493824251598614777393926316069246767548532985823720464466543328302337241205729880021748560977783654106318361163996656332009862268634667824273622244260705413151982804248607416793104785645839386988240914576932950229734443760364294642720771732116535265641883877//1652988067046053746683638011744112029986224083854097313217488659578291824249975251641648751615956288461985350529854812907497034254271731871402296864660798130688702078356564321032390988906450634802651273214837037261124240908933690833437107398628865590506232175581537087845923500903040 675361318822696286582373698984422416980652600989947732274664396784893998350699958365528677645259735958564356945089045713897031190536030043270123125611921244593280607804710357155969107797082924569677147406605660739001613329726734039554300060040262414311187767235888588699616374393926701368049944511958912556993822054410035807555792147417285087351663706964339//27473383604415706671188799191439282318638085001890564551457179279660865688622150669163944236655818392657285242202739679898122926039672078309696713970051900546688183188692604519492933881676589398886120543391698793163428806119254714904275739614351100642248081277683060093244625253547257673125897558568575056306653712065211811339975224359617452562691755148800 
-14654782196644443978643077431729985169483648884762410390029797637990901973385611446945854901190651457791071027387784893201190913834296025743656446253760565138790278257703135364502640573240985081692965718431740687526063899064740113674831492617564146000207118510633012932973784580911032600355276047390516684914006149750898178961841693685461865667072601334715379620650194897355808646711454006049738723680812812767482148674909363385677//153208461421396621135495185567739923052398069941193399114958230470620996002491818237840468966321072038383955545878986081040870633770168252044018794484228460721192158231831552143316195742282489656557725827855289914222107339765096966832995426629743223394047894440013686017009358853419611284999968951905166588722274393127142106093858493006377586012922988610825679333739265936878829672332869029627379101429353713835436372158678024192 4461407513830220875580251050725490334550039459522874983321802400684610393685337405358142021414207715460029597992569661824287292651719118056453635029394091853700171157539178998334123264701613727496982908769111231517264264427131041129298949334584170031908182425677961537622802682862369968557627139451073916218048192757202579311445016491504569877930402285098941281638204030005082843744035332231113981837799491035741510194007002757944974689989789973121919931578599142856207780304241152182820589861967884680533563//212455348996523300911428636287706641324356765317921666902021668500381351342873863186463544111690255967383948945705545578321839670782526639905927200875663623996734398608865617076187617996874095047414166683198026904931330174666082542489445504905540915672342942816718335286075948543629276840182722718659834888679268882728007867361247696299928766368366405152508223641262536672659316142716090321028933081305520600400400577563940156776424477418751492593402910359007910620124150602890639661242670959746522701824000; 0 1183333538310//1827251437969 136607349717583458493057707621706//9411247739582139928720185864476093 
-1222051897984293100889925908401868213380468642566580792951727180058723335646058631//9650671855308101978247774429296270313350157698860544197688698633892935233480512564 7233278175208784929821520367114267945040071552896618565825119457684675103820679881680792153156616614798360649894867022162840408524250937322223681//12186741596298968245256194155228665632735178046355106133463062982670602750335160180405169285972691766413675449216819961341316742509625063392179280 20484570779422213136105387052366878491025225861133270150585262733025655122846907974494099750739628594257427928954870893197048531281431054105510102176215838999349988480998827197029944036701651389097752980608319168178102621//3321943761931742956567494504722931571951385306909901259672532736639698840744877071134826137900014408691103259283676795574732824747130952636182780756212437965325671558426639083953787403547236777799428729452275709294783520 146369891495772494952841887152886107309500190314736084522963168283742600143244236116042027571622133931926633567867160591813599478266553183924960819072068851524440312166052500115422408817190693729567932919306865665340009051159566058468003827723339537979298568051546142650840212970175592382801497//55212156157267561109216806198851865664036304410102836735943160130831412687399333780211100436453981079073314311277679588174855628204780976556266201520567416310508791299029373557826733379133161170981394733853355927644870101350163842832370094722774179146568386199993435909173482905287863614614400 
-2608156649836768732666608543008943855466995845711580787999050649351228195781061507033914673763411318695470220958850950954286872449571890733423006109802942765091360513446317866129822827445800984801155983860066367871546763258778360055065861752965423100441468071074005953745299111708108290678853038218147667954122310370079043892961158116706783824621773161296412183086567//307896894623976010054146748514391198576228873921345372725216330142644669131216427906158184527365096449292511559112523846065327932420030471820745321472435634090966569469181642312062006607116290798983644883812477636889042582133519983336548743001180374798371624666990844588001123553752938976470301117328633761192754732247383455045435878513859557375799330719481859485696 1909548155641668250566055467358906068345366179458601976147166113506387406962786757130930654052762507654586230600202502602496884646725314761271232858675706156988859855875963590256882595589327026260750395504580256371360360424928192004263152877279821526649515739460908624111704962841477598920958635461672885172637306336905501424990505224471778947022795145422267442160658515703932306687376245929356054268792796684242188488920005404990850555069033649//426962986217594225352560126442654149757269226714771429217930107091399946506198174407095332959386614596467083820170339401229876666639402307998993946055143088240497656693275829842007530240830300316277581123012713870285455960272380055770281279909977227370412869351602231174041406782116619408128941840890517628611439462164880780015487606593920867367300021415372140324697623985898646587936597854218911855447907464862510669663200553309464303450112000; 0 0 -112564739183//9373365219272 -141494584336626704174173713649734315691//13663857792723117009167381810011656236 -72907457896458909480270561323715665671699860944345248534314093311809937901279650422786737//16688723380413293827663975787515145698384312041764922108417105097427538407950227161583790 
-283253049105807653303028747414106642855062970682638152850991564336240272128589470533820482679242058258518910052863505657676096838062431034696412727994041//6525411275130258429502398721925334072432494204396062297109542549038298032624244996183769068293768778067494371395845781074490822119450144306091615259310 -237721696953700223749154913893707267696512287578193059139298180411964456558990379626754852672453803797234047953931487510491782309482120520727466997920239679896189349878379596367384972963958206212403632361536300156177//5285276152106039885399099327219536921800547664967387988005124489125097000325580820284929709654645961525830100460002856603223477816268184531217968787962060318479238500849800835505240211327412169353783338947184949200 100804057866383910000138801649070810762183869052918634786866931506649754252733763555827750381755875699171087797151809466616027189228052644925307370573569943281258821230303762631070861215423688905107264275684269763247327204493146502185084296456850771369954148461253476213684196907916053927787//581009397194182943683246747667362397820851208145547835786023801302298234837753193945542724363879102126946942797243649210742030317721307484526749083812910399685373452507380638485425961411465503214946519253468274648985149358225444456830382468708463805176831557807108116864982969382541928960 
-54402208114355215657456234565724981547968793718843577593572182276994943058094720821224726920367418293063502956141874696265059619186193013534177042785914258499696403669038777570572016479653043026790889286262879177045423175049097619640995010102658316523815700676634137946979952764896358299252519199642485776855302212933012209815330547162283940719719709434205225613437//1409370593141587895099595290391120781835486320443435663572931591309330864080938149244866711743325429020176530009735621002377608660001848464974221028878431758872576746455862736641798787201907313465530570387507655729620216466872192956240197086187917297539136752945017250144347794436941828923742002074720990534365830825244969786338169089194912238709105316132507904000; 0 0 0 2185051477207//2551468980502 -132695288944447609947458398029985856975//331980957249874262307105169826996586616 -2723685498164114938079489580644350119212818969205519010526020693520672946735632262289561//2832890260901206454330635046569513517583992836749798166440942858609078057026321642302930 -321673142600306093280831027699954214653452306280589059311231329340433225118902986724405241785157766853362070451132526236466153233802976851//355993248980728987846593714376172004429276355765528853582186411426211341812652624759995878867452293712679582265919533739194601331068404800 18448436911131834565321977805950346354373905671031562730094910927968922063693958119562182000660712947250599561506636714608796365799099181045439542903714116674329015828705575858914245288174439628631543//5795931771084146157187189787202814693856136298575092344243085623081594631283897848440892712292235870755049663269574538233156793740907969554230290773280582828893321275684873428246983747114675851130880 
-4796791946199262879499983382324912476379929457681390320259681185684305023271739425799442742293698343271341681009941294094539644976959991589370267261787701526135504699196449112208776127343211270790408975375080787564726500765284039716507709808857756568684075873//4683336508749095233221026001210948519995715356397563647800719850229310205855843222433435804967351909173845837760887041604086346869971347139774538274404127175580104740759081290477217649076715894391047514601457812837497095533288345030892132838997503236610252800; 0 0 0 0 3012424348531//12792462456678 -1148005022743109542016656858803953488611//218180893391783363853768429352596785744 -370341084019491519786836161175184004994558333458374436755868569215325740121//380236221704573854337010481939651640455761070703010532399729321780924705340 -1283763933524395641596503821511738216840745078857812068814924022122215249311437681630260812190102025298725865456571671541329//2761410723009077202063351058071917351743886473341504644804783932844474765064463798489501255896999202864153121430476553644416 -1960617523598584667458382756346163172949518377739188335368469557160090143012475681341320259121403616693854783570192952750933406294168943605686739680098692342865431544869241527//588461690927872754606676618157059938384342658853520470993331314098308515590148509194784955329410014795207056143557059218823742824668050825851042702372207905687761356325235200; 0 0 0 0 0 -3944303808049//11994238218192 -858486924545291131785306//3089094511323674899518385 3861655060592889436696701261941241779557818997609718626307953//2887163380438394290960692114310475416005428697726491052985584 -70737902161444536934458192916666257112625499761520751358473189212602443036473227187887802234796621//99208972240881169641482648878355235855537616999141578412500871712153299076978887250231166929913664; 0 0 0 0 0 0 -457874356192//11306498036315 10298237966992729993447787//2614913088797527051627808 
-36191713891094583205487720454843856502972447666645//58132638947051778286501712354010899452213104348288; 0 0 0 0 0 0 0 2//9 -1815023333875//11481503569852], -# rk_coefs_implicit = Float64[1 2//9 33777256972339//158809507322778 -70523709557357325353926362451393185076417954760652143178918227//610580103618339676020115895713119025932514229023178334091902984 -259173275379895127458760576309330523532337951147970777869503405464585562881462085641713656867342514214174126194545550287//305778461929350740657339674833009235009093599867814330749533177546027355391299023355841570642069322732753132898067074792 -34483675781191648110786485447949476841349802351981788326801039637143030060467265358101399325091272262829110785552388913826586855287191388294264020004018018771784951430241000078153131805692088161919//75417062871786346028657795203858890536386130892221243157855092150805741318095590696956952499172274738237761496268989626972303378280892228754140777305687309755754256615195720130257482858839215963040 -37972639708348074764863510493824251598614777393926316069246767548532985823720464466543328302337241205729880021748560977783654106318361163996656332009862268634667824273622244260705413151982804248607416793104785645839386988240914576932950229734443760364294642720771732116535265641883877//7438446301707241860076371052848504134938008377343437909478698968102313209124888632387419382271803298078934077384346658083736654144222793421310335890973591588099159352604539444645759450079027856611930729466766667675059084090201608750466983293829895157278044790116916895306655754063680 
-675361318822696286582373698984422416980652600989947732274664396784893998350699958365528677645259735958564356945089045713897031190536030043270123125611921244593280607804710357155969107797082924569677147406605660739001613329726734039554300060040262414311187767235888588699616374393926701368049944511958912556993822054410035807555792147417285087351663706964339//123630226219870680020349596361476770433871382508507540481557306758473895598799678011237749064951182766957783589912328559541553167178524352393635212865233552460096824349116720337718202467544652294987542445262644569235429627536646217069240828264579952890116365749573770419600813640962659529066539013558587753379941704293453151029888509618278536532112898169600 14654782196644443978643077431729985169483648884762410390029797637990901973385611446945854901190651457791071027387784893201190913834296025743656446253760565138790278257703135364502640573240985081692965718431740687526063899064740113674831492617564146000207118510633012932973784580911032600355276047390516684914006149750898178961841693685461865667072601334715379620650194897355808646711454006049738723680812812767482148674909363385677//689438076396284795109728335054829653735791314735370296017312037117794482011213182070282110348444824172727799956455437364683917851965757134198084575179028073245364712043241984644922880840271203454509766225348804613999483028942936350748479419833844505273215524980061587076542114840388250782499860283573249649250234769072139477422363218528699137058153448748715557001826696715954733525497910633323205956432091712259463674714051108864 
-4461407513830220875580251050725490334550039459522874983321802400684610393685337405358142021414207715460029597992569661824287292651719118056453635029394091853700171157539178998334123264701613727496982908769111231517264264427131041129298949334584170031908182425677961537622802682862369968557627139451073916218048192757202579311445016491504569877930402285098941281638204030005082843744035332231113981837799491035741510194007002757944974689989789973121919931578599142856207780304241152182820589861967884680533563//956049070484354854101428863294679885959605443930647501059097508251716081042932384339085948502606151853227770255674955102448278518521369879576672403940486307985304793739895276842844280985933427713363750074391121072190985785997371441202504772074934120525543242675232508787341768446331745780822252233969256999056709972276035403125614633349679448657648823186287006385681415026966922642222406444630198865874842701801802599037730705493910148384381716670313096615535597790558677713007878475592019318859352158208000; 0 2//9 9019974728735544362019000//16121383367705251995986549 -57651824890345896435905820007387445733//176397026424132604892288584936023214744 -569394483822128425961360994498008812778386685527868977943041878869944348458853766997807//210488291388818350516934065849262222273168238269047237479916690319680553809373402931658 -177892271546374710779107291691664969792012532687770794243317029390355982343430179360059946596144577636750977442181216291939325632637343206497346547201//147012521234151037962808775581551505208337113087251036836888218928240859032970635659340807911964767051434454493105886900324789466807896835441517907336 
-383919704369457941754329028518158063811507487658336964852095693443380296068543112141786000673584208676279098136093863022708428777262383112660876324670289377930084714300408862540125034205406610949157490011608339835609911783637//26344442760191215655644739798088529689692926752327373118484506468778858097810549532007485210430802475081652108994576933264826585713577768213226959029133582479810328629748820330483662804709644439429939929608252846965120586864 -2261399638404775940589746751761420392832753756364447109721631299853303660023359404762198447745206206324794984579928463887812630219378721278566804300354141655993990378864971715455940233043394094368633920630355290024928250159116442462431613993457716201189994986398580576373637796447341690932100417953//145952048137227617938226610302497592857434727105717708999825629029961635338948544360901135364080514013359266410649571796933362882296876216275219818344916014535247229950464075526325163502362335616803411316952303511550359528874921491481911056100962355463705476002929768845044572649111998507241159360 97092263093280942741942057550605726752466994168708464529514837090991196039707625522149161651577298820704973187563178564517694300641379757266602792113481685928418577315192729672250342192196532291582485913581263215282600090242204489587216263733619619995644646398583518484852258791153979330153604407490437036360915950912833867569593777485081148866816441380275899458322624315//1612479583004224808963775230904065752678064790286785081562159213083739929788324674289887067219257108443433983018962128913764853634641547668189437137361398821199802704927937510051334276232540935195546734316300257081642762563246297006944926685030546047915828312558467525571137317205747817430366932216307777366391883573026124038422978989658576323183389169028719228490717184 
-282802626277161763438401050707545946339250721480025389393398288510071885670399749493068653303108659088909408295368050261506246888038545093431463450622086150035365471807590708481769311061973508532731134307000563781849511141410805231230431778022509562382393871636238518226449187398428717818909040809695667426195691521585138951005490066131843388399667393739264776188923548153488086828503664042620160477598941768019528529813575988261761049007435793141//21295598190896517634074806612168964046102406984622923674198273681838481034098476751661608872569937782005147516986695219342148889913227138602579929066305588126088626859794197449268604455742547891320238664794542455648785085339759837315747373747299318575796168861966036993434630596679716090372380506019527557846496172514388750080669286419912765149068345588026044534047797029294973045445211187550146041575931956426433401149175067167448079387481497600; 0 0 2//9 3520027435685131909917895//67392439625927078783867992 10374073483185293075800691351051575558759313125224//47746138836300401140211549121187352932282495430227 6642791313733446579179508214782700174924317916026547161634021253916466895902882468972516472199573441//118791964230111146958678926628670032331340306410739755968623647953100579142060120581676442472306761272 -6622895468394947967912669187757757430384166778659364993716507048237325662402396562495302820418665115371324748551494603838728999391652754837466835977813519728560413//4154136066830875905459612005152635132045460364655444593270497523706808568616222990195336784464749257720280778685143458602362910961783614674055646289493447084749544 
29920836681724946214974650726250291149686043740047799018094901551722329818600795752132054446672331338494928960026894953832167416707033181463992926981944339833751675914430922106897663514980100460269210542447846192646135021597//38674192958627468052836990712873600458216925237426418618887878708643014783170860460755223502803527270922392382029908449670091073022344302230096719538616962666648363169756366062116293271544077140290818250324752852339056907840 -1505331351329100790284744312062232999410728480182742988263313228480072273923793896147546730910358730742440916537651613727263338175006565989409249704402694842447576876482504319357907737621277885227743251608862669538827876935204649305331445560086485362164182598798202400033272735555030725646295469879929//369875857672103867874544244334676356349381236490850891031917082430775064643778743185734321969653007328245285359271573038251275123417021814315765569728002871020156491461633519565756279554908194388262416796753263070097027971700852257215059824494431889479383317695288221975280147593294553700407544264704 -86971973149562325232056212280020488030978769199605998946356201176203777904519805170987639175301107657472282331934640690747903775069211822108443848041622210169232080624921266741724333673319407250465683056484464248220970134336113992474333189215582686107823224484325188579233609263767744974816748117601425778060665519105143407291744772672303935953477018950602468151109411166393//99690901813582527613576610319642528190766543434329855609393196390214284549495576554797452844294800249332037961761665384801915730321192357958415208824573674018410032837631830231337140199797816584200308705369316749981413972529814756012986085402364951767565343040377810730602065530885039752281230667007931663862053248550853966137113439252661770641873160884023622867868283494400; 0 0 0 2//9 5081406456137427741660103//17412358188509308246213316 356236304392011993333949723703131159758185810705937//1277155139038113900014040244249523480364569876838190 
260486926301426691443781547620527025750389451260889708090334147529502177264310268013096339163756864131//312981030590253862008846641182330180288496985307175523771278858598243851711989951486234144475242049410 37852928149328865012911315329042303413154214522109314759609464236136375878343877343743098924942700324052129496848243932304308240525612767849270762300533//36053004780910014842890824262286878474603180248018784759638286780666706355382569301404056264880196067480624823658755434150324375639435347593779556342800 -2282407438775459707827459161620796082258656114035571349534505805640738761586311700342566985607521074815130269621952300940594697694072451892889463172496981731163042213972129958348108968433780267026716885963519809201//586979546525155203441076032955690669441768372423826926737015147818414713744821503458195436376770304032419956160567706732013132695083761976296177984968780357452262886129878809385406437021903837452610385016124423680 187905796196720917487414705372253331307966329921238689334004471068314809159821853360154084116232202809571497984570623952689000063594334555529728856716894701283508482348332199532137539204074179381183915634783746631387493698467375177190429476632185477014847301089667892678056713//171539267590066748253469829383930549376008625687359040674223114224438860947702803174276756584808934631224981641442798237041027339626557083111550729701259169341702179433108120143245126633106235254081083421829904686100215770112577678034007297420438715328577239419362025241856000; 0 0 0 0 2//9 35039219008892087720673325//86154806997150663643746312 19816561771980385975982415387642517169040169952499//33005933604303579884608184152229609915639272044670 -1080291465779667303630584771557569852938590760348119247225955143675568353340734163162873//7362728080499493774017844628375478834322161690782074297578412183692075609283538079393600 
91685100648952711196868954844225476461911737542022412589135402358365869905288291520183115484299444772209967806880608948953530902666248911//53470750842585428647825877252363401908523582536797620794753817140843426075734143105110415863583250150665677280703104974665972227037808640 74049777616770083818502917014560563653217344703905921643640424764673004440720926011119528196102141656185703630697851000727247130998212566262498995881110981153316679114525447468267982659//325563288501457594136865274706734413345080066918150793825834559097409312873937899975853347440100661768869793978951463834659542293231817295652841938787832036911409854062445300529557708800; 0 0 0 0 0 2//9 15781043597147568801568175//3614386140556877705089548 33422528370099625454874244206712186269//29788117088857726792026318272523497080 -7116496052409432062958367373360307502165363912530455123091509555877150515//18560597723356561489493864328969954916186144305400579545332295579833416448 5839244266756132287562573916379211891507618903337562916610354942038726347479298063853136917804854626412302750421//1913342871539584101918970805351321257356802075910770834964527552833176346473547991524123639420838328364588698624; 0 0 0 0 0 0 2//9 457874356192//11306498036315 -10278899720217//11421967853998 9602897495277010359483416541522520749//84641325960427269717195312383118262376; 0 0 0 0 0 0 0 2//9 7//9 0], +# "KennedyCarpenterARK548" => (rk_coefs=Rational{BigInt}[], +# rk_coefs_implicit = Float64[], # implicit_coefficient_is_zero = Bool[1, 0, 0, 0, 0, 0, 0, 0], # a = Float64[0 0 0 0 0 0 0 0; 4//9 0 0 0 0 0 0 0; 1//9 1183333538310//1827251437969 0 0 0 0 0 0; 895379019517//9750411845327 477606656805//13473228687314 -112564739183//9373365219272 0 0 0 0 0; -4458043123994//13015289567637 -2500665203865//9342069639922 983347055801//8893519644487 2185051477207//2551468980502 0 0 0 0; -167316361917//17121522574472 1605541814917//7619724128744 991021770328//13052792161721 2342280609577//11279663441611 3012424348531//12792462456678 0 0 0; 
6680998715867//14310383562358 5029118570809//3897454228471 2415062538259//6382199904604 -3924368632305//6964820224454 -4331110370267//15021686902756 -3944303808049//11994238218192 0 0; 2193717860234//3570523412979 2193717860234//3570523412979 5952760925747//18750164281544 -4412967128996//6196664114337 4151782504231//36106512998704 572599549169//6265429158920 -457874356192//11306498036315 0], # b = Float64[0 0 3517720773327//20256071687669 4569610470461//17934693873752 2819471173109//11655438449929 3296210113763//10722700128969 -1142099968913//5710983926999 2//9; 0 0 520639020421//8300446712847 4550235134915//17827758688493 1482366381361//6201654941325 5551607622171//13911031047899 -5266607656330//36788968843917 1074053359553//5740751784926], @@ -314,7 +316,7 @@ methods = Dict( # b_implicit = Float64[0 0 3517720773327//20256071687669 4569610470461//17934693873752 2819471173109//11655438449929 3296210113763//10722700128969 -1142099968913//5710983926999 2//9; 0 0 520639020421//8300446712847 4550235134915//17827758688493 1482366381361//6201654941325 5551607622171//13911031047899 -5266607656330//36788968843917 1074053359553//5740751784926], # ), - "KennedyCarpenterARK324" => (rk_coefs = Float64[-1247523335473//4055673282236 79264835984649679285542915383850087029//197703563163588166433309052703741041388 143374873327169165072944877723054563796993634825533873250938500977412076430225058147123297//48164537938468181360053681019221391245537623484212879143241031703829399581257519267114308 183533876663877280630903815242772717891981696485790726248876227532653907716433834740782318700703430625849345155700369934729507//152774769284679137140932574224840322584566969939460270285400962599369488056766613240926963620623994179069278241372235976473942 
-674269476752868162123858074951153786574711903849607589191198931851708688548176820435148552544391865584341865043692699736944296248896158605451212237381612962268906411298755093339964390670721//2546134965897308089555580666805701115486032970876088660128805677838927216400319783331222992194057059091619325472569216389983265703597042520242784079940164596262272590600818697485927109390800; 1767732205903//2027836641118 104089650763298376775517283594005205634//126974577376103704518428054341484614367 402649974715726257110381944227129692139788970635058928062257578119864655690737612569045525//59412386975052518972493036127105554419479894359642287241033487141332226598183812678018467 14364246105178652475593341035586171289599000967514280222073651706570666894470602049892875371222508981379602016199819025473080098//7260819418426986996892762999677651302312472577052165412779166595718930645183114458002425346612148056054055237577670620367624747 -592205887440399614308635936680843507009892739451509439144512297133172235516402369571203494065187197645330486644003505743504146737687284998414220824119624619321360424932079759986667400688521//924270765882147767919737706090852676802697317279651422141773866515771469294827079989551846085654584104864692146726487859057882031810152027557213168580198324425357042783589702576114965490700; 0 788022342437//10882634858940 -1296455210574762780005510449066529142309392831329604//156723304220428565752393692055114250106676689860589 -99673073274892040508947928240745592597410069006658308475742716315333750923451055334466588//36786590455936249721221951824053931878290276425173065614254196079782580911623834181388539 1168350732453084113921674966113208169458618576839040713544125474188024763720251037296265456175894768135827904089264712977423236877161701237//1613695686286088606494807246788767988268387847524889706258074110760741418740305745483456939972126277819165534580240908885870858409423997925; 0 0 10755448449292//10357097424841 
-2001365002799665343288696//31172749426290664269423183 -82906599394091874511049578920146826651351968733949252229257729//3202722135068822020151439455680372156253901385805112449655312300; 0 0 0 1767732205903//4055673282236 -189157537172543652255956//2412892370833855116699825], + "KennedyCarpenterARK324" => (rk_coefs = Float64[-1247523335473//4055673282236 79264835984649679285542915383850087029//197703563163588166433309052703741041388 143374873327169165072944877723054563796993634825533873250938500977412076430225058147123297//48164537938468181360053681019221391245537623484212879143241031703829399581257519267114308 183533876663877280630903815242772717891981696485790726248876227532653907716433834740782318700703430625849345155700369934729507//152774769284679137140932574224840322584566969939460270285400962599369488056766613240926963620623994179069278241372235976473942 -674269476752868162123858074951153786574711903849607589191198931851708688548176820435148552544391865584341865043692699736944296248896158605451212237381612962268906411298755093339964390670721//2546134965897308089555580666805701115486032970876088660128805677838927216400319783331222992194057059091619325472569216389983265703597042520242784079940164596262272590600818697485927109390800; 1767732205903//2027836641118 104089650763298376775517283594005205634//126974577376103704518428054341484614367 402649974715726257110381944227129692139788970635058928062257578119864655690737612569045525//59412386975052518972493036127105554419479894359642287241033487141332226598183812678018467 14364246105178652475593341035586171289599000967514280222073651706570666894470602049892875371222508981379602016199819025473080098//7260819418426986996892762999677651302312472577052165412779166595718930645183114458002425346612148056054055237577670620367624747 
-592205887440399614308635936680843507009892739451509439144512297133172235516402369571203494065187197645330486644003505743504146737687284998414220824119624619321360424932079759986667400688521//924270765882147767919737706090852676802697317279651422141773866515771469294827079989551846085654584104864692146726487859057882031810152027557213168580198324425357042783589702576114965490700; 0 788022342437//10882634858940 -1296455210574762780005510449066529142309392831329604//156723304220428565752393692055114250106676689860589 -99673073274892040508947928240745592597410069006658308475742716315333750923451055334466588//36786590455936249721221951824053931878290276425173065614254196079782580911623834181388539 1168350732453084113921674966113208169458618576839040713544125474188024763720251037296265456175894768135827904089264712977423236877161701237//1613695686286088606494807246788767988268387847524889706258074110760741418740305745483456939972126277819165534580240908885870858409423997925; 0 0 10755448449292//10357097424841 -2001365002799665343288696//31172749426290664269423183 -82906599394091874511049578920146826651351968733949252229257729//3202722135068822020151439455680372156253901385805112449655312300; 0 0 0 1767732205903//4055673282236 2223734833661311464443869//2412892370833855116699825], rk_coefs_implicit = Float64[1 1767732205903//4055673282236 -687399076962262115744047//111840222463218881928454996 -1336358764442892755444059056266228090581151350208390996588040500539220614055//27246512666133488484210277154496662561525905554882950399821293911874378720636 285200943189932567138799198875705878963855729946875982791861565057705737765121043333532449390223175098959890725739//3197693884094627611809027664829775477382158476704304256365487708404699034948521210742881159797768179105035102983218 
349906153740531530807086677680742934068250971081202838415859967794240267241728221444895340256117278121084468557470142822774574506555702945441070832822061777207585555333938753//13983881081032778505142889772097359302066194326597505729957247063227579945322705391794876798087456494699396393914333627777048370428560157989974144628536010494017605716344421200; 0 1767732205903//4055673282236 -37790740941101883580610910860591527247//131693372088198837795836211504917271540 -1855100908631287514752342850534478233437898769154169242982744248//696821012189129201178283337188250633903779865345141334346955981 -13864927788667680976894941132472814614486628394925655027069496971358505136846849603294768//21721826807945957279896846558408676329589122800041876081057713354971012403885490761907523 3279440794571605320352156805403819083072967867568566199921530274858816688510719259249438840491831638733833253082509394728011722335821499666//12387161495243534653846823539531835409482524701416597408671153384101508978692879955207371459557771903678231566977186207593669409183236242425; 0 0 1767732205903//4055673282236 252818125219158362337262316052985694732//212256555723208031266376853558764309283 91446741129505469383144040723161877446754697117568//638848912416080950914803563396305008955201041811429 -64974104611006292878697003397814574642940098342186996399489269962607827562729361752864//16409007790583358079608534742015990629506873866141219637841130600794261103005884731491225; 0 0 0 1767732205903//4055673282236 2287941076333//4055673282236 0], implicit_coefficient_is_zero = Bool[1, 0, 0, 0], a = Float64[0 0 0 0; 1767732205903//2027836641118 0 0 0; 5535828885825//10492691773637 788022342437//10882634858940 0 0; 6485989280629//16251701735622 -4246266847089//9704473918619 10755448449292//10357097424841 0], @@ -324,7 +326,7 @@ methods = Dict( ), ) -a, b = convert_rk_coefs_to_butcher_tableau(methods["RKF45"].rk_coefs) +a, b = convert_rk_coefs_to_butcher_tableau(methods["RKF45"].rk_coefs, true) methods["RKF45 
attempt 2"] = (rk_coefs = methods["RKF45"].rk_coefs, a = a, b = b) From 9e60f3e1d2559659a81ded4b85c99c4305f4d94e Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 6 Jun 2024 11:43:52 +0100 Subject: [PATCH 71/75] Apply boundary conditions and constraints to low-order solution ...before calculating the timestep error estimate. This ensures that we do not get spurious large errors at grid points that are actually set by the boundary conditions. --- moment_kinetics/src/time_advance.jl | 68 ++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index c36976f93..5114790a7 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -1945,11 +1945,11 @@ Check the error estimate for the embedded RK method and adjust the timestep if appropriate. """ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, moments, - fields, composition, collisions, geometry, - external_source_settings, spectral_objects, - advect_objects, gyroavs, num_diss_params, advance, - scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, - success, nl_max_its_fraction) + fields, boundary_distributions, composition, + collisions, geometry, external_source_settings, + spectral_objects, advect_objects, gyroavs, + num_diss_params, advance, scratch_dummy, r, z, vperp, + vpa, vzeta, vr, vz, success, nl_max_its_fraction) #error_norm_method = "Linf" error_norm_method = "L2" @@ -2020,9 +2020,52 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen skip_r_inner = r.irank != 0 skip_z_lower = z.irank != 0 - # Calculate error for ion distribution functions - # Note we store the calculated error in `scratch[2]`. + # Calculate low-order approximations, from which the timestep error can be estimated. + # Note we store the calculated low-order approximation in `scratch[2]`.
rk_loworder_solution!(scratch, scratch_implicit, :pdf, t_params) + if moments.evolve_density + begin_s_r_z_region() + rk_loworder_solution!(scratch, scratch_implicit, :density, t_params) + end + if moments.evolve_upar + begin_s_r_z_region() + rk_loworder_solution!(scratch, scratch_implicit, :upar, t_params) + end + if moments.evolve_ppar + begin_s_r_z_region() + rk_loworder_solution!(scratch, scratch_implicit, :ppar, t_params) + end + if n_neutral_species > 0 + begin_sn_r_z_vzeta_vr_region() + rk_loworder_solution!(scratch, scratch_implicit, :pdf_neutral, t_params; neutrals=true) + if moments.evolve_density + begin_sn_r_z_region() + rk_loworder_solution!(scratch, scratch_implicit, :density_neutral, t_params; neutrals=true) + end + if moments.evolve_upar + begin_sn_r_z_region() + rk_loworder_solution!(scratch, scratch_implicit, :uz_neutral, t_params; neutrals=true) + end + if moments.evolve_ppar + begin_sn_r_z_region() + rk_loworder_solution!(scratch, scratch_implicit, :pz_neutral, t_params; neutrals=true) + end + end + + # Apply boundary conditions and constraints + apply_all_bcs_constraints_update_moments!( + scratch[2], moments, fields, boundary_distributions, vz, vr, vzeta, + vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry, + gyroavs, num_diss_params, advance, scratch_dummy, false) + + # Re-calculate moment derivatives in the `moments` struct, in case they were changed + # by the previous call + apply_all_bcs_constraints_update_moments!( + scratch[t_params.n_rk_stages+1], moments, fields, boundary_distributions, vz, vr, + vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, geometry, + gyroavs, num_diss_params, advance, scratch_dummy, false; pdf_bc_constraints=false) + + # Calculate the timestep error estimates ion_pdf_error = local_error_norm(scratch[2].pdf, scratch[t_params.n_rk_stages+1].pdf, t_params.rtol, t_params.atol; method=error_norm_method, skip_r_inner=skip_r_inner, @@ -2035,7 +2078,6 @@ function
adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen # Calculate error for ion moments, if necessary if moments.evolve_density begin_s_r_z_region() - rk_loworder_solution!(scratch, scratch_implicit, :density, t_params) ion_n_err = local_error_norm(scratch[2].density, scratch[t_params.n_rk_stages+1].density, t_params.rtol, t_params.atol; @@ -2047,7 +2089,6 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen end if moments.evolve_upar begin_s_r_z_region() - rk_loworder_solution!(scratch, scratch_implicit, :upar, t_params) ion_u_err = local_error_norm(scratch[2].upar, scratch[t_params.n_rk_stages+1].upar, t_params.rtol, t_params.atol; method=error_norm_method, @@ -2058,7 +2099,6 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen end if moments.evolve_ppar begin_s_r_z_region() - rk_loworder_solution!(scratch, scratch_implicit, :ppar, t_params) ion_p_err = local_error_norm(scratch[2].ppar, scratch[t_params.n_rk_stages+1].ppar, t_params.rtol, t_params.atol; method=error_norm_method, @@ -2101,7 +2141,6 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen push!(CFL_limits, t_params.CFL_prefactor * neutral_vz_CFL) # Calculate error for neutral distribution functions - rk_loworder_solution!(scratch, scratch_implicit, :pdf_neutral, t_params; neutrals=true) neut_pdf_error = local_error_norm(scratch[2].pdf_neutral, scratch[end].pdf_neutral, t_params.rtol, t_params.atol; method=error_norm_method, @@ -2116,7 +2155,6 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen # Calculate error for neutral moments, if necessary if moments.evolve_density begin_sn_r_z_region() - rk_loworder_solution!(scratch, scratch_implicit, :density_neutral, t_params; neutrals=true) neut_n_err = local_error_norm(scratch[2].density_neutral, scratch[end].density_neutral, t_params.rtol, t_params.atol, true; method=error_norm_method, @@ -2128,7 +2166,6 @@ function 
adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen end if moments.evolve_upar begin_sn_r_z_region() - rk_loworder_solution!(scratch, scratch_implicit, :uz_neutral, t_params; neutrals=true) neut_u_err = local_error_norm(scratch[2].uz_neutral, scratch[t_params.n_rk_stages+1].uz_neutral, t_params.rtol, t_params.atol, true; @@ -2141,7 +2178,6 @@ function adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, momen end if moments.evolve_ppar begin_sn_r_z_region() - rk_loworder_solution!(scratch, scratch_implicit, :pz_neutral, t_params; neutrals=true) neut_p_err = local_error_norm(scratch[2].pz_neutral, scratch[t_params.n_rk_stages+1].pz_neutral, t_params.rtol, t_params.atol, true; @@ -2312,8 +2348,8 @@ function ssp_rk!(pdf, scratch, scratch_implicit, t, t_params, vz, vr, vzeta, vpa end end adaptive_timestep_update!(scratch, scratch_implicit, t, t_params, moments, fields, - composition, collisions, geometry, - external_source_settings, spectral_objects, + boundary_distributions, composition, collisions, + geometry, external_source_settings, spectral_objects, advect_objects, gyroavs, num_diss_params, advance, scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, success, nl_max_its_fraction) From 63fae8c2ff8704c356d595192f713b9912da0321 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 11 Jun 2024 13:30:39 +0100 Subject: [PATCH 72/75] Skip nonlinear solver tests on macOS MINPACK.jl is broken on macOS (possibly just on ARM?), so skip the nonlinear solver tests on macOS. 
--- moment_kinetics/test/nonlinear_solver_tests.jl | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/moment_kinetics/test/nonlinear_solver_tests.jl b/moment_kinetics/test/nonlinear_solver_tests.jl index 1e63de1be..5c0b08dfe 100644 --- a/moment_kinetics/test/nonlinear_solver_tests.jl +++ b/moment_kinetics/test/nonlinear_solver_tests.jl @@ -268,10 +268,18 @@ function nonlinear_test() end function runtests() - @testset "non-linear solvers" begin - println("non-linear solver tests") - linear_test() - nonlinear_test() + if Sys.isapple() + @testset_skip "MINPACK is broken on macOS (https://github.com/sglyon/MINPACK.jl/issues/18)" "non-linear solvers" begin + println("non-linear solver tests") + linear_test() + nonlinear_test() + end + else + @testset "non-linear solvers" begin + println("non-linear solver tests") + linear_test() + nonlinear_test() + end end end From f643dda4e47b825184e571ffbe98a6d0f68b6c60 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 20 Jun 2024 15:55:38 +0100 Subject: [PATCH 73/75] Fix region in distributed_dot_s_r_z_vperp_vpa() --- moment_kinetics/src/nonlinear_solvers.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 7219c1530..e0872b102 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -565,7 +565,7 @@ function distributed_dot_s_r_z_vperp_vpa(v::AbstractArray{mk_float, 5}, vperp = coords.vperp vpa = coords.vpa - begin_z_region() + begin_s_r_z_vperp_vpa_region() local_dot = 0.0 if r.irank < r.nrank - 1 From 2138739935b5e9f61945e791dde963bd46b076a7 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 29 Jun 2024 21:48:55 +0100 Subject: [PATCH 74/75] In nonlinear solve, always do at least one Newton iteration Ensures implicitly advanced quantities always get updated even when timestep is very small. 
Do allow continuing without doing an iteration if the residual is very small, to avoid creating NaNs. --- moment_kinetics/src/nonlinear_solvers.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index e0872b102..20b016ee4 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -254,7 +254,7 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, close_linear_counter = -1 success = true previous_residual_norm = residual_norm - while residual_norm > 1.0 + while (counter < 1 && residual_norm > 1.0e-8) || residual_norm > 1.0 counter += 1 #println("\nNewton ", counter) From 4f31b31b9e927761359737c0eb978b84896a50fe Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 16 Jul 2024 20:14:26 +0100 Subject: [PATCH 75/75] Fix Jacobian-vector product for updated error tolerances Now that `distributed_norm()`, etc. include error tolerances, vectors normalised to '1.0' are actually very small, so instead of doing `x + epsilon * v` for a small `epsilon`, we should do `x + Jv_scale_factor * v` for a large-ish `Jv_scale_factor`. Otherwise `x + epsilon * v` would be so small that rounding errors are large relative errors on the estimate of `J.v`, which would prevent the Newton solver from converging. 
--- moment_kinetics/src/nonlinear_solvers.jl | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 20b016ee4..79a87fc09 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -82,7 +82,7 @@ function setup_nonlinear_solve(input_dict, coords, outer_coords=(); default_rtol atol=default_atol, nonlinear_max_iterations=20, linear_rtol=1.0e-3, - linear_atol=1.0e-15, + linear_atol=1.0, linear_restart=10, linear_max_restarts=0, preconditioner_update_interval=300, @@ -780,16 +780,25 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol # Solve (approximately?): # J δx = residual0 - tol = max(rtol, atol) - epsilon = 1.0e-6 / tol - inv_epsilon = 1.0 / epsilon - + Jv_scale_factor = 1.0e3 + inv_Jv_scale_factor = 1.0 / Jv_scale_factor + + # The vectors `v` that are passed to this function will be normalised so that + # `distributed_norm(v) == 1.0`. `distributed_norm()` is defined - including the + # relative and absolute tolerances from the Newton iteration - so that a vector with a + # norm of 1.0 is 'small' in the sense that a vector with a norm of 1.0 is small enough + # relative to `x` to consider the iteration converged. This means that `x+v` would be + # very close to `x`, so R(x+v)-R(x) would be likely to be badly affected by rounding + # errors, because `v` is so small, relative to `x`. We actually want to multiply `v` + # by a large number `Jv_scale_factor` (in contrast to the small `epsilon` in the + # 'usual' case where the norm does not include either relative or absolute tolerance) + # to ensure that we get a reasonable estimate of J.v.
function approximate_Jacobian_vector_product!(v) right_preconditioner(v) - parallel_map((x,v) -> x + epsilon * v, v, x, v) + parallel_map((x,v) -> x + Jv_scale_factor * v, v, x, v) residual_func!(rhs_delta, v) - parallel_map((rhs_delta, residual0) -> (rhs_delta - residual0) * inv_epsilon, + parallel_map((rhs_delta, residual0) -> (rhs_delta - residual0) * inv_Jv_scale_factor, v, rhs_delta, residual0) left_preconditioner(v) return v