diff --git a/.github/workflows/debug_checks.yml b/.github/workflows/debug_checks.yml index fc1e7a4a0..451c08502 100644 --- a/.github/workflows/debug_checks.yml +++ b/.github/workflows/debug_checks.yml @@ -31,7 +31,7 @@ jobs: julia --project -O3 --check-bounds=yes -e 'using Pkg; Pkg.add(["MPI", "MPIPreferences", "PackageCompiler", "Symbolics"]); using MPIPreferences; MPIPreferences.use_jll_binary("OpenMPI_jll")' julia --project -O3 --check-bounds=no -e 'using MPI; MPI.install_mpiexecjl(; destdir=".")' julia --project -O3 --check-bounds=yes -e 'using Pkg; Pkg.develop(path="moment_kinetics/"); Pkg.precompile()' - julia --project -O3 --check-bounds=yes precompile.jl --debug 2 + julia --project -O3 --check-bounds=yes precompile-with-check-bounds.jl --debug 2 # Need to use openmpi so that we can use `--oversubscribe` to allow using more MPI ranks than physical cores ## Don't use --compiled-modules=no for now, as it currently breaks Symbolics.jl diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml index d819ef9fd..e38a824e9 100644 --- a/.github/workflows/examples.yml +++ b/.github/workflows/examples.yml @@ -23,8 +23,4 @@ jobs: touch Project.toml julia -O3 --project -e 'import Pkg; Pkg.develop(path="moment_kinetics/"); Pkg.add("NCDatasets"); Pkg.precompile()' # Reduce nstep for each example to 10 to avoid the CI job taking too long - # Note we skip the example `if (occursin("ARK", get(t_input, "type", "") && Sys.isapple())` - # because the way we use MINPACK.jl (needed for nonlinear solvers - # used for implicit parts of timestep) doesn't currently work on - # macOS. 
- julia -O3 --project -e 'using moment_kinetics; for (root, dirs, files) in walkdir("examples") for file in files if endswith(file, ".toml") filename = joinpath(root, file); println(filename); input = moment_kinetics.moment_kinetics_input.read_input_file(filename); t_input = get(input, "timestepping", Dict{String,Any}()); if (occursin("ARK", get(t_input, "type", "")) && Sys.isapple()) continue end; t_input["nstep"] = 10; t_input["dt"] = 1.0e-12; input["timestepping"] = t_input; pop!(input, "z_nelement_local", ""); pop!(input, "r_nelement_local", ""); electron_t_input = get(input, "electron_timestepping", Dict{String,Any}()); electron_t_input["initialization_residual_value"] = 1.0e8; electron_t_input["converged_residual_value"] = 1.0e8; input["electron_timestepping"] = electron_t_input; nl_solver_input = get(input, "nonlinear_solver", Dict{String,Any}()); nl_solver_input["rtol"] = 1.0e6; nl_solver_input["atol"] = 1.0e6; input["nonlinear_solver"] = nl_solver_input; run_moment_kinetics(input) end end end' + julia -O3 --project -e 'using moment_kinetics; for (root, dirs, files) in walkdir("examples") for file in files if endswith(file, ".toml") filename = joinpath(root, file); println(filename); input = moment_kinetics.moment_kinetics_input.read_input_file(filename); t_input = get(input, "timestepping", Dict{String,Any}()); t_input["nstep"] = 10; t_input["dt"] = 1.0e-12; input["timestepping"] = t_input; pop!(get(input, "z", Dict{String,Any}()), "nelement_local", ""); pop!(get(input, "r", Dict{String,Any}()), "nelement_local", ""); electron_t_input = get(input, "electron_timestepping", Dict{String,Any}()); electron_t_input["initialization_residual_value"] = 1.0e8; electron_t_input["converged_residual_value"] = 1.0e8; input["electron_timestepping"] = electron_t_input; nl_solver_input = get(input, "nonlinear_solver", Dict{String,Any}()); nl_solver_input["rtol"] = 1.0e6; nl_solver_input["atol"] = 1.0e6; input["nonlinear_solver"] = nl_solver_input; 
run_moment_kinetics(input) end end end' diff --git a/.github/workflows/parallel_test.yml b/.github/workflows/parallel_test.yml index df1ccf121..3f246e64d 100644 --- a/.github/workflows/parallel_test.yml +++ b/.github/workflows/parallel_test.yml @@ -23,7 +23,7 @@ jobs: touch Project.toml julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["MPI", "MPIPreferences"]); using MPIPreferences; MPIPreferences.use_jll_binary("OpenMPI_jll")' julia --project -O3 --check-bounds=no -e 'using MPI; MPI.install_mpiexecjl(; destdir=".")' - julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["Random", "SpecialFunctions", "Test"]); Pkg.develop(path="moment_kinetics/")' + julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["Random", "SpecialFunctions", "StatsBase", "Test"]); Pkg.develop(path="moment_kinetics/")' julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.precompile()' # Need to use openmpi so that we can use `--oversubscribe` to allow using more MPI ranks than physical cores ./mpiexecjl -np 3 --oversubscribe julia --project -O3 --check-bounds=no moment_kinetics/test/runtests.jl --debug 1 @@ -48,7 +48,7 @@ jobs: touch Project.toml julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["MPI", "MPIPreferences"]); using MPIPreferences; MPIPreferences.use_jll_binary("OpenMPI_jll")' julia --project -O3 --check-bounds=no -e 'using MPI; MPI.install_mpiexecjl(; destdir=".")' - julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["Random", "SpecialFunctions", "Test"]); Pkg.develop(path="moment_kinetics/")' + julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["Random", "SpecialFunctions", "StatsBase", "Test"]); Pkg.develop(path="moment_kinetics/")' julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.precompile()' # Need to use openmpi so that we can use `--oversubscribe` to allow using more MPI ranks than physical cores ./mpiexecjl -np 4 --oversubscribe julia --project -O3 --check-bounds=no 
moment_kinetics/test/runtests.jl --debug 1 diff --git a/examples/kinetic-electrons/README.md b/examples/kinetic-electrons/README.md new file mode 100644 index 000000000..eaaa03310 --- /dev/null +++ b/examples/kinetic-electrons/README.md @@ -0,0 +1,10 @@ +This directory contains input files for some kinetic electron simulations that +are known to run (and probably some other experimental input files too). Inputs +that are expected to work: +* Wall bc with uniform grid. First converge a Boltzmann-electron simulation to + steady state, then restart kinetic electron simulation from that, e.g. + ```julia + run_moment_kinetics("wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z-init.toml") + run_moment_kinetics("wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z.toml"; restart="runs/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z-init/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z-init.dfns.h5") + run_moment_kinetics("wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-PareschiRusso2222.toml"; restart="runs/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z.dfns.h5") + ``` diff --git a/examples/kinetic-electrons/periodic_split3_boltzmann.toml b/examples/kinetic-electrons/periodic_split3_boltzmann.toml index b0bbdc4ee..1b7715688 100644 --- a/examples/kinetic-electrons/periodic_split3_boltzmann.toml +++ b/examples/kinetic-electrons/periodic_split3_boltzmann.toml @@ -2,9 +2,6 @@ charge_exchange_frequency = 0.75 ionization_frequency = 0.0 -[electron_fluid_collisions] -nu_ei = 1000.0 - [evolve_moments] density = true parallel_flow = true diff --git a/examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml b/examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml index a67591698..a299f73f1 100644 --- a/examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml +++ 
b/examples/kinetic-electrons/periodic_split3_kinetic-IMEX.toml @@ -2,9 +2,6 @@ charge_exchange_frequency = 0.75 ionization_frequency = 0.0 -[electron_fluid_collisions] -nu_ei = 1000.0 - [evolve_moments] density = true parallel_flow = true @@ -16,25 +13,25 @@ ngrid = 1 nelement = 1 [z] -ngrid = 17 -nelement = 16 +ngrid = 5 +nelement = 32 #nelement_local = 16 bc = "periodic" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [vpa] ngrid = 6 nelement = 31 L = 12.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [vz] ngrid = 6 nelement = 31 L = 12.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [composition] n_ion_species = 1 @@ -89,6 +86,7 @@ upar_phase = 0.0 [timestepping] type = "KennedyCarpenterARK324" implicit_electron_advance = true +implicit_electron_ppar = false implicit_ion_advance = false implicit_vpa_advection = false nstep = 1000000 @@ -116,6 +114,9 @@ type = "Fekete4(3)" rtol = 1.0e-6 atol = 1.0e-14 minimum_dt = 1.0e-10 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 initialization_residual_value = 2.5 converged_residual_value = 0.1 #1.0e-3 #debug_io = 10000 diff --git a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder-PareschiRusso2222.toml b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder-PareschiRusso2222.toml new file mode 100644 index 000000000..3a6c18ad7 --- /dev/null +++ b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder-PareschiRusso2222.toml @@ -0,0 +1,152 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +ionization_frequency = 0.0 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = 
"gausslegendre_pseudospectral" +nelement = 31 +L = 12.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[krook_collisions] +nuee0 = 1000.0 +use_krook = true +frequency_option = "reference_parameters" +nuei0 = 1000.0 + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 12.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 16 +nelement_local = 2 +bc = "periodic" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "sinusoid" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 1.0 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "sinusoid" +density_amplitude = 0.1 +temperature_amplitude = 0.1 +density_phase = 0.0 +upar_amplitude = 0.1 +temperature_phase = 1.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "PareschiRusso2(2,2,2)" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 500000 +dt = 2.0e-4 +#nwrite = 50 +#nwrite_dfns = 50 +nwrite = 5 +nwrite_dfns = 5 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +#dt = 
2.0e-8 +dt = 5.0e-5 +maximum_dt = 1.0 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-6 +atol = 1.0e-14 +minimum_dt = 1.0e-10 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +#converged_residual_value = 0.1 #1.0e-3 +converged_residual_value = 1.0e-2 +#debug_io = 10000 +constraint_forcing_rate = 1.0e-4 + +[nonlinear_solver] +#nonlinear_max_iterations = 20 #100 +nonlinear_max_iterations = 1000 +rtol = 1.0e-8 #1.0e-5 +atol = 1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 #1000 + +[ion_numerical_dissipation] +vpa_dissipation_coefficient = 1.0e0 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +vpa_dissipation_coefficient = 2.0 +#vpa_dissipation_coefficient = 2.0e2 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +vz_dissipation_coefficient = 1.0e-1 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml new file mode 100644 index 000000000..68b355bed --- /dev/null +++ b/examples/kinetic-electrons/periodic_split3_kinetic-implicit-electron_ppar-loworder.toml @@ -0,0 +1,160 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +ionization_frequency = 0.0 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 12.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[krook_collisions] +nuee0 = 1000.0 +use_krook = true +frequency_option = "reference_parameters" +nuei0 = 1000.0 + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 12.0 
+element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 16 +#nelement_local = 16 +bc = "periodic" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "sinusoid" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 1.0 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "sinusoid" +density_amplitude = 0.1 +temperature_amplitude = 0.1 +density_phase = 0.0 +upar_amplitude = 0.1 +temperature_phase = 1.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-6 +minimum_dt = 1.0e-7 +#maximum_dt = 2.0e-5 +rtol = 1.0e-4 +max_increase_factor_near_last_fail = 1.001 +last_fail_proximity_factor = 1.1 +max_increase_factor = 1.05 +nwrite = 10000 +nwrite_dfns = 10000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +#dt = 2.0e-8 +dt = 2.0e-5 +maximum_dt = 1.0 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-6 +atol = 1.0e-14 +minimum_dt = 1.0e-10 
+decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +#converged_residual_value = 0.1 #1.0e-3 +converged_residual_value = 1.0e-2 +#debug_io = 10000 +constraint_forcing_rate = 1.0e-4 + +[nonlinear_solver] +nonlinear_max_iterations = 20 #100 +rtol = 1.0e-8 #1.0e-5 +atol = 1.0e-16 +#linear_restart = 40 +#linear_restart = 200 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_numerical_dissipation] +vpa_dissipation_coefficient = 1.0e0 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +vpa_dissipation_coefficient = 2.0 +#vpa_dissipation_coefficient = 2.0e2 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +vz_dissipation_coefficient = 1.0e-1 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/periodic_split3_kinetic.toml b/examples/kinetic-electrons/periodic_split3_kinetic.toml index b99a5afb0..ad33440e0 100644 --- a/examples/kinetic-electrons/periodic_split3_kinetic.toml +++ b/examples/kinetic-electrons/periodic_split3_kinetic.toml @@ -2,9 +2,6 @@ charge_exchange_frequency = 0.75 ionization_frequency = 0.0 -[electron_fluid_collisions] -nu_ei = 1000.0 - [evolve_moments] density = true parallel_flow = true @@ -16,25 +13,25 @@ ngrid = 1 nelement = 1 [z] -ngrid = 17 -nelement = 16 +ngrid = 5 +nelement = 32 #nelement_local = 16 bc = "periodic" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [vpa] ngrid = 6 nelement = 31 L = 12.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [vz] ngrid = 6 nelement = 31 L = 12.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [composition] n_ion_species = 1 @@ -134,6 +131,9 @@ type = "Fekete4(3)" rtol = 1.0e-6 atol = 1.0e-14 minimum_dt = 1.0e-10 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 
+cap_factor_ion_dt = 10.0 initialization_residual_value = 2.5 converged_residual_value = 0.1 #1.0e-3 #debug_io = 10000 diff --git a/examples/kinetic-electrons/periodic_split3_kinetic_high-collisionality.toml b/examples/kinetic-electrons/periodic_split3_kinetic_high-collisionality.toml index 8a7e9ea3a..a16f23b0c 100644 --- a/examples/kinetic-electrons/periodic_split3_kinetic_high-collisionality.toml +++ b/examples/kinetic-electrons/periodic_split3_kinetic_high-collisionality.toml @@ -2,9 +2,6 @@ charge_exchange_frequency = 0.75 ionization_frequency = 0.0 -[electron_fluid_collisions] -nu_ei = 1000.0 - [evolve_moments] density = true parallel_flow = true @@ -16,25 +13,25 @@ ngrid = 1 nelement = 1 [z] -ngrid = 17 -nelement = 16 +ngrid = 5 +nelement = 32 #nelement_local = 16 bc = "periodic" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [vpa] ngrid = 6 nelement = 31 L = 12.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [vz] ngrid = 6 nelement = 31 L = 12.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [composition] n_ion_species = 1 @@ -137,6 +134,9 @@ type = "Fekete4(3)" rtol = 1.0e-6 atol = 1.0e-14 minimum_dt = 1.0e-10 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 initialization_residual_value = 2.5 converged_residual_value = 0.1 #1.0e-3 #debug_io = 10000 diff --git a/examples/kinetic-electrons/wall+sheath-bc_boltzmann_loworder.toml b/examples/kinetic-electrons/wall+sheath-bc_boltzmann_loworder.toml index a06f36b1e..ca8ebbf38 100644 --- a/examples/kinetic-electrons/wall+sheath-bc_boltzmann_loworder.toml +++ b/examples/kinetic-electrons/wall+sheath-bc_boltzmann_loworder.toml @@ -5,9 +5,6 @@ ionization_frequency = 2.0 #electron_ionization_frequency = 2.0 #ionization_energy = 1.0 -[electron_fluid_collisions] -nu_ei = 0.0 - [evolve_moments] density = 
false parallel_flow = false diff --git a/examples/kinetic-electrons/wall+sheath-bc_kinetic.toml b/examples/kinetic-electrons/wall+sheath-bc_kinetic.toml index 84fb66008..6270dc514 100644 --- a/examples/kinetic-electrons/wall+sheath-bc_kinetic.toml +++ b/examples/kinetic-electrons/wall+sheath-bc_kinetic.toml @@ -2,12 +2,9 @@ charge_exchange_frequency = 2.0 electron_charge_exchange_frequency = 0.0 ionization_frequency = 2.0 -electron_ionization_frequency = 2.0 +#electron_ionization_frequency = 2.0 ionization_energy = 1.0 -[electron_fluid_collisions] -nu_ei = 0.0 - [evolve_moments] density = false parallel_flow = false @@ -20,29 +17,29 @@ nelement = 1 [z] ngrid = 9 -#nelement = 16 -nelement = 32 +nelement = 16 +#nelement = 32 #nelement = 64 bc = "wall" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" element_spacing_option = "sqrt" [vpa] ngrid = 17 -#nelement = 10 -nelement = 20 +nelement = 10 +#nelement = 20 L = 12.0 #8.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" #discretization = "gausslegendre_pseudospectral" [vz] ngrid = 17 -#nelement = 10 -nelement = 20 +nelement = 10 +#nelement = 20 L = 8.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [composition] n_ion_species = 1 diff --git a/examples/kinetic-electrons/wall+sheath-bc_kinetic_krook_loworder.toml b/examples/kinetic-electrons/wall+sheath-bc_kinetic_krook_loworder.toml index 843b7696a..6064b6347 100644 --- a/examples/kinetic-electrons/wall+sheath-bc_kinetic_krook_loworder.toml +++ b/examples/kinetic-electrons/wall+sheath-bc_kinetic_krook_loworder.toml @@ -2,12 +2,9 @@ charge_exchange_frequency = 2.0 electron_charge_exchange_frequency = 0.0 ionization_frequency = 2.0 -electron_ionization_frequency = 2.0 +#electron_ionization_frequency = 2.0 ionization_energy = 1.0 -[electron_fluid_collisions] -nu_ei = 0.0 - [evolve_moments] density = false 
parallel_flow = false @@ -20,28 +17,31 @@ nelement = 1 [z] ngrid = 5 -#nelement = 32 -nelement = 64 +nelement = 32 +#nelement = 64 #nelement = 128 bc = "wall" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" element_spacing_option = "sqrt" [vpa] ngrid = 5 +nelement = 31 #nelement = 40 -nelement = 80 +#nelement = 80 L = 12.0 #8.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" #discretization = "gausslegendre_pseudospectral" [vz] ngrid = 5 -nelement = 80 +nelement = 31 +#nelement = 40 +#nelement = 80 L = 8.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [composition] n_ion_species = 1 diff --git a/examples/kinetic-electrons/wall+sheath-bc_kinetic_loworder.toml b/examples/kinetic-electrons/wall+sheath-bc_kinetic_loworder.toml index 6abaa537c..b09041af1 100644 --- a/examples/kinetic-electrons/wall+sheath-bc_kinetic_loworder.toml +++ b/examples/kinetic-electrons/wall+sheath-bc_kinetic_loworder.toml @@ -5,9 +5,6 @@ ionization_frequency = 2.0 #electron_ionization_frequency = 2.0 #ionization_energy = 1.0 -[electron_fluid_collisions] -nu_ei = 0.0 - [evolve_moments] density = false parallel_flow = false @@ -20,28 +17,31 @@ nelement = 1 [z] ngrid = 5 -#nelement = 32 -nelement = 64 +nelement = 32 +#nelement = 64 #nelement = 128 bc = "wall" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" element_spacing_option = "sqrt" [vpa] ngrid = 5 +nelement = 31 #nelement = 40 -nelement = 80 +#nelement = 80 L = 12.0 #8.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" #discretization = "gausslegendre_pseudospectral" [vz] ngrid = 5 -nelement = 80 +nelement = 31 +#nelement = 40 +#nelement = 80 L = 12.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [composition] 
n_ion_species = 1 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z-init.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z-init.toml new file mode 100644 index 000000000..528b2d80a --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z-init.toml @@ -0,0 +1,129 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +#nelement_local = 16 +bc = "wall" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" 
+density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "Fekete4(3)" +nstep = 2000000 +dt = 1.0e-5 +minimum_dt = 1.0e-6 +#maximum_dt = 5.0e-5 +#rtol = 1.0e-5 +#atol = 1.0e-12 +rtol = 1.0 +atol = 1.0 +nwrite = 25000 +nwrite_dfns = 25000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z.toml new file mode 100644 index 000000000..c3dea0ad4 --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_boltzmann-coarse_tails-uniform-z.toml @@ -0,0 +1,127 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 
+discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +#nelement_local = 16 +bc = "wall" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "Fekete4(3)" +nstep = 10000000 +dt = 1.0e-5 +minimum_dt = 1.0e-6 +#maximum_dt = 5.0e-5 +rtol = 1.0e-5 +atol = 1.0e-12 +nwrite = 25000 +nwrite_dfns = 25000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 
+#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-PareschiRusso2222.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-PareschiRusso2222.toml new file mode 100644 index 000000000..edbc18d7c --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-PareschiRusso2222.toml @@ -0,0 +1,160 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +bc = "wall" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 
+upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "PareschiRusso2(2,2,2)" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 100000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 2.0e-8 +#maximum_dt = 1.0e-8 +nwrite = 10 #10000 +nwrite_dfns = 10 #100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-vpadiss0.toml 
b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-vpadiss0.toml index dc32eac73..2fbd82e81 100644 --- a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-vpadiss0.toml +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-vpadiss0.toml @@ -17,27 +17,28 @@ ngrid = 5 nelement = 32 #nelement_local = 16 bc = "wall" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" element_spacing_option = "sqrt" [vpa] ngrid = 6 -nelement = 63 +nelement = 31 #63 L = 48.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [vz] ngrid = 6 -nelement = 63 +nelement = 31 #63 L = 36.0 bc = "zero" -discretization = "chebyshev_pseudospectral" +discretization = "gausslegendre_pseudospectral" [composition] n_ion_species = 1 n_neutral_species = 1 -electron_physics = "kinetic_electrons_with_temperature_equation" +#electron_physics = "kinetic_electrons_with_temperature_equation" +electron_physics = "kinetic_electrons" recycling_fraction = 0.5 T_e = 0.2 # 1.0 T_wall = 0.1 @@ -110,6 +111,9 @@ type = "Fekete4(3)" rtol = 1.0e-3 atol = 1.0e-14 minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 initialization_residual_value = 2.5 converged_residual_value = 0.1 #1.0e-3 diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index 42fa8f601..9acef44c4 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -604,8 +604,8 @@ function _setup_single_input!(this_input_dict::OrderedDict{String,Any}, # - Don't allow setting "itime_*" and "itime_*_dfns" per-variable because we # load time and time_dfns in run_info and these must use the same # "itime_*"/"itime_*_dfns" setting 
as each variable. - time_index_options = ("itime_min", "itime_max", "itime_skip", "itime_min_dfns", - "itime_max_dfns", "itime_skip_dfns") + only_global_options = ("itime_min", "itime_max", "itime_skip", "itime_min_dfns", + "itime_max_dfns", "itime_skip_dfns", "handle_errors") set_defaults_and_check_top_level!(this_input_dict; # Options that only apply at the global level (not per-variable) @@ -656,11 +656,14 @@ function _setup_single_input!(this_input_dict::OrderedDict{String,Any}, animate_vs_z_r=false, show_element_boundaries=false, steady_state_residual=false, + # By default, errors are caught so that later plots can still be made. For + # debugging it can be useful to turn this off. + handle_errors=true, ) section_defaults = OrderedDict(k=>v for (k,v) ∈ this_input_dict if !isa(v, AbstractDict) && - !(k ∈ time_index_options)) + !(k ∈ only_global_options)) for variable_name ∈ tuple(all_moment_variables..., timestep_diagnostic_variables...) set_defaults_and_check_section!( this_input_dict, variable_name; @@ -818,7 +821,8 @@ function _setup_single_input!(this_input_dict::OrderedDict{String,Any}, end function makie_post_processing_error_handler(e::Exception, message::String) - if isa(e, InterruptException) + handle_errors = get(input_dict, "handle_errors", true) + if isa(e, InterruptException) || !handle_errors rethrow(e) else println(message * "\nError was $e.") @@ -1125,6 +1129,7 @@ function plots_for_dfn_variable(run_info, variable_name; plot_prefix, has_rdim=t input = Dict_to_NamedTuple(input_dict_dfns[variable_name]) is_neutral = variable_name ∈ neutral_dfn_variables + is_electron = variable_name ∈ electron_dfn_variables if is_neutral animate_dims = setdiff(neutral_dimensions, (:sn,)) @@ -1241,23 +1246,30 @@ function plots_for_dfn_variable(run_info, variable_name; plot_prefix, has_rdim=t else if input[Symbol(:plot, log, :_unnorm_vs_vpa)] outfile = var_prefix * "unnorm_vs_vpa.pdf" - plot_f_unnorm_vs_vpa(run_info; input=input, is=is, outfile=outfile, - 
yscale=yscale, transform=transform) + plot_f_unnorm_vs_vpa(run_info; input=input, electron=is_electron, + is=is, outfile=outfile, yscale=yscale, + transform=transform) end if has_zdim && input[Symbol(:plot, log, :_unnorm_vs_vpa_z)] outfile = var_prefix * "unnorm_vs_vpa_z.pdf" - plot_f_unnorm_vs_vpa_z(run_info; input=input, is=is, outfile=outfile, - colorscale=yscale, transform=transform) + plot_f_unnorm_vs_vpa_z(run_info; input=input, + electron=is_electron, is=is, + outfile=outfile, colorscale=yscale, + transform=transform) end if input[Symbol(:animate, log, :_unnorm_vs_vpa)] outfile = var_prefix * "unnorm_vs_vpa." * input.animation_ext - animate_f_unnorm_vs_vpa(run_info; input=input, is=is, outfile=outfile, - yscale=yscale, transform=transform) + animate_f_unnorm_vs_vpa(run_info; input=input, + electron=is_electron, is=is, + outfile=outfile, yscale=yscale, + transform=transform) end if has_zdim && input[Symbol(:animate, log, :_unnorm_vs_vpa_z)] outfile = var_prefix * "unnorm_vs_vpa_z." * input.animation_ext - animate_f_unnorm_vs_vpa_z(run_info; input=input, is=is, outfile=outfile, - colorscale=yscale, transform=transform) + animate_f_unnorm_vs_vpa_z(run_info; input=input, + electron=is_electron, is=is, + outfile=outfile, colorscale=yscale, + transform=transform) end end check_moment_constraints(run_info, is_neutral; input=input, plot_prefix) @@ -3797,9 +3809,9 @@ function calculate_steady_state_residual(run_info, variable_name; is=1, data=not end """ - plot_f_unnorm_vs_vpa(run_info; input=nothing, neutral=false, it=nothing, is=1, - iz=nothing, fig=nothing, ax=nothing, outfile=nothing, - yscale=identity, transform=identity, + plot_f_unnorm_vs_vpa(run_info; input=nothing, electron=false, neutral=false, + it=nothing, is=1, iz=nothing, fig=nothing, ax=nothing, + outfile=nothing, yscale=identity, transform=identity, axis_args=Dict{Symbol,Any}(), kwargs...) Plot an unnormalized distribution function against \$v_\\parallel\$ at a fixed z. 
@@ -3811,8 +3823,9 @@ The information for the runs to plot is passed in `run_info` (as returned by [`get_run_info`](@ref)). If `run_info` is a Tuple, comparison plots are made where plots from the different runs are overlayed on the same axis. -By default plots the ion distribution function. If `neutrals=true` is passed, plots the -neutral distribution function instead. +By default plots the ion distribution function. If `electron=true` is passed, plots the +electron distribution function instead. If `neutral=true` is passed, plots the neutral +distribution function instead. `is` selects which species to analyse. @@ -3844,8 +3857,9 @@ Any extra `kwargs` are passed to [`plot_1d`](@ref). """ function plot_f_unnorm_vs_vpa end -function plot_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, neutral=false, - outfile=nothing, axis_args=Dict{Symbol,Any}(), kwargs...) +function plot_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, electron=false, + neutral=false, outfile=nothing, + axis_args=Dict{Symbol,Any}(), kwargs...) try n_runs = length(run_info) @@ -3855,8 +3869,8 @@ function plot_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, neutral=false, fig, ax = get_1d_ax(; xlabel=L"v_\parallel", ylabel=ylabel, axis_args...) for ri ∈ run_info - plot_f_unnorm_vs_vpa(ri; f_over_vpa2=f_over_vpa2, neutral=neutral, ax=ax, - kwargs...) + plot_f_unnorm_vs_vpa(ri; f_over_vpa2=f_over_vpa2, electron=electron, + neutral=neutral, ax=ax, kwargs...) 
end if n_runs > 1 @@ -3875,10 +3889,16 @@ function plot_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, neutral=false, end end -function plot_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, neutral=false, - it=nothing, is=1, iz=nothing, fig=nothing, ax=nothing, - outfile=nothing, transform=identity, +function plot_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, electron=false, + neutral=false, it=nothing, is=1, iz=nothing, fig=nothing, + ax=nothing, outfile=nothing, transform=identity, axis_args=Dict{Symbol,Any}(), kwargs...) + + if electron && neutral + error("does not make sense to pass electron=true and neutral=true at the same " + * "time") + end + if input === nothing if neutral input = Dict_to_NamedTuple(input_dict_dfns["f_neutral"]) @@ -3897,7 +3917,7 @@ function plot_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, neutra end if ax === nothing - species_label = neutral ? "n" : "i" + species_label = neutral ? "n" : electron ? "e" : "i" divide_by = f_over_vpa2 ? L"/v_\parallel^2" : "" ylabel = L"f_{%$species_label,\mathrm{unnormalized}}%$divide_by" fig, ax = get_1d_ax(; xlabel=L"v_\parallel", ylabel=ylabel, axis_args...) @@ -3913,11 +3933,13 @@ function plot_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, neutra iz=iz) vcoord = run_info.vz else - f = get_variable(run_info, "f"; it=it, is=is, ir=input.ir0, iz=iz, + suffix = electron ? "_electron" : "" + prefix = electron ? 
"electron_" : "" + f = get_variable(run_info, "f$suffix"; it=it, is=is, ir=input.ir0, iz=iz, ivperp=input.ivperp0) - density = get_variable(run_info, "density"; it=it, is=is, ir=input.ir0, iz=iz) - upar = get_variable(run_info, "parallel_flow"; it=it, is=is, ir=input.ir0, iz=iz) - vth = get_variable(run_info, "thermal_speed"; it=it, is=is, ir=input.ir0, iz=iz) + density = get_variable(run_info, "$(prefix)density"; it=it, is=is, ir=input.ir0, iz=iz) + upar = get_variable(run_info, "$(prefix)parallel_flow"; it=it, is=is, ir=input.ir0, iz=iz) + vth = get_variable(run_info, "$(prefix)thermal_speed"; it=it, is=is, ir=input.ir0, iz=iz) vcoord = run_info.vpa end @@ -3963,10 +3985,10 @@ function plot_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, neutra end """ - plot_f_unnorm_vs_vpa_z(run_info; input=nothing, neutral=false, it=nothing, is=1, - fig=nothing, ax=nothing, outfile=nothing, yscale=identity, - transform=identity, rasterize=true, subtitles=nothing, - axis_args=Dict{Symbol,Any}(), kwargs...) + plot_f_unnorm_vs_vpa_z(run_info; input=nothing, electron=false, neutral=false, + it=nothing, is=1, fig=nothing, ax=nothing, outfile=nothing, + yscale=identity, transform=identity, rasterize=true, + subtitles=nothing, axis_args=Dict{Symbol,Any}(), kwargs...) Plot unnormalized distribution function against \$v_\\parallel\$ and z. @@ -3977,8 +3999,9 @@ The information for the runs to plot is passed in `run_info` (as returned by [`get_run_info`](@ref)). If `run_info` is a Tuple, comparison plots are made where plots from the different runs are displayed in a horizontal row. -By default plots the ion distribution function. If `neutrals=true` is passed, plots the -neutral distribution function instead. +By default plots the ion distribution function. If `electron=true` is passed, plots the +electron distribution function instead. If `neutral=true` is passed, plots the neutral +distribution function instead. `is` selects which species to analyse. 
@@ -4017,24 +4040,24 @@ Any extra `kwargs` are passed to [`plot_2d`](@ref). """ function plot_f_unnorm_vs_vpa_z end -function plot_f_unnorm_vs_vpa_z(run_info::Tuple; neutral=false, outfile=nothing, - axis_args=Dict{Symbol,Any}(), title=nothing, - subtitles=nothing, kwargs...) +function plot_f_unnorm_vs_vpa_z(run_info::Tuple; electron=false, neutral=false, + outfile=nothing, axis_args=Dict{Symbol,Any}(), + title=nothing, subtitles=nothing, kwargs...) try n_runs = length(run_info) if subtitles === nothing subtitles = Tuple(nothing for _ ∈ 1:n_runs) end if title !== nothing - title = neutral ? L"f_{n,\mathrm{unnormalized}}" : L"f_{i,\mathrm{unnormalized}}" + title = neutral ? L"f_{n,\mathrm{unnormalized}}" : electron ? L"f_{e,\mathrm{unnormalized}}" : L"f_{i,\mathrm{unnormalized}}" end fig, axes, colorbar_places = get_2d_ax(n_runs; title=title, xlabel=L"v_\parallel", ylabel=L"z", axis_args...) for (ri, ax, colorbar_place, st) ∈ zip(run_info, axes, colorbar_places, subtitles) - plot_f_unnorm_vs_vpa_z(ri; neutral=neutral, ax=ax, colorbar_place=colorbar_place, - title=st, kwargs...) + plot_f_unnorm_vs_vpa_z(ri; electron=electron, neutral=neutral, ax=ax, + colorbar_place=colorbar_place, title=st, kwargs...) end if outfile !== nothing @@ -4049,10 +4072,17 @@ function plot_f_unnorm_vs_vpa_z(run_info::Tuple; neutral=false, outfile=nothing, end end -function plot_f_unnorm_vs_vpa_z(run_info; input=nothing, neutral=false, it=nothing, is=1, - fig=nothing, ax=nothing, colorbar_place=nothing, title=nothing, - outfile=nothing, transform=identity, rasterize=true, +function plot_f_unnorm_vs_vpa_z(run_info; input=nothing, electron=false, neutral=false, + it=nothing, is=1, fig=nothing, ax=nothing, + colorbar_place=nothing, title=nothing, outfile=nothing, + transform=identity, rasterize=true, axis_args=Dict{Symbol,Any}(), kwargs...) 
+ + if electron && neutral + error("does not make sense to pass electron=true and neutral=true at the same " + * "time") + end + if input === nothing if neutral input = Dict_to_NamedTuple(input_dict_dfns["f_neutral"]) @@ -4069,7 +4099,7 @@ function plot_f_unnorm_vs_vpa_z(run_info; input=nothing, neutral=false, it=nothi if ax === nothing if title === nothing - title = neutral ? L"f_{n,\mathrm{unnormalized}}" : L"f_{i,\mathrm{unnormalized}}" + title = neutral ? L"f_{n,\mathrm{unnormalized}}" : electron ? L"f_{e,\mathrm{unnormalized}}" : L"f_{i,\mathrm{unnormalized}}" end fig, ax, colorbar_place = get_2d_ax(; title=title, xlabel=L"v_\parallel", ylabel=L"z", axis_args...) @@ -4089,10 +4119,12 @@ function plot_f_unnorm_vs_vpa_z(run_info; input=nothing, neutral=false, it=nothi vth = get_variable(run_info, "thermal_speed_neutral"; it=it, is=is, ir=input.ir0) vpa_grid = run_info.vz.grid else - f = get_variable(run_info, "f"; it=it, is=is, ir=input.ir0, ivperp=input.ivperp0) - density = get_variable(run_info, "density"; it=it, is=is, ir=input.ir0) - upar = get_variable(run_info, "parallel_flow"; it=it, is=is, ir=input.ir0) - vth = get_variable(run_info, "thermal_speed"; it=it, is=is, ir=input.ir0) + suffix = electron ? "_electron" : "" + prefix = electron ? 
"electron_" : "" + f = get_variable(run_info, "f$suffix"; it=it, is=is, ir=input.ir0, ivperp=input.ivperp0) + density = get_variable(run_info, "$(prefix)density"; it=it, is=is, ir=input.ir0) + upar = get_variable(run_info, "$(prefix)parallel_flow"; it=it, is=is, ir=input.ir0) + vth = get_variable(run_info, "$(prefix)thermal_speed"; it=it, is=is, ir=input.ir0) vpa_grid = run_info.vpa.grid end @@ -4124,8 +4156,8 @@ function plot_f_unnorm_vs_vpa_z(run_info; input=nothing, neutral=false, it=nothi end """ - animate_f_unnorm_vs_vpa(run_info; input=nothing, neutral=false, is=1, iz=nothing, - fig=nothing, ax=nothing, frame_index=nothing, + animate_f_unnorm_vs_vpa(run_info; input=nothing, electron=false, neutral=false, is=1, + iz=nothing, fig=nothing, ax=nothing, frame_index=nothing, outfile=nothing, yscale=identity, transform=identity, axis_args=Dict{Symbol,Any}(), kwargs...) @@ -4138,8 +4170,9 @@ The information for the runs to animate is passed in `run_info` (as returned by [`get_run_info`](@ref)). If `run_info` is a Tuple, comparison plots are made where plots from the different runs are overlayed on the same axis. -By default animates the ion distribution function. If `neutrals=true` is passed, animates -the neutral distribution function instead. +By default animates the ion distribution function. If `electron=true` is passed, animates +the electron distribution function instead. If `neutral=true` is passed, animates the +neutral distribution function instead. `is` selects which species to analyse. @@ -4174,14 +4207,15 @@ to handle time-varying coordinates so cannot use [`animate_1d`](@ref)). """ function animate_f_unnorm_vs_vpa end -function animate_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, neutral=false, - outfile=nothing, axis_args=Dict{Symbol,Any}(), kwargs...) +function animate_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, electron=false, + neutral=false, outfile=nothing, + axis_args=Dict{Symbol,Any}(), kwargs...) 
try n_runs = length(run_info) frame_index = Observable(1) - species_label = neutral ? "n" : "i" + species_label = neutral ? "n" : electron ? "e" : "i" divide_by = f_over_vpa2 ? L"/v_\parallel^2" : "" ylabel = L"f_{%$species_label,\mathrm{unnormalized}}%$divide_by" if length(run_info) == 1 || all(all(isapprox.(ri.time, run_info[1].time)) for ri ∈ run_info[2:end]) @@ -4196,8 +4230,9 @@ function animate_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, neutral=fal axis_args...) for ri ∈ run_info - animate_f_unnorm_vs_vpa(ri; f_over_vpa2=f_over_vpa2, neutral=neutral, ax=ax, - frame_index=frame_index, kwargs...) + animate_f_unnorm_vs_vpa(ri; f_over_vpa2=f_over_vpa2, electron=electron, + neutral=neutral, ax=ax, frame_index=frame_index, + kwargs...) end if n_runs > 1 @@ -4218,10 +4253,16 @@ function animate_f_unnorm_vs_vpa(run_info::Tuple; f_over_vpa2=false, neutral=fal end function animate_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, - neutral=false, is=1, iz=nothing, fig=nothing, ax=nothing, - frame_index=nothing, outfile=nothing, yscale=nothing, - transform=identity, axis_args=Dict{Symbol,Any}(), - kwargs...) + electron=false, neutral=false, is=1, iz=nothing, + fig=nothing, ax=nothing, frame_index=nothing, + outfile=nothing, yscale=nothing, transform=identity, + axis_args=Dict{Symbol,Any}(), kwargs...) 
+ + if electron && neutral + error("does not make sense to pass electron=true and neutral=true at the same " + * "time") + end + if input === nothing if neutral input = Dict_to_NamedTuple(input_dict_dfns["f_neutral"]) @@ -4258,12 +4299,14 @@ function animate_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, vth = get_variable(run_info, "thermal_speed_neutral"; is=is, ir=input.ir0, iz=iz) vcoord = run_info.vz else - f = VariableCache(run_info, "f", chunk_size_2d; it=nothing, is=is, ir=input.ir0, iz=iz, - ivperp=input.ivperp0, ivpa=nothing, ivzeta=nothing, ivr=nothing, - ivz=nothing) - density = get_variable(run_info, "density"; is=is, ir=input.ir0, iz=iz) - upar = get_variable(run_info, "parallel_flow"; is=is, ir=input.ir0, iz=iz) - vth = get_variable(run_info, "thermal_speed"; is=is, ir=input.ir0, iz=iz) + suffix = electron ? "_electron" : "" + prefix = electron ? "electron_" : "" + f = VariableCache(run_info, "f$suffix", chunk_size_2d; it=nothing, is=is, + ir=input.ir0, iz=iz, ivperp=input.ivperp0, ivpa=nothing, + ivzeta=nothing, ivr=nothing, ivz=nothing) + density = get_variable(run_info, "$(prefix)density"; is=is, ir=input.ir0, iz=iz) + upar = get_variable(run_info, "$(prefix)parallel_flow"; is=is, ir=input.ir0, iz=iz) + vth = get_variable(run_info, "$(prefix)thermal_speed"; is=is, ir=input.ir0, iz=iz) vcoord = run_info.vpa end @@ -4347,8 +4390,8 @@ function animate_f_unnorm_vs_vpa(run_info; f_over_vpa2=false, input=nothing, end """ - animate_f_unnorm_vs_vpa_z(run_info; input=nothing, neutral=false, is=1, - fig=nothing, ax=nothing, frame_index=nothing, + animate_f_unnorm_vs_vpa_z(run_info; input=nothing, electron=false, neutral=false, + is=1, fig=nothing, ax=nothing, frame_index=nothing, outfile=nothing, yscale=identity, transform=identity, axis_args=Dict{Symbol,Any}(), kwargs...) @@ -4361,8 +4404,9 @@ The information for the runs to plot is passed in `run_info` (as returned by [`get_run_info`](@ref)). 
If `run_info` is a Tuple, comparison plots are made where plots from the different runs are displayed in a horizontal row. -By default animates the ion distribution function. If `neutrals=true` is passed, animates -the neutral distribution function instead. +By default animates the ion distribution function. If `electron=true` is passed, animates +the electron distribution function instead. If `neutral=true` is passed, animates the +neutral distribution function instead. `is` selects which species to analyse. @@ -4394,14 +4438,15 @@ we have to handle time-varying coordinates so cannot use [`animate_2d`](@ref)). """ function animate_f_unnorm_vs_vpa_z end -function animate_f_unnorm_vs_vpa_z(run_info::Tuple; neutral=false, outfile=nothing, - axis_args=Dict{Symbol,Any}(), kwargs...) +function animate_f_unnorm_vs_vpa_z(run_info::Tuple; electron=false, neutral=false, + outfile=nothing, axis_args=Dict{Symbol,Any}(), + kwargs...) try n_runs = length(run_info) frame_index = Observable(1) - var_name = neutral ? L"f_{n,\mathrm{unnormalized}}" : L"f_{i,\mathrm{unnormalized}}" + var_name = neutral ? L"f_{n,\mathrm{unnormalized}}" : electron ? L"f_{e,\mathrm{unnormalized}}" : L"f_{i,\mathrm{unnormalized}}" if length(run_info) > 1 title = var_name subtitles = (lift(i->LaTeXString(string(ri.run_name, "\nt = ", ri.time[i])), @@ -4418,7 +4463,7 @@ function animate_f_unnorm_vs_vpa_z(run_info::Tuple; neutral=false, outfile=nothi axis_args...) for (ri, ax, colorbar_place) ∈ zip(run_info, axes, colorbar_places) - animate_f_unnorm_vs_vpa_z(ri; neutral=neutral, ax=ax, + animate_f_unnorm_vs_vpa_z(ri; electron=electron, neutral=neutral, ax=ax, colorbar_place=colorbar_place, frame_index=frame_index, kwargs...) 
end @@ -4436,11 +4481,17 @@ function animate_f_unnorm_vs_vpa_z(run_info::Tuple; neutral=false, outfile=nothi end end -function animate_f_unnorm_vs_vpa_z(run_info; input=nothing, neutral=false, is=1, - fig=nothing, ax=nothing, colorbar_place=nothing, +function animate_f_unnorm_vs_vpa_z(run_info; input=nothing, electron=false, neutral=false, + is=1, fig=nothing, ax=nothing, colorbar_place=nothing, frame_index=nothing, outfile=nothing, transform=identity, axis_args=Dict{Symbol,Any}(), kwargs...) + + if electron && neutral + error("does not make sense to pass electron=true and neutral=true at the same " + * "time") + end + if input === nothing if neutral input = Dict_to_NamedTuple(input_dict_dfns["f_neutral"]) @@ -4478,17 +4529,19 @@ function animate_f_unnorm_vs_vpa_z(run_info; input=nothing, neutral=false, is=1, ivzeta=nothing, ivr=nothing, ivz=nothing) vpa_grid = run_info.vz.grid else - f = VariableCache(run_info, "f", chunk_size_2d; it=nothing, is=is, ir=input.ir0, - iz=nothing, ivperp=input.ivperp0, ivpa=nothing, ivzeta=nothing, - ivr=nothing, ivz=nothing) - density = VariableCache(run_info, "density", chunk_size_1d; it=nothing, is=is, - ir=input.ir0, iz=nothing, ivperp=nothing, ivpa=nothing, - ivzeta=nothing, ivr=nothing, ivz=nothing) - upar = VariableCache(run_info, "parallel_flow", chunk_size_1d; it=nothing, is=is, - ir=input.ir0, iz=nothing, ivperp=nothing, ivpa=nothing, - ivzeta=nothing, ivr=nothing, ivz=nothing) - vth = VariableCache(run_info, "thermal_speed", chunk_size_1d; it=nothing, is=is, - ir=input.ir0, iz=nothing, ivperp=nothing, ivpa=nothing, + suffix = electron ? "_electron" : "" + prefix = electron ? 
"electron_" : "" + f = VariableCache(run_info, "f$suffix", chunk_size_2d; it=nothing, is=is, + ir=input.ir0, iz=nothing, ivperp=input.ivperp0, ivpa=nothing, + ivzeta=nothing, ivr=nothing, ivz=nothing) + density = VariableCache(run_info, "$(prefix)density", chunk_size_1d; it=nothing, + is=is, ir=input.ir0, iz=nothing, ivperp=nothing, + ivpa=nothing, ivzeta=nothing, ivr=nothing, ivz=nothing) + upar = VariableCache(run_info, "$(prefix)parallel_flow", chunk_size_1d; + it=nothing, is=is, ir=input.ir0, iz=nothing, ivperp=nothing, + ivpa=nothing, ivzeta=nothing, ivr=nothing, ivz=nothing) + vth = VariableCache(run_info, "$(prefix)thermal_speed", chunk_size_1d; it=nothing, + is=is, ir=input.ir0, iz=nothing, ivperp=nothing, ivpa=nothing, ivzeta=nothing, ivr=nothing, ivz=nothing) vpa_grid = run_info.vpa.grid end @@ -7637,6 +7690,14 @@ function timestep_diagnostics(run_info, run_info_dfns; plot_prefix=nothing, it=n plot_1d(time, @view failure_caused_by_per_output[counter,:]; linestyle=:dash, label=prefix * "failures caused by p_electron", ax=ax_failures) + if !electron && ri.composition.electron_physics ∈ (kinetic_electrons, + kinetic_electrons_with_temperature_equation) + # Kinetic electron nonlinear solver failure + counter += 1 + plot_1d(time, @view failure_caused_by_per_output[counter,:]; + linestyle=:dash, label=prefix * "failures caused by kinetic electron solve", + ax=ax_failures) + end end if !electron && ri.n_neutral_species > 0 # Neutral pdf failure counter @@ -8002,13 +8063,6 @@ function timestep_diagnostics(run_info, run_info_dfns; plot_prefix=nothing, it=n plot_1d(time, linear_iterations, label=prefix * " " * p * " L per NL", ax=ax) end end - - if ri.composition.electron_physics ∈ (kinetic_electrons, - kinetic_electrons_with_temperature_equation) - has_nl_solver = true - electron_steps_per_ion_step = get_variable(ri, "electron_steps_per_ion_step") - plot_1d(time, electron_steps_per_ion_step, label=prefix * " electron steps per solve", ax=ax) - end end if 
has_nl_solver @@ -8026,6 +8080,47 @@ function timestep_diagnostics(run_info, run_info_dfns; plot_prefix=nothing, it=n e, "Error in timestep_diagnostics() nl_solvers_fig.") end + + try + # Plot electron solver diagnostics (electron steps per ion step) on their own figure; saved if plot_prefix is given, displayed otherwise + electron_solver_fig, ax = get_1d_ax(; xlabel="time", ylabel="electron steps per ion step") + + has_electron_solve = false + for ri ∈ run_info + if length(run_info) == 1 + prefix = "" + else + prefix = ri.run_name * " " + end + if it !== nothing + time = ri.time[it] + else + time = ri.time + end + + if ri.composition.electron_physics ∈ (kinetic_electrons, + kinetic_electrons_with_temperature_equation) + has_electron_solve = true + electron_steps_per_ion_step = get_variable(ri, "electron_steps_per_ion_step") + plot_1d(time, electron_steps_per_ion_step, label=prefix * " electron steps per solve", ax=ax) + end + end + + if has_electron_solve + put_legend_right(electron_solver_fig, ax) + + if plot_prefix !== nothing + outfile = plot_prefix * "electron_steps.pdf" + save(outfile, electron_solver_fig) + else + display(electron_solver_fig) + end + end + catch e + makie_post_processing_error_handler( + e, + "Error in timestep_diagnostics() electron_solver_fig.") + end end if input.animate_CFL diff --git a/moment_kinetics/Project.toml b/moment_kinetics/Project.toml index 8de860c52..fc61043c9 100644 --- a/moment_kinetics/Project.toml +++ b/moment_kinetics/Project.toml @@ -17,7 +17,6 @@ LegendrePolynomials = "3db4a2ba-fc88-11e8-3e01-49c72059a882" LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LsqFit = "2fda8390-95c7-5789-9bda-21331edee243" -MINPACK = "4854310b-de5a-5eb6-a2a5-c1dee2bd17f9" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e" @@ -31,6 +30,7 @@ Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665" SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" SparseArrays = 
"2f01184e-e22b-5df5-ae63-d93ebab69eaf" +SparseMatricesCSR = "a0a7dd2c-ebf4-11e9-1f05-cf50bc540ca1" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" diff --git a/moment_kinetics/debug_test/kinetic_electron_inputs.jl b/moment_kinetics/debug_test/kinetic_electron_inputs.jl index 0d4e8d042..4f834c319 100644 --- a/moment_kinetics/debug_test/kinetic_electron_inputs.jl +++ b/moment_kinetics/debug_test/kinetic_electron_inputs.jl @@ -2,7 +2,7 @@ test_type = "Kinetic electron" using moment_kinetics.type_definitions: OptionsDict test_input = OptionsDict("composition" => OptionsDict("n_ion_species" => 1, - "n_neutral_species" => 1, + "n_neutral_species" => 0, #1, "electron_physics" => "kinetic_electrons", "recycling_fraction" => 0.5, "T_e" => 0.2, @@ -29,58 +29,55 @@ test_input = OptionsDict("composition" => OptionsDict("n_ion_species" => 1, "upar_phase" => 0.0, "temperature_amplitude" => 0.0, "temperature_phase" => 0.0), - "neutral_species_1" => OptionsDict("initial_density" => 1.0, - "initial_temperature" => 1.0), - "z_IC_neutral_species_1" => OptionsDict("initialization_option" => "gaussian", - "density_amplitude" => 0.001, - "density_phase" => 0.0, - "upar_amplitude" => -1.0, - "upar_phase" => 0.0, - "temperature_amplitude" => 0.0, - "temperature_phase" => 0.0), - "vz_IC_neutral_species_1" => OptionsDict("initialization_option" => "gaussian", - "density_amplitude" => 1.0, - "density_phase" => 0.0, - "upar_amplitude" => 0.0, - "upar_phase" => 0.0, - "temperature_amplitude" => 0.0, - "temperature_phase" => 0.0), + #"neutral_species_1" => OptionsDict("initial_density" => 1.0, + # "initial_temperature" => 1.0), + #"z_IC_neutral_species_1" => OptionsDict("initialization_option" => "gaussian", + # "density_amplitude" => 0.001, + # "density_phase" => 0.0, + # "upar_amplitude" => -1.0, + # "upar_phase" => 0.0, + # "temperature_amplitude" => 0.0, + # "temperature_phase" 
=> 0.0), + #"vz_IC_neutral_species_1" => OptionsDict("initialization_option" => "gaussian", + # "density_amplitude" => 1.0, + # "density_phase" => 0.0, + # "upar_amplitude" => 0.0, + # "upar_phase" => 0.0, + # "temperature_amplitude" => 0.0, + # "temperature_phase" => 0.0), "reactions" => OptionsDict("charge_exchange_frequency" => 0.75, "ionization_frequency" => 0.5), - "timestepping" => OptionsDict("type" => "Fekete4(3)", + "timestepping" => OptionsDict("type" => "PareschiRusso2(2,2,2)", "nstep" => 3, - "dt" => 2.0e-8, - "minimum_dt" => 1.0e-8, - "CFL_prefactor" => 1.0, - "step_update_prefactor" => 0.4, - "nwrite" => 2, - "split_operators" => false), - "electron_timestepping" => OptionsDict("type" => "Fekete4(3)", - "nstep" => 10, - "dt" => 4.0e-11, - "minimum_dt" => 2.0e-11, - "initialization_residual_value" => 1.e10, - "converged_residual_value" => 1.e10, + "dt" => 1.0e-9, + "nwrite" => 2,), + "electron_timestepping" => OptionsDict("dt" => 1.0e-6, + "initialization_residual_value" => 2.e3, + "converged_residual_value" => 1.e3, "nwrite" => 10000, "nwrite_dfns" => 10000, "no_restart" => true), + #"nonlinear_solver" => OptionsDict("rtol" => 1.0e-2, + # "atol" => 1.0e-3,), "r" => OptionsDict("ngrid" => 1, "nelement" => 1), "z" => OptionsDict("ngrid" => 3, - "nelement" => 24, + "nelement" => 1, "bc" => "wall", - "discretization" => "chebyshev_pseudospectral", - "element_spacing_option" => "sqrt"), - "vpa" => OptionsDict("ngrid" => 3, - "nelement" => 16, + "discretization" => "gausslegendre_pseudospectral", + "element_spacing_option" => "uniform"), + "vpa" => OptionsDict("ngrid" => 4, + "nelement" => 5, "L" => 6.0, "bc" => "zero", - "discretization" => "chebyshev_pseudospectral"), - "vz" => OptionsDict("ngrid" => 3, - "nelement" => 6, + "element_spacing_option" => "coarse_tails", + "discretization" => "gausslegendre_pseudospectral"), + "vz" => OptionsDict("ngrid" => 4, + "nelement" => 5, "L" => 6.0, "bc" => "zero", - "discretization" => "chebyshev_pseudospectral"), + 
"element_spacing_option" => "coarse_tails", + "discretization" => "gausslegendre_pseudospectral"), "ion_source_1" => OptionsDict("active" => true, "z_profile" => "gaussian", "z_width" => 0.125, diff --git a/moment_kinetics/debug_test/recycling_fraction_inputs.jl b/moment_kinetics/debug_test/recycling_fraction_inputs.jl index fec866af1..2d31efef4 100644 --- a/moment_kinetics/debug_test/recycling_fraction_inputs.jl +++ b/moment_kinetics/debug_test/recycling_fraction_inputs.jl @@ -65,12 +65,12 @@ test_input = OptionsDict("composition" => OptionsDict("n_ion_species" => 1, "discretization" => "chebyshev_pseudospectral", "element_spacing_option" => "sqrt"), "vpa" => OptionsDict("ngrid" => 3, - "nelement" => 2, + "nelement" => 3, "L" => 6.0, "bc" => "zero", "discretization" => "chebyshev_pseudospectral"), "vz" => OptionsDict("ngrid" => 3, - "nelement" => 2, + "nelement" => 4, "L" => 6.0, "bc" => "zero", "discretization" => "chebyshev_pseudospectral"), diff --git a/moment_kinetics/debug_test/runtest_template.jl b/moment_kinetics/debug_test/runtest_template.jl index 224c57ce7..1b97a1e68 100644 --- a/moment_kinetics/debug_test/runtest_template.jl +++ b/moment_kinetics/debug_test/runtest_template.jl @@ -3,6 +3,7 @@ using moment_kinetics.time_advance: time_advance! 
using moment_kinetics.communication using moment_kinetics.looping: all_dimensions, dimension_combinations, anyv_dimension_combinations +using moment_kinetics.type_definitions: OptionsDict using moment_kinetics.Glob using moment_kinetics.Primes @@ -60,8 +61,9 @@ function runtests(; restart=false) n_factors = length(factor(Vector, global_size[])) for input ∈ test_input_list, debug_loop_type ∈ dimension_combinations_to_test - if :sn ∈ debug_loop_type && "n_neutral_species" ∈ keys(input) && - input["n_neutral_species"] <= 0 + composition_section = get(input, "composition", OptionsDict()) + if :sn ∈ debug_loop_type && "n_neutral_species" ∈ keys(composition_section) && + composition_section["n_neutral_species"] <= 0 # Skip neutral dimension parallelisation options if the number of neutral # species is zero, as these would just be equivalent to running in serial continue @@ -73,24 +75,19 @@ function runtests(; restart=false) dims_to_test = debug_loop_type end for d ∈ all_dimensions - nelement_name = "$(d)_nelement" - if nelement_name ∈ keys(input) - nelement = input[nelement_name] - elseif d ∈ (:vperp, :vzeta, :vr) - nelement = 1 + dim_section = get(input, "$d", OptionsDict()) + if "nelement" ∈ keys(dim_section) + nelement = dim_section["nelement"] else # Dummy value, here it only matters if this is 1 or greater than 1 - nelement = 2 + nelement = 1 end - ngrid_name = "$(d)_ngrid" - if ngrid_name ∈ keys(input) - ngrid = input[ngrid_name] - elseif d ∈ (:vperp, :vzeta, :vr) - ngrid = 1 + if "ngrid" ∈ keys(dim_section) + ngrid = dim_section["ngrid"] else # Dummy value, here it only matters if this is 1 or greater than 1 - ngrid = 2 + ngrid = 1 end if nelement == 1 && ngrid == 1 diff --git a/moment_kinetics/src/boundary_conditions.jl b/moment_kinetics/src/boundary_conditions.jl index 43ebfafef..0cbf013ad 100644 --- a/moment_kinetics/src/boundary_conditions.jl +++ b/moment_kinetics/src/boundary_conditions.jl @@ -146,16 +146,40 @@ function enforce_z_boundary_condition!(pdf, 
density, upar, ppar, phi, moments, b density_offset = 1.0 vwidth = 1.0 if z.irank == 0 - @loop_s_r_vperp_vpa is ir ivperp ivpa begin - if adv[is].speed[ivpa,1,ir] > 0.0 - pdf[ivpa,ivperp,1,ir,is] = density_offset * exp(-(vpa.grid[ivpa]^2 + vperp.grid[ivperp]^2)/vwidth^2) / sqrt(pi) + @loop_s is begin + speed = adv[is].speed + @loop_r ir begin + prefactor = density_offset + if moments.evolve_density + prefactor /= density[1,ir,is] + end + if moments.evolve_ppar + prefactor *= moments.ion.vth[1,ir,is] + end + @loop_vperp_vpa ivperp ivpa begin + if speed[1,ivpa,ivperp,ir] > 0.0 + pdf[ivpa,ivperp,1,ir,is] = prefactor * exp(-(speed[1,ivpa,ivperp,ir]^2 + vperp.grid[ivperp]^2)/vwidth^2) + end + end end end end if z.irank == z.nrank - 1 - @loop_s_r_vperp_vpa is ir ivperp ivpa begin - if adv[is].speed[ivpa,end,ir] > 0.0 - pdf[ivpa,ivperp,end,ir,is] = density_offset * exp(-(vpa.grid[ivpa]^2 + vperp.grid[ivperp]^2)/vwidth^2) / sqrt(pi) + @loop_s is begin + speed = adv[is].speed + @loop_r ir begin + prefactor = density_offset + if moments.evolve_density + prefactor /= density[end,ir,is] + end + if moments.evolve_ppar + prefactor *= moments.ion.vth[end,ir,is] + end + @loop_vperp_vpa ivperp ivpa begin + if speed[end,ivpa,ivperp,ir] > 0.0 + pdf[ivpa,ivperp,end,ir,is] = prefactor * exp(-(speed[end,ivpa,ivperp,ir]^2 + vperp.grid[ivperp]^2)/vwidth^2) + end + end end end end @@ -330,20 +354,42 @@ function enforce_neutral_z_boundary_condition!(pdf, density, uz, pz, moments, de density_offset = 1.0 vwidth = 1.0 if z.irank == 0 - @loop_sn_r_vzeta_vr_vz isn ir ivzeta ivr ivz begin - if adv[isn].speed[ivz,ivr,ivzeta,1,ir] > 0.0 - pdf[ivz,ivr,ivzeta,1,ir,is] = density_offset * - exp(-(vzeta.grid[ivzeta]^2 + vr.grid[ivr] + vz.grid[ivz])/vwidth^2) / - sqrt(pi) + @loop_sn isn begin + speed = adv[isn].speed + @loop_r ir begin + prefactor = density_offset + if moments.evolve_density + prefactor /= density[1,ir,isn] + end + if moments.evolve_ppar + prefactor *= 
moments.neutral.vth[1,ir,isn] + end + @loop_vzeta_vr_vz ivzeta ivr ivz begin + if speed[1,ivz,ivr,ivzeta,ir] > 0.0 + pdf[ivz,ivr,ivzeta,1,ir,isn] = prefactor * + exp(-(speed[1,ivz,ivr,ivzeta,ir]^2 + vr.grid[ivr] + vz.grid[ivz])/vwidth^2) + end + end end end end if z.irank == z.nrank - 1 - @loop_sn_r_vzeta_vr_vz isn ir ivzeta ivr ivz begin - if adv[isn].speed[ivz,ivr,ivzeta,end,ir] > 0.0 - pdf[ivz,ivr,ivzeta,end,ir,is] = density_offset * - exp(-(vzeta.grid[ivzeta]^2 + vr.grid[ivr] + vz.grid[ivz])/vwidth^2) / - sqrt(pi) + @loop_sn isn begin + speed = adv[isn].speed + @loop_r ir begin + prefactor = density_offset + if moments.evolve_density + prefactor /= density[end,ir,isn] + end + if moments.evolve_ppar + prefactor *= moments.neutral.vth[end,ir,isn] + end + @loop_vzeta_vr_vz ivzeta ivr ivz begin + if speed[end,ivz,ivr,ivzeta,ir] > 0.0 + pdf[ivz,ivr,ivzeta,end,ir,isn] = prefactor * + exp(-(speed[end,ivz,ivr,ivzeta,ir]^2 + vr.grid[ivr] + vz.grid[ivz])/vwidth^2) + end + end end end end @@ -1003,24 +1049,33 @@ function enforce_vperp_boundary_condition!(f::AbstractArray{mk_float,5}, bc, vpe end function enforce_vperp_boundary_condition!(f::AbstractArray{mk_float,4}, bc, vperp, vperp_spectral, vperp_advect, diffusion) + @loop_r ir begin + @views enforce_vperp_boundary_condition!(f[:,:,:,ir], bc, vperp, vperp_spectral, + vperp_advect, diffusion, ir) + end + return nothing +end + +function enforce_vperp_boundary_condition!(f::AbstractArray{mk_float,3}, bc, vperp, + vperp_spectral, vperp_advect, diffusion, ir) if bc == "zero" || bc == "zero-impose-regularity" nvperp = vperp.n ngrid = vperp.ngrid # set zero boundary condition - @loop_r_z_vpa ir iz ivpa begin + @loop_z_vpa iz ivpa begin if diffusion || vperp_advect.speed[nvperp,ivpa,iz,ir] < 0.0 - f[ivpa,nvperp,iz,ir] = 0.0 + f[ivpa,nvperp,iz] = 0.0 end end # set regularity condition d F / d vperp = 0 at vperp = 0 if bc == "zero-impose-regularity" && (vperp.discretization == "gausslegendre_pseudospectral" || 
vperp.discretization == "chebyshev_pseudospectral") D0 = vperp_spectral.radau.D0 buffer = @view vperp.scratch[1:ngrid-1] - @loop_r_z_vpa ir iz ivpa begin + @loop_z_vpa iz ivpa begin if diffusion || vperp_advect.speed[1,ivpa,iz,ir] > 0.0 # adjust F(vperp = 0) so that d F / d vperp = 0 at vperp = 0 - @views @. buffer = D0[2:ngrid] * f[ivpa,2:ngrid,iz,ir] - f[ivpa,1,iz,ir] = -sum(buffer)/D0[1] + @views @. buffer = D0[2:ngrid] * f[ivpa,2:ngrid,iz] + f[ivpa,1,iz] = -sum(buffer)/D0[1] end end elseif bc == "zero" @@ -1036,4 +1091,39 @@ function enforce_vperp_boundary_condition!(f::AbstractArray{mk_float,4}, bc, vpe end end +""" + skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + +This function returns `true` when the grid point specified by `iz`, `ivperp`, `ivpa` would +be set by the boundary conditions on the electron distribution function. When this +happens, the corresponding row should be skipped when adding contributions to the Jacobian +matrix, so that the row remains the same as a row of the identity matrix, so that the +Jacobian matrix does not modify those points. Returns `false` otherwise. `z_speed` is indexed as `z_speed[iz,ivpa,ivperp]`. +""" +function skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + # z boundary condition + # Treat as if using Dirichlet boundary condition for incoming part of the distribution + # function on the block boundary, regardless of the actual boundary condition and + # whether this is an internal boundary or an actual domain boundary. 
This prevents the + # matrix evaluated for a single block (without coupling to neighbouring blocks) from + # becoming singular + if iz == 1 && z_speed[iz,ivpa,ivperp] ≥ 0.0 + return true + end + if iz == z.n && z_speed[iz,ivpa,ivperp] ≤ 0.0 + return true + end + + # vperp boundary condition + if vperp.n > 1 && ivperp == vperp.n + return true + end + + if ivpa == 1 || ivpa == vpa.n + return true + end + + return false +end + end # boundary_conditions diff --git a/moment_kinetics/src/calculus.jl b/moment_kinetics/src/calculus.jl index 47036e077..dfb95e33b 100644 --- a/moment_kinetics/src/calculus.jl +++ b/moment_kinetics/src/calculus.jl @@ -550,7 +550,7 @@ function reconcile_element_boundaries_MPI!(df1d::AbstractArray{mk_float,Ndims}, # synchronize buffers _block_synchronize() end - + function apply_adv_fac!(buffer::AbstractArray{mk_float,Ndims},adv_fac::AbstractArray{mk_float,Ndims},endpoints::AbstractArray{mk_float,Ndims},sgn::mk_int) where Ndims #buffer contains off-process endpoint #adv_fac < 0 is positive advection speed @@ -647,6 +647,158 @@ function reconcile_element_boundaries_MPI!(df1d::AbstractArray{mk_float,Ndims}, _block_synchronize() end +# Special version for pdf_electron with no r-dimension, which has the same number of +# dimensions as an ion/neutral moment variable, but different dimensions. 
+function reconcile_element_boundaries_MPI_z_pdf_vpavperpz!(df1d::AbstractArray{mk_float,3}, + dfdx_lower_endpoints::AbstractArray{mk_float,2}, dfdx_upper_endpoints::AbstractArray{mk_float,2}, + receive_buffer1::AbstractArray{mk_float,2}, receive_buffer2::AbstractArray{mk_float,2}, coord) + + # synchronize buffers + # -- this all-to-all block communication here requires that this function is NOT called from within a parallelised loop + # -- or from a @serial_region or from an if statement isolating a single rank on a block + _block_synchronize() + #if block_rank[] == 0 # lead process on this shared-memory block + @serial_region begin + + # now deal with endpoints that are stored across ranks + comm = coord.comm + nrank = coord.nrank + irank = coord.irank + #send_buffer = coord.send_buffer + #receive_buffer = coord.receive_buffer + # sending pattern is cyclic. First we send data from irank -> irank + 1 + # to fix the lower endpoints, then we send data from irank -> irank - 1 + # to fix upper endpoints. Special exception for the periodic points. + # receive_buffer1/receive_buffer2 hold the received data; the dfdx_*_endpoints arrays are sent directly + + # pass data from irank -> irank + 1, receive data from irank - 1 + idst = mod(irank+1,nrank) # destination rank for sent data + isrc = mod(irank-1,nrank) # source rank for received data + #MRH what value should tag take here and below? 
Esp if nrank >= 32 + rreq1 = MPI.Irecv!(receive_buffer1, comm; source=isrc, tag=1) + sreq1 = MPI.Isend(dfdx_upper_endpoints, comm; dest=idst, tag=1) + #print("$irank: Sending $irank -> $idst = $dfdx_upper_endpoints\n") + + # pass data from irank -> irank - 1, receive data from irank + 1 + idst = mod(irank-1,nrank) # destination rank for sent data + isrc = mod(irank+1,nrank) # source rank for received data + #MRH what value should tag take here and below? Esp if nrank >= 32 + rreq2 = MPI.Irecv!(receive_buffer2, comm; source=isrc, tag=2) + sreq2 = MPI.Isend(dfdx_lower_endpoints, comm; dest=idst, tag=2) + #print("$irank: Sending $irank -> $idst = $dfdx_lower_endpoints\n") + stats = MPI.Waitall([rreq1, sreq1, rreq2, sreq2]) + #print("$irank: Received $isrc -> $irank = $receive_buffer1\n") + #print("$irank: Received $isrc -> $irank = $receive_buffer2\n") + + # now update receive buffers, taking into account the reconciliation + if irank == 0 + if coord.bc == "periodic" + # update the extreme lower endpoint by averaging with data from irank = nrank - 1 + receive_buffer1 .= 0.5*(receive_buffer1 .+ dfdx_lower_endpoints) + else # not periodic: keep this rank's own value at the extreme lower point + receive_buffer1 .= dfdx_lower_endpoints + end + else # enforce continuity at lower endpoint + receive_buffer1 .= 0.5*(receive_buffer1 .+ dfdx_lower_endpoints) + end + #now update the df1d array -- using a slice appropriate to the dimension reconciled + @views df1d[:,:,1] .= receive_buffer1 + + if irank == nrank-1 + if coord.bc == "periodic" + # update the extreme upper endpoint by averaging with data from irank = 0 + receive_buffer2 .= 0.5*(receive_buffer2 .+ dfdx_upper_endpoints) + else # not periodic: keep this rank's own value at the extreme upper point + receive_buffer2 .= dfdx_upper_endpoints + end + else # enforce continuity at upper endpoint + receive_buffer2 .= 0.5*(receive_buffer2 .+ dfdx_upper_endpoints) + end + #now update the df1d array -- using a slice appropriate to the 
dimension reconciled + @views df1d[:,:,end] .= receive_buffer2 + + end + # synchronize buffers + _block_synchronize() +end + +# Special version for pdf_electron with no r-dimension, which has the same number of +# dimensions as an ion/neutral moment variable, but different dimensions. 
+function reconcile_element_boundaries_MPI_z_pdf_vpavperpz!(df1d::AbstractArray{mk_float,3}, + adv_fac_lower_endpoints::AbstractArray{mk_float,2}, adv_fac_upper_endpoints::AbstractArray{mk_float,2}, + dfdx_lower_endpoints::AbstractArray{mk_float,2}, dfdx_upper_endpoints::AbstractArray{mk_float,2}, + receive_buffer1::AbstractArray{mk_float,2}, receive_buffer2::AbstractArray{mk_float,2}, coord) + + # synchronize buffers + # -- this all-to-all block communication here requires that this function is NOT called from within a parallelised loop + # -- or from a @serial_region or from an if statement isolating a single rank on a block + _block_synchronize() + #if block_rank[] == 0 # lead process on this shared-memory block + @serial_region begin + # now deal with endpoints that are stored across ranks + comm = coord.comm + nrank = coord.nrank + irank = coord.irank + #send_buffer = coord.send_buffer + #receive_buffer = coord.receive_buffer + # sending pattern is cyclic. First we send data from irank -> irank + 1 + # to fix the lower endpoints, then we send data from irank -> irank - 1 + # to fix upper endpoints. Special exception for the periodic points. + # receive_buffer1/receive_buffer2 hold the received data; the dfdx_*_endpoints arrays are sent directly + + # send highest end point on THIS rank + # pass data from irank -> irank + 1, receive data from irank - 1 + idst = mod(irank+1,nrank) # destination rank for sent data + isrc = mod(irank-1,nrank) # source rank for received data + #MRH what value should tag take here and below? 
Esp if nrank >= 32 + rreq1 = MPI.Irecv!(receive_buffer1, comm; source=isrc, tag=1) + sreq1 = MPI.Isend(dfdx_upper_endpoints, comm; dest=idst, tag=1) + #print("$irank: Sending $irank -> $idst = $dfdx_upper_endpoints\n") + + # send lowest end point on THIS rank + # pass data from irank -> irank - 1, receive data from irank + 1 + idst = mod(irank-1,nrank) # destination rank for sent data + isrc = mod(irank+1,nrank) # source rank for received data + #MRH what value should tag take here and below? Esp if nrank >= 32 + rreq2 = MPI.Irecv!(receive_buffer2, comm; source=isrc, tag=2) + sreq2 = MPI.Isend(dfdx_lower_endpoints, comm; dest=idst, tag=2) + #print("$irank: Sending $irank -> $idst = $dfdx_lower_endpoints\n") + stats = MPI.Waitall([rreq1, sreq1, rreq2, sreq2]) + #print("$irank: Received $isrc -> $irank = $receive_buffer1\n") + #print("$irank: Received $isrc -> $irank = $receive_buffer2\n") + + # now update receive buffers, taking into account the reconciliation + if irank == 0 + if coord.bc == "periodic" + # depending on adv_fac, update the extreme lower endpoint with data from irank = nrank - 1 + apply_adv_fac!(receive_buffer1,adv_fac_lower_endpoints,dfdx_lower_endpoints,1) + else # not periodic: keep this rank's own value at the extreme lower point + receive_buffer1 .= dfdx_lower_endpoints + end + else # depending on adv_fac, update the lower endpoint with data from irank - 1 + apply_adv_fac!(receive_buffer1,adv_fac_lower_endpoints,dfdx_lower_endpoints,1) + end + #now update the df1d array -- using a slice appropriate to the dimension reconciled + @views df1d[:,:,1] .= receive_buffer1 + + if irank == nrank-1 + if coord.bc == "periodic" + # depending on adv_fac, update the extreme upper endpoint with data from irank = 0 + apply_adv_fac!(receive_buffer2,adv_fac_upper_endpoints,dfdx_upper_endpoints,-1) + else # not periodic: keep this rank's own value at the extreme upper point + receive_buffer2 .= dfdx_upper_endpoints + end + else # depending on adv_fac, update the upper 
endpoint with data from irank + 1 + 
apply_adv_fac!(receive_buffer2,adv_fac_upper_endpoints,dfdx_upper_endpoints,-1) + end + #now update the df1d array -- using a slice appropriate to the dimension reconciled + @views df1d[:,:,end] .= receive_buffer2 + + end + # synchronize buffers + _block_synchronize() +end + """ Computes the integral of the integrand, using the input wgts """ diff --git a/moment_kinetics/src/charge_exchange.jl b/moment_kinetics/src/charge_exchange.jl index ac460dba0..3f6095a22 100644 --- a/moment_kinetics/src/charge_exchange.jl +++ b/moment_kinetics/src/charge_exchange.jl @@ -120,39 +120,38 @@ function charge_exchange_collisions_single_species!(f_out, pdf_in, pdf_other, # values of dz/dt; as charge exchange and ionization collisions require # the evaluation of the pdf for species s' to obtain the update for species s, # will thus have to interpolate between the different vpa grids - if moments.evolve_ppar || moments.evolve_upar - if !moments.evolve_upar - # if evolve_ppar = true and evolve_upar = false, vpa coordinate is - # vpahat_s = vpa/vth_s; - # we have f_{s'}(vpahat_{s'}) = f_{s'}(vpahat_s * vth_s / vth_{s'}); - # to get f_{s'}(vpahat_s), need to obtain vpahat_s grid locations - # in terms of the vpahat_{s'} coordinate: - # (vpahat_s)_j = (vpahat_{s'})_j * vth_{s'} / vth_{s} - @. vpa.scratch = vpa.grid / vth_ratio - elseif !moments.evolve_ppar - # if evolve_ppar = false and evolve_upar = true, vpa coordinate is - # wpa_s = vpa-upar_s; - # we have f_{s'}(wpa_{s'}) = f_{s'}((wpa_s + upar_s - upar_{s'}; - # to get f_{s'}(wpa_s), need to obtain wpa_s grid locations - # in terms of the wpa_{s'} coordinate: - # (wpa_s)_j = (wpa_{s'})_j + upar_{s'} - upar_{s} - @. 
vpa.scratch = vpa.grid + upar[iz,ir] - upar_other[iz,ir] - else - # if evolve_ppar = true and evolve_upar = true, vpa coordinate is - # wpahat_s = (vpa-upar_s)/vth_s; - # we have f_{s'}(wpahat_{s'}) = f_{s'}((wpahat_s * vth_s + upar_s - upar_{s'}) / vth_{s'}); - # to get f_{s'}(wpahat_s), need to obtain wpahat_s grid locations - # in terms of the wpahat_{s'} coordinate: - # (wpahat_{s'})_j = ((wpahat_{s})_j * vth_{s} + upar_{s} - upar_{s'}) / vth_{s'} - @. vpa.scratch = (vpa.grid * vth[iz,ir] + upar[iz,ir] - upar_other[iz,ir]) / vth_other[iz,ir] - end - # interpolate to the new grid (passed in as vpa.scratch) - # and return interpolated values in vpa.scratch2 - @views interpolate_to_grid_vpa!(vpa.scratch2, vpa.scratch, pdf_other[:,iz,ir], vpa_other, spectral_other) + if moments.evolve_upar && moments.evolve_ppar + # if evolve_ppar = true and evolve_upar = true, vpa coordinate is + # wpahat_s = (vpa-upar_s)/vth_s; + # we have f_{s'}(wpahat_{s'}) = f_{s'}((wpahat_s * vth_s + upar_s - upar_{s'}) / vth_{s'}); + # to get f_{s'}(wpahat_s), need to obtain wpahat_s grid locations + # in terms of the wpahat_{s'} coordinate: + # (wpahat_{s'})_j = ((wpahat_{s})_j * vth_{s} + upar_{s} - upar_{s'}) / vth_{s'} + new_grid = @. vpa.scratch = (vpa.grid * vth[iz,ir] + upar[iz,ir] - upar_other[iz,ir]) / vth_other[iz,ir] + elseif moments.evolve_ppar + # if evolve_ppar = true and evolve_upar = false, vpa coordinate is + # vpahat_s = vpa/vth_s; + # we have f_{s'}(vpahat_{s'}) = f_{s'}(vpahat_s * vth_s / vth_{s'}); + # to get f_{s'}(vpahat_s), need to obtain vpahat_s grid locations + # in terms of the vpahat_{s'} coordinate: + # (vpahat_s)_j = (vpahat_{s'})_j * vth_{s'} / vth_{s} + new_grid = @. 
vpa.scratch = vpa.grid / vth_ratio + elseif moments.evolve_upar + # if evolve_ppar = false and evolve_upar = true, vpa coordinate is + # wpa_s = vpa-upar_s; + # we have f_{s'}(wpa_{s'}) = f_{s'}((wpa_s + upar_s - upar_{s'}; + # to get f_{s'}(wpa_s), need to obtain wpa_s grid locations + # in terms of the wpa_{s'} coordinate: + # (wpa_s)_j = (wpa_{s'})_j + upar_{s'} - upar_{s} + new_grid = @. vpa.scratch = vpa.grid + upar[iz,ir] - upar_other[iz,ir] else - # no need to interpolate if neither upar or ppar evolved separately from pdf - vpa.scratch2 .= pdf_other[:,iz,ir] + # Interpolate even when using 'drift-kinetic' mode, so that vpa and vz + # coordinates can be different. + new_grid = vpa.grid end + # interpolate to new_grid and return interpolated values in vpa.scratch2 + @views interpolate_to_grid_vpa!(vpa.scratch2, new_grid, pdf_other[:,iz,ir], vpa_other, spectral_other) + if neutrals @loop_vz ivz begin f_out[ivz,iz,ir] += dt * charge_exchange_frequency * density_other[iz,ir] * diff --git a/moment_kinetics/src/communication.jl b/moment_kinetics/src/communication.jl index 394446a4a..0c9080a36 100644 --- a/moment_kinetics/src/communication.jl +++ b/moment_kinetics/src/communication.jl @@ -397,7 +397,7 @@ end """ struct DebugMPISharedArray{T, N, TArray <: AbstractArray{T,N}, TIntArray <: AbstractArray{mk_int,N}, TBoolArray <: AbstractArray{Bool,N}} <: AbstractArray{T, N} data::TArray - accessed::Ref{Bool} + accessed::Base.RefValue{Bool} is_initialized::TIntArray is_read::TBoolArray is_written::TBoolArray diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl index b76da95f1..0f80b4992 100644 --- a/moment_kinetics/src/coordinates.jl +++ b/moment_kinetics/src/coordinates.jl @@ -100,6 +100,9 @@ struct coordinate{T <: AbstractVector{mk_float},Tbparams} # scratch_shared2 is a shared-memory array used for intermediate calculations requiring # n entries scratch_shared2::T + # scratch_shared3 is a shared-memory array used for intermediate 
calculations requiring + # n entries + scratch_shared3::T # scratch_2d and scratch2_2d are arrays used for intermediate calculations requiring # ngrid x nelement entries scratch_2d::Array{mk_float,2} @@ -306,15 +309,18 @@ function define_coordinate(coord_input::NamedTuple; parallel_io::Bool=false, if ignore_MPI scratch_shared = allocate_float(n_local) scratch_shared2 = allocate_float(n_local) + scratch_shared3 = allocate_float(n_local) else scratch_shared = allocate_shared_float(n_local) scratch_shared2 = allocate_shared_float(n_local) + scratch_shared3 = allocate_shared_float(n_local) end - # Initialise scratch_shared and scratch_shared2 so that the debug checks do not - # complain when they get printed by `println(io, all_inputs)` in mk_input(). + # Initialise scratch_shared* so that the debug checks do not complain when they get + # printed by `println(io, all_inputs)` in mk_input(). if block_rank[] == 0 scratch_shared .= NaN scratch_shared2 .= NaN + scratch_shared3 .= NaN end if !ignore_MPI _block_synchronize() @@ -380,10 +386,11 @@ function define_coordinate(coord_input::NamedTuple; parallel_io::Bool=false, coord_input.cheb_option, coord_input.bc, coord_input.boundary_parameters, wgts, uniform_grid, duniform_dgrid, scratch, copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), - copy(scratch), scratch_shared, scratch_shared2, scratch_2d, copy(scratch_2d), - advection, send_buffer, receive_buffer, comm, local_io_range, global_io_range, - element_scale, element_shift, coord_input.element_spacing_option, - element_boundaries, radau_first_element, other_nodes, one_over_denominator) + copy(scratch), scratch_shared, scratch_shared2, scratch_shared3, scratch_2d, + copy(scratch_2d), advection, send_buffer, receive_buffer, comm, local_io_range, + global_io_range, element_scale, element_shift, + coord_input.element_spacing_option, element_boundaries, radau_first_element, + other_nodes, one_over_denominator) if coord.n 
== 1 && occursin("v", coord.name) spectral = null_velocity_dimension_info() @@ -466,6 +473,95 @@ function set_element_boundaries(nelement_global, L, element_spacing_option, coor for j in 1:nsqrt element_boundaries[(nelement_global+1)+ 1 - j] = (L/2.0) - fac*(L/2.0)*((j-1)/(nsqrt-1))^2 end + elseif startswith(element_spacing_option, "compressed") + element_spacing_option_split = split(element_spacing_option, "_") + if length(element_spacing_option_split) == 1 + compression_factor = 4.0 + else + compression_factor = parse(mk_float, element_spacing_option_split[2]) + end + + #shifted_inds = collect(mk_float, 0:nelement_global) .- 0.5 .* nelement_global + ## Choose element boundary positions to be given by + ## s = A*shifted_inds + B*shifted_inds^3 + ## Choose A and B so that, with simin=-nelement_global/2: + ## s(simin) = -L/2 + ## s(simin+1) = -L/2 + L/nelement_global/compression_factor + ## i.e. so that the grid spacing of the element nearest the wall is + ## compression_factor smaller than the elements in a uniformly spaced grid. + ## simin*A + simin^3*B = -L/2 + ## A = -(L/2 + simin^3*B)/simin + ## + ## (simin+1)*A + (simin+1)^3*B = -L/2 + L/nelement_global/compression_factor + ## -(simin+1)*(L/2 + simin^3*B)/simin + (simin+1)^3*B = -L/2 + L/nelement_global/compression_factor + ## -(simin+1)*simin^3*B/simin + (simin+1)^3*B = -L/2 + L/nelement_global/compression_factor + (simin+1)*L/2/simin + ## (simin+1)*simin^2*B - (simin+1)^3*B = L/2 - L/nelement_global/compression_factor - (simin+1)*L/2/simin + ## B = (L/2 - L/nelement_global/compression_factor - (simin+1)*L/2/simin) / ((simin+1)*simin^2 - (simin+1)^3) + + #simin = -nelement_global / 2.0 + #B = (L/2.0 - L/nelement_global/compression_factor - (simin+1.0)*L/2.0/simin) / ((simin+1.0)*simin^2 - (simin+1.0)^3) + #A = -(L/2.0 + simin^3*B)/simin + + #@. 
element_boundaries = A*shifted_inds + B*shifted_inds^3 + + # To have the grid spacing change as little as possible from one element to the + # next, the function that defines the element boundary positions should have + # constant curvature. The curvature has to change sign at the mid-point of the + # domain, so this means that the function must be defined piecewise - one piece + # for the lower half and one for the upper half. + # An apparently ideal way to do this would be to use a quadratic function, which + # would mean that the ratio of the sizes of adjacent elements is the same + # throughout the grid. However, a quadratic would mean a maximum compression + # factor of 2 before the function becomes non-monotonic, see next: + # We define the quadratic by making the gradient at the boundaries + # `compression_factor` larger than the gradient L of the linear function that + # would give a uniform grid. + # s(a) = A*a + B*a*|a| + # where -0.5≤a≤0.5, and + # s(0.5) = L/2 + # s'(0.5) = compression_factor*L + # so + # A/2 + B/4 = L/2 + # A + B = compression_factor*L + # ⇒ + # B = 2*(compression_factor - 1)*L + # A = L - B/2 = L - (compression_factor-1)*L = (2 - compression_factor)*L + # + # Therefore instead we choose a circular arc which can be monotonic while reaching + # any gradient. To make a circle sensible, normalise s by L for this version. + # (s-s0)^2 + (a-a0)^2 = r^2 + # where -0.5≤a≤0.5, and + # s(0) = 0 + # s(a) = 1/2 + # s'(a) = 1/compression_factor + # and for a>0, a0<0 and s0>0 while for a<0, a0>0 and s0<0. 
This gives + # s0^2 + a0^2 = r^2 + # (1/2-s0)^2 + (1/2-a0)^2 = r^2 = s0^2 + a0^2 + # 2*(1/2-s0)/compression_factor + 2*(1/2-a0) = 0 + # solving these + # a0 = (1/2-s0)/compression_factor + 1/2 + # 1/4 - s0 + s0^2 + 1/4 - a0 + a0^2 = s0^2 + a0^2 + # 1/2 - s0 - a0 = 0 + # s0 = 1/2 - a0 = 1/2 - (1/2-s0)/compression_factor - 1/2 + # (1 - 1/compression_factor)*s0 = -1/compression_factor/2 + # s0 = 1/compression_factor/2/(1/compression_factor-1) + if abs(compression_factor - 1.0) < 1.0e-12 + # compression_factor is too close to 1, which would be a singular value where + # s0=∞ and a0=-∞, so just use constant spacing. + for j in 1:nelement_global+1 + element_boundaries[j] = L*((j-1)/(nelement_global) - 0.5) + end + else + s0 = 1.0 / compression_factor / 2.0 / (1.0 / compression_factor - 1.0) + a0 = (0.5 - s0)/compression_factor + 0.5 + a = collect(0:nelement_global) ./ nelement_global .- 0.5 + mid_ind_plus = (nelement_global + 1) ÷ 2 + 1 + mid_ind_minus = nelement_global ÷ 2 + 1 + @. element_boundaries[1:mid_ind_minus] = + -L * (sqrt(s0^2 + a0^2 - (a[1:mid_ind_minus]+a0)^2) + s0) + @. element_boundaries[mid_ind_plus:end] = + L * (sqrt(s0^2 + a0^2 - (a[mid_ind_plus:end]-a0)^2) + s0) + end elseif element_spacing_option == "coarse_tails" # Element boundaries at # diff --git a/moment_kinetics/src/derivatives.jl b/moment_kinetics/src/derivatives.jl index 71bd31427..cd8cf84c0 100644 --- a/moment_kinetics/src/derivatives.jl +++ b/moment_kinetics/src/derivatives.jl @@ -11,10 +11,14 @@ module derivatives export derivative_r!, derivative_r_chrg!, derivative_r_ntrl! export derivative_z!, derivative_z_chrg!, derivative_z_ntrl! -using ..calculus: derivative!, second_derivative!, reconcile_element_boundaries_MPI! +using ..calculus: derivative!, second_derivative!, reconcile_element_boundaries_MPI!, + reconcile_element_boundaries_MPI_z_pdf_vpavperpz!, apply_adv_fac! 
+using ..communication using ..type_definitions: mk_float using ..looping +using MPI + """ Centered derivatives df/dr group of rountines for @@ -241,6 +245,32 @@ function derivative_z!(dfdz::AbstractArray{mk_float,3}, f::AbstractArray{mk_floa end end +# df/dz +# 3D version for f[vpa,vperp,z]. Uses modified function name to avoid clash with 'standard' +# 3D version for ion/neutral moments. +function derivative_z_pdf_vpavperpz!(dfdz::AbstractArray{mk_float,3}, f::AbstractArray{mk_float,3}, + dfdz_lower_endpoints::AbstractArray{mk_float,2}, + dfdz_upper_endpoints::AbstractArray{mk_float,2}, + z_receive_buffer1::AbstractArray{mk_float,2}, + z_receive_buffer2::AbstractArray{mk_float,2}, z_spectral, z) + + # differentiate f w.r.t z + @loop_vperp_vpa ivperp ivpa begin + @views derivative!(dfdz[ivpa,ivperp,:], f[ivpa,ivperp,:], z, z_spectral) + # get external endpoints to reconcile via MPI + dfdz_lower_endpoints[ivpa,ivperp] = z.scratch_2d[1,1] + dfdz_upper_endpoints[ivpa,ivperp] = z.scratch_2d[end,end] + end + + # now reconcile element boundaries across + # processes with large message + if z.nelement_local < z.nelement_global + reconcile_element_boundaries_MPI_z_pdf_vpavperpz!( + dfdz, dfdz_lower_endpoints, dfdz_upper_endpoints, z_receive_buffer1, + z_receive_buffer2, z) + end +end + #5D version for f[vpa,vperp,z,r,s] -> dfn ions function derivative_z!(dfdz::AbstractArray{mk_float,5}, f::AbstractArray{mk_float,5}, dfdz_lower_endpoints::AbstractArray{mk_float,4}, @@ -790,6 +820,36 @@ function derivative_z!(dfdz::AbstractArray{mk_float,3}, f::AbstractArray{mk_floa end end +# df/dz +# 3D version for f[vpa,vperp,z]. Uses modified function name to avoid clash with 'standard' +# 3D version for ion/neutral moments. 
+function derivative_z_pdf_vpavperpz!(dfdz::AbstractArray{mk_float,3}, f::AbstractArray{mk_float,3}, + adv_fac, adv_fac_lower_buffer::AbstractArray{mk_float,2}, + adv_fac_upper_buffer::AbstractArray{mk_float,2}, + dfdz_lower_endpoints::AbstractArray{mk_float,2}, + dfdz_upper_endpoints::AbstractArray{mk_float,2}, + z_receive_buffer1::AbstractArray{mk_float,2}, + z_receive_buffer2::AbstractArray{mk_float,2}, z_spectral, z) + + # differentiate f w.r.t z + @loop_vperp_vpa ivperp ivpa begin + @views derivative!(dfdz[ivpa,ivperp,:], f[ivpa,ivperp,:], z, adv_fac[:,ivpa,ivperp], z_spectral) + # get external endpoints to reconcile via MPI + dfdz_lower_endpoints[ivpa,ivperp] = z.scratch_2d[1,1] + dfdz_upper_endpoints[ivpa,ivperp] = z.scratch_2d[end,end] + adv_fac_lower_buffer[ivpa,ivperp] = adv_fac[1,ivpa,ivperp] + adv_fac_upper_buffer[ivpa,ivperp] = adv_fac[end,ivpa,ivperp] + end + + # now reconcile element boundaries across + # processes with large message + if z.nelement_local < z.nelement_global + reconcile_element_boundaries_MPI_z_pdf_vpavperpz!( + dfdz, adv_fac_lower_buffer, adv_fac_upper_buffer, dfdz_lower_endpoints, + dfdz_upper_endpoints, z_receive_buffer1, z_receive_buffer2, z) + end +end + #5D version for f[vpa,vperp,z,r,s] -> dfn ion particles function derivative_z!(dfdz::AbstractArray{mk_float,5}, f::AbstractArray{mk_float,5}, advect, adv_fac_lower_buffer::AbstractArray{mk_float,4}, diff --git a/moment_kinetics/src/electron_fluid_equations.jl b/moment_kinetics/src/electron_fluid_equations.jl index 3b661975a..cead7d5f5 100644 --- a/moment_kinetics/src/electron_fluid_equations.jl +++ b/moment_kinetics/src/electron_fluid_equations.jl @@ -4,6 +4,8 @@ export calculate_electron_density! export calculate_electron_upar_from_charge_conservation! export calculate_electron_moments! export electron_energy_equation! +export electron_energy_equation_no_r! +export add_electron_energy_equation_to_Jacobian! export calculate_electron_qpar! 
export calculate_electron_parallel_friction_force! export calculate_electron_qpar_from_pdf! @@ -164,7 +166,26 @@ function electron_energy_equation!(ppar_out, ppar_in, electron_density, electron ion_density, ion_upar, ion_ppar, density_neutral, uz_neutral, pz_neutral, moments, collisions, dt, composition, electron_source_settings, num_diss_params, - z; conduction=true) + r, z; conduction=true) + for ir ∈ 1:r.n + @views electron_energy_equation_no_r!(ppar_out[:,ir], ppar_in[:,ir], + electron_density[:,ir], electron_upar[:,ir], + ion_density[:,ir,:], ion_upar[:,ir,:], + ion_ppar[:,ir,:], density_neutral[:,ir,:], + uz_neutral[:,ir,:], pz_neutral[:,ir,:], + moments, collisions, dt, composition, + electron_source_settings, num_diss_params, + z, ir; conduction=conduction) + end + return nothing +end + +function electron_energy_equation_no_r!(ppar_out, ppar_in, electron_density, + electron_upar, ion_density, ion_upar, ion_ppar, + density_neutral, uz_neutral, pz_neutral, moments, + collisions, dt, composition, + electron_source_settings, num_diss_params, z, ir; + conduction=true) if composition.electron_physics == kinetic_electrons_with_temperature_equation # Hacky way to implement temperature equation: # - convert ppar to T by dividing by density @@ -173,22 +194,22 @@ function electron_energy_equation!(ppar_out, ppar_in, electron_density, electron # old density? For initial testing, only looking at the electron initialisation # where density is not updated, this does not matter). 
- begin_r_z_region() + begin_z_region() # define some abbreviated variables for convenient use in rest of function me_over_mi = composition.me_over_mi nu_ei = collisions.electron_fluid.nu_ei - T_in = moments.temp + T_in = @view moments.temp[:,ir] # calculate contribution to rhs of energy equation (formulated in terms of pressure) # arising from derivatives of ppar, qpar and upar - @loop_r_z ir iz begin + @loop_z iz begin # Convert ppar_out to temperature for most of this function - ppar_out[iz,ir] *= 2.0 / electron_density[iz,ir] - ppar_out[iz,ir] -= dt*(electron_upar[iz,ir]*moments.dT_dz[iz,ir] - + 2.0*T_in[iz,ir]*moments.dupar_dz[iz,ir]) + ppar_out[iz] *= 2.0 / electron_density[iz] + ppar_out[iz] -= dt*(electron_upar[iz]*moments.dT_dz[iz,ir] + + 2.0*T_in[iz]*moments.dupar_dz[iz,ir]) end if conduction - @loop_r_z ir iz begin - ppar_out[iz,ir] -= 2.0 * dt*moments.dqpar_dz[iz,ir] / electron_density[iz,ir] + @loop_z iz begin + ppar_out[iz] -= 2.0 * dt*moments.dqpar_dz[iz,ir] / electron_density[iz] end end # compute the contribution to the rhs of the energy equation @@ -196,17 +217,17 @@ function electron_energy_equation!(ppar_out, ppar_in, electron_density, electron diffusion_coefficient = num_diss_params.electron.moment_dissipation_coefficient if diffusion_coefficient > 0.0 error("diffusion not implemented for electron temperature equation yet") - @loop_r_z ir iz begin - ppar_out[iz,ir] += dt*diffusion_coefficient*moments.d2T_dz2[iz,ir] + @loop_z iz begin + ppar_out[iz] += dt*diffusion_coefficient*moments.d2T_dz2[iz,ir] end end # compute the contribution to the rhs of the energy equation # arising from electron-ion collisions if nu_ei > 0.0 - @loop_s_r_z is ir iz begin - ppar_out[iz,ir] += dt * 2.0 * (2 * me_over_mi * nu_ei * (2.0*ion_ppar[iz,ir,is]/ion_density[iz,ir,is] - T_in[iz,ir])) - ppar_out[iz,ir] += dt * 2.0 * ((2/3) * moments.parallel_friction[iz,ir] - * (ion_upar[iz,ir,is]-electron_upar[iz,ir])) / electron_density[iz,ir] + @loop_s_z is iz begin + 
ppar_out[iz] += dt * 2.0 * (2 * me_over_mi * nu_ei * (2.0*ion_ppar[iz,is]/ion_density[iz,is] - T_in[iz])) + ppar_out[iz] += dt * 2.0 * ((2/3) * moments.parallel_friction[iz,ir] + * (ion_upar[iz,is]-electron_upar[iz])) / electron_density[iz] end end # add in contributions due to charge exchange/ionization collisions @@ -215,20 +236,20 @@ function electron_energy_equation!(ppar_out, ppar_in, electron_density, electron ionization_energy = collisions.reactions.ionization_energy if composition.n_neutral_species > 0 if abs(charge_exchange_electron) > 0.0 - @loop_sn_r_z isn ir iz begin - ppar_out[iz,ir] += + @loop_sn_z isn iz begin + ppar_out[iz] += dt * 2.0 * me_over_mi * charge_exchange_electron * ( - 2*(pz_neutral[iz,ir,isn] - - density_neutral[iz,ir,isn]*ppar_in[iz,ir]/electron_density[iz,ir]) + - (2/3)*density_neutral[iz,ir,isn] * - (uz_neutral[iz,ir,isn] - electron_upar[iz,ir])^2) + 2*(pz_neutral[iz,isn] - + density_neutral[iz,isn]*ppar_in[iz]/electron_density[iz]) + + (2/3)*density_neutral[iz,isn] * + (uz_neutral[iz,isn] - electron_upar[iz])^2) end end if abs(ionization_electron) > 0.0 - @loop_sn_r_z isn ir iz begin - ppar_out[iz,ir] += - dt * 2.0 * ionization_electron * density_neutral[iz,ir,isn] * ( - ppar_in[iz,ir] / electron_density[iz,ir] - + @loop_sn_z isn iz begin + ppar_out[iz] += + dt * 2.0 * ionization_electron * density_neutral[iz,isn] * ( + ppar_in[iz] / electron_density[iz] - ionization_energy) end end @@ -236,57 +257,57 @@ function electron_energy_equation!(ppar_out, ppar_in, electron_density, electron for index ∈ eachindex(electron_source_settings) if electron_source_settings[index].active - @views pressure_source_amplitude = moments.external_source_pressure_amplitude[:, :, index] - @views density_source_amplitude = moments.external_source_density_amplitude[:, :, index] - @loop_r_z ir iz begin - ppar_out[iz,ir] += dt * (2.0 * pressure_source_amplitude[iz,ir] - - T_in[iz,ir] * density_source_amplitude[iz,ir]) / - electron_density[iz,ir] + 
pressure_source_amplitude = @view moments.external_source_pressure_amplitude[:, ir, index] + density_source_amplitude = @view moments.external_source_density_amplitude[:, ir, index] + @loop_z iz begin + ppar_out[iz] += dt * (2.0 * pressure_source_amplitude[iz] + - T_in[iz] * density_source_amplitude[iz]) / + electron_density[iz] end end end # Now that forward-Euler step for temperature is finished, convert ppar_out back to # pressure. - @loop_r_z ir iz begin - ppar_out[iz,ir] *= 0.5 * electron_density[iz,ir] + @loop_z iz begin + ppar_out[iz] *= 0.5 * electron_density[iz] end else - begin_r_z_region() + begin_z_region() # define some abbreviated variables for convenient use in rest of function me_over_mi = composition.me_over_mi nu_ei = collisions.electron_fluid.nu_ei # calculate contribution to rhs of energy equation (formulated in terms of pressure) # arising from derivatives of ppar, qpar and upar - @loop_r_z ir iz begin - ppar_out[iz,ir] -= dt*(electron_upar[iz,ir]*moments.dppar_dz[iz,ir] - + 3*ppar_in[iz,ir]*moments.dupar_dz[iz,ir]) + @loop_z iz begin + ppar_out[iz] -= dt*(electron_upar[iz]*moments.dppar_dz[iz,ir] + + 3*ppar_in[iz]*moments.dupar_dz[iz,ir]) end if conduction - @loop_r_z ir iz begin - ppar_out[iz,ir] -= dt*moments.dqpar_dz[iz,ir] + @loop_z iz begin + ppar_out[iz] -= dt*moments.dqpar_dz[iz,ir] end end - # @loop_r_z ir iz begin - # ppar_out[iz,ir] -= dt*(electron_upar[iz,ir]*moments.dppar_dz[iz,ir] + # @loop_z iz begin + # ppar_out[iz] -= dt*(electron_upar[iz]*moments.dppar_dz[iz,ir] # + (2/3)*moments.dqpar_dz[iz,ir] - # + (5/3)*ppar_in[iz,ir]*moments.dupar_dz[iz,ir]) + # + (5/3)*ppar_in[iz]*moments.dupar_dz[iz,ir]) # end # compute the contribution to the rhs of the energy equation # arising from artificial diffusion diffusion_coefficient = num_diss_params.electron.moment_dissipation_coefficient if diffusion_coefficient > 0.0 - @loop_r_z ir iz begin - ppar_out[iz,ir] += dt*diffusion_coefficient*moments.d2ppar_dz2[iz,ir] + @loop_z iz begin + 
ppar_out[iz] += dt*diffusion_coefficient*moments.d2ppar_dz2[iz,ir] end end # compute the contribution to the rhs of the energy equation # arising from electron-ion collisions if nu_ei > 0.0 - @loop_s_r_z is ir iz begin - ppar_out[iz,ir] += dt * (2 * me_over_mi * nu_ei * (ion_ppar[iz,ir,is] - ppar_in[iz,ir])) - ppar_out[iz,ir] += dt * ((2/3) * moments.parallel_friction[iz,ir] - * (ion_upar[iz,ir,is]-electron_upar[iz,ir])) + @loop_s_z is iz begin + ppar_out[iz] += dt * (2 * me_over_mi * nu_ei * (ion_ppar[iz,is] - ppar_in[iz])) + ppar_out[iz] += dt * ((2/3) * moments.parallel_friction[iz,ir] + * (ion_upar[iz,is]-electron_upar[iz])) end end # add in contributions due to charge exchange/ionization collisions @@ -295,36 +316,36 @@ function electron_energy_equation!(ppar_out, ppar_in, electron_density, electron ionization_electron = collisions.reactions.electron_ionization_frequency ionization_energy = collisions.reactions.ionization_energy if abs(charge_exchange_electron) > 0.0 - @loop_sn_r_z isn ir iz begin - ppar_out[iz,ir] += + @loop_sn_z isn iz begin + ppar_out[iz] += dt * me_over_mi * charge_exchange_electron * ( - 2*(electron_density[iz,ir]*pz_neutral[iz,ir,isn] - - density_neutral[iz,ir,isn]*ppar_in[iz,ir]) + - (2/3)*electron_density[iz,ir]*density_neutral[iz,ir,isn] * - (uz_neutral[iz,ir,isn] - electron_upar[iz,ir])^2) + 2*(electron_density[iz]*pz_neutral[iz,isn] - + density_neutral[iz,isn]*ppar_in[iz]) + + (2/3)*electron_density[iz]*density_neutral[iz,isn] * + (uz_neutral[iz,isn] - electron_upar[iz])^2) end end if abs(ionization_electron) > 0.0 - # @loop_s_r_z is ir iz begin - # ppar_out[iz,ir] += - # dt * ionization_electron * density_neutral[iz,ir,is] * ( - # ppar_in[iz,ir] - - # (2/3)*electron_density[iz,ir] * ionization_energy) + # @loop_s_z is iz begin + # ppar_out[iz] += + # dt * ionization_electron * density_neutral[iz,is] * ( + # ppar_in[iz] - + # (2/3)*electron_density[iz] * ionization_energy) # end - @loop_sn_r_z isn ir iz begin - ppar_out[iz,ir] += - dt
* ionization_electron * density_neutral[iz,ir,isn] * ( - ppar_in[iz,ir] - - electron_density[iz,ir] * ionization_energy) + @loop_sn_z isn iz begin + ppar_out[iz] += + dt * ionization_electron * density_neutral[iz,isn] * ( + ppar_in[iz] - + electron_density[iz] * ionization_energy) end end end for index ∈ eachindex(electron_source_settings) if electron_source_settings[index].active - @views source_amplitude = moments.external_source_pressure_amplitude[:, :, index] - @loop_r_z ir iz begin - ppar_out[iz,ir] += dt * source_amplitude[iz,ir] + source_amplitude = @view moments.external_source_pressure_amplitude[:, ir, index] + @loop_z iz begin + ppar_out[iz] += dt * source_amplitude[iz] end end end @@ -333,10 +354,113 @@ function electron_energy_equation!(ppar_out, ppar_in, electron_density, electron return nothing end +function add_electron_energy_equation_to_Jacobian!(jacobian_matrix, f, dens, upar, ppar, + vth, third_moment, ddens_dz, dupar_dz, + dppar_dz, dthird_moment_dz, collisions, + composition, z, vperp, vpa, z_spectral, + num_diss_params, dt, ir; f_offset=0, + ppar_offset=0) + if f_offset == ppar_offset + error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. 
f and ppar " + * "cannot be in same place in state vector.") + end + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian matrix is not square") + @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big for Jacobian matrix") + @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big for Jacobian matrix") + + if composition.electron_physics == kinetic_electrons_with_temperature_equation + error("kinetic_electrons_with_temperature_equation not " + * "supported yet in preconditioner") + elseif composition.electron_physics != kinetic_electrons + error("Unsupported electron_physics=$(composition.electron_physics) " + * "in electron_backward_euler!() preconditioner.") + end + if num_diss_params.electron.moment_dissipation_coefficient > 0.0 + error("z-diffusion of electron_ppar not yet supported in " + * "preconditioner") + end + if collisions.electron_fluid.nu_ei > 0.0 + error("electron-ion collision terms for electron_ppar not yet " + * "supported in preconditioner") + end + if composition.n_neutral_species > 0 && collisions.reactions.electron_charge_exchange_frequency > 0.0 + error("electron 'charge exchange' terms for electron_ppar not yet " + * "supported in preconditioner") + end + if composition.n_neutral_species > 0 && collisions.reactions.electron_ionization_frequency > 0.0 + error("electron ionization terms for electron_ppar not yet " + * "supported in preconditioner") + end + + me = composition.me_over_mi + z_deriv_matrix = z_spectral.D_matrix_csr + v_size = vperp.n * vpa.n + + begin_z_region() + @loop_z iz begin + # Rows corresponding to electron_ppar + row = ppar_offset + iz + + # Note that as + # q = 2 * p * vth * ∫dw_∥ w_∥^3 g + # = 2 * p^(3/2) * sqrt(2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 g + # we have that + # d(q)/dz = 2 * p^(3/2) * sqrt(2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 d(g)/dz + # - p^(3/2) * sqrt(2) / n^(3/2) / me^(1/2) * ∫dw_∥ w_∥^3 g * d(n)/dz + # + 3 * p^(1/2) * sqrt(2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 g * d(p)/dz + # so for the Jacobian + # d(d(q)/dz)[irowz])/d(p[icolz]) + # =
(3 * sqrt(2) * p^(1/2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 d(g)/dz + # - 3/2 * sqrt(2) * p^(1/2) / n^(3/2) / me^(1/2) * ∫dw_∥ w_∥^3 g * d(n)/dz + # + 3/2 * sqrt(2) / p^(1/2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 g * d(p)/dz)[irowz] * delta[irowz,icolz] + # + (3 * sqrt(2) * p^(1/2) / n^(1/2) / me^(1/2) * ∫dw_∥ w_∥^3 g)[irowz] * z_deriv_matrix[irowz,icolz] + # d(d(q)/dz)[irowz])/d(g[icolvpa,icolvperp,icolz]) + # = (2 * sqrt(2) * p^(3/2) / n^(1/2) / me^(1/2))[irowz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[irowz,icolz] + # + sqrt(2) * (-p^(3/2) / n^(3/2) / me^(1/2) * dn/dz + 3.0 * p^(1/2) / n^(1/2) / me^(1/2) * dp/dz)[irowz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * delta[irowz,icolz] + + # upar*dppar_dz + z_deriv_row_startind = z_deriv_matrix.rowptr[iz] + z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1 + z_deriv_colinds = @view z_deriv_matrix.colval[z_deriv_row_startind:z_deriv_row_endind] + z_deriv_row_nonzeros = @view z_deriv_matrix.nzval[z_deriv_row_startind:z_deriv_row_endind] + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) + col = ppar_offset + icolz + jacobian_matrix[row,col] += + dt * upar[iz] * z_deriv_entry + end + + # 3*ppar*dupar_dz + jacobian_matrix[row,row] += 3.0 * dt * dupar_dz[iz] + + # terms from d(qpar)/dz + jacobian_matrix[row,row] += + dt * (3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * dthird_moment_dz[iz] + - 1.5 * sqrt(2.0 * ppar[iz] / me) / dens[iz]^1.5 * third_moment[iz] * ddens_dz[iz] + + 1.5 * sqrt(2.0 / ppar[iz] / dens[iz] / me) * third_moment[iz] * dppar_dz[iz]) + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) + col = ppar_offset + icolz + jacobian_matrix[row,col] += dt * 3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * third_moment[iz] * z_deriv_entry + end + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] += dt * 
(-(ppar[iz]/dens[iz])^1.5*sqrt(2.0/me)*ddens_dz[iz] + + 3.0*sqrt(2.0*ppar[iz]/dens[iz]/me)*dppar_dz[iz]) * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + end + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] += dt * 2.0*ppar[iz]^1.5*sqrt(2.0/dens[iz]/me) * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry + end + end + + return nothing +end + """ - electron_energy_residual!(residual, electron_ppar_out, fvec_in, moments, - collisions, composition, external_source_settings, - num_diss_params, z, dt) + electron_energy_residual!(residual, electron_ppar_out, electron_ppar_in, + fvec_in, moments, collisions, composition, + external_source_settings, num_diss_params, z, dt, ir) The residual is a function whose input is `electron_ppar`, so that when it's output `residual` is zero, electron_ppar is the result of a backward-Euler timestep: @@ -345,27 +469,34 @@ The residual is a function whose input is `electron_ppar`, so that when it's out This function assumes any needed moment derivatives are already calculated using `electron_ppar_out` and stored in `moments.electron`. + +Note that this function operates on a single point in `r`, given by `ir`, and `residual`, +`electron_ppar_out`, and `electron_ppar_in` should have no r-dimension.
""" -function electron_energy_residual!(residual, electron_ppar_out, fvec_in, moments, - collisions, composition, external_source_settings, - num_diss_params, z, dt) - begin_r_z_region() - electron_ppar_in = fvec_in.electron_ppar - @loop_r_z ir iz begin - residual[iz,ir] = electron_ppar_in[iz,ir] +function electron_energy_residual!(residual, electron_ppar_out, electron_ppar_in, + fvec_in, moments, collisions, composition, + external_source_settings, num_diss_params, z, dt, ir) + begin_z_region() + @loop_z iz begin + residual[iz] = electron_ppar_in[iz] end - electron_energy_equation!(residual, electron_ppar_out, - fvec_in.density, fvec_in.electron_upar, fvec_in.density, - fvec_in.upar, fvec_in.ppar, fvec_in.density_neutral, - fvec_in.uz_neutral, fvec_in.pz_neutral, moments.electron, - collisions, dt, composition, - external_source_settings.electron, num_diss_params, z) + @views electron_energy_equation_no_r!(residual, electron_ppar_out, + fvec_in.electron_density[:,ir], + fvec_in.electron_upar[:,ir], + fvec_in.density[:,ir,:], fvec_in.upar[:,ir,:], + fvec_in.ppar[:,ir,:], + fvec_in.density_neutral[:,ir,:], + fvec_in.uz_neutral[:,ir,:], + fvec_in.pz_neutral[:,ir,:], moments.electron, + collisions, dt, composition, + external_source_settings.electron, + num_diss_params, z, ir) # Now # residual = f_in + dt*RHS(f_out) # so update to desired residual - begin_r_z_region() - @loop_r_z ir iz begin - residual[iz,ir] = (electron_ppar_out[iz,ir] - residual[iz,ir]) + begin_z_region() + @loop_z iz begin + residual[iz] = (electron_ppar_out[iz] - residual[iz]) end end @@ -464,8 +595,6 @@ function implicit_braginskii_conduction!(fvec_out, fvec_in, moments, z, r, dt, z end end - nl_solver_params.stage_counter[] += 1 - return true end @@ -610,6 +739,20 @@ function calculate_electron_qpar_from_pdf!(qpar, ppar, vth, pdf, vpa) end end +""" +Calculate the parallel component of the electron heat flux, defined as qpar = 2 * ppar * +vth * int dwpa (pdf * wpa^3).
This version of the function does not loop over `r`. `pdf` +should have no r-dimension, while the moment variables are indexed at `ir`. +""" +function calculate_electron_qpar_from_pdf_no_r!(qpar, ppar, vth, pdf, vpa, ir) + # specialise to 1V for now + begin_z_region() + ivperp = 1 + @loop_z iz begin + @views qpar[iz] = 2*ppar[iz]*vth[iz]*integrate_over_vspace(pdf[:, ivperp, iz], vpa.grid.^3, vpa.wgts) + end +end + function calculate_electron_heat_source!(heat_source, ppar_e, dupar_dz, dens_n, ionization, ionization_energy, dens_e, ppar_i, nu_ei, me_over_mi, T_wall, z) begin_r_z_region() diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 6582e4ea0..07d8229df 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -2,38 +2,59 @@ module electron_kinetic_equation using LinearAlgebra using MPI +using SparseArrays export get_electron_critical_velocities using ..looping using ..analysis: steady_state_residuals -using ..derivatives: derivative_z! +using ..derivatives: derivative_z!, derivative_z_pdf_vpavperpz! using ..boundary_conditions: enforce_v_boundary_condition_local!, - enforce_vperp_boundary_condition! -using ..calculus: derivative!, second_derivative!, integral + enforce_vperp_boundary_condition!, + skip_f_electron_bc_points_in_Jacobian, vpagrid_to_dzdt +using ..calculus: derivative!, second_derivative!, integral, + reconcile_element_boundaries_MPI!, + reconcile_element_boundaries_MPI_z_pdf_vpavperpz! using ..communication +using ..gauss_legendre: gausslegendre_info +using ..input_structs using ..interpolation: interpolate_to_grid_1d! using ..type_definitions: mk_float, mk_int using ..array_allocation: allocate_float using ..electron_fluid_equations: calculate_electron_moments!, update_electron_vth_temperature!, calculate_electron_qpar_from_pdf!, + calculate_electron_qpar_from_pdf_no_r!, calculate_electron_parallel_friction_force! 
-using ..electron_fluid_equations: electron_energy_equation!, electron_energy_residual! -using ..electron_z_advection: electron_z_advection!, update_electron_speed_z! -using ..electron_vpa_advection: electron_vpa_advection!, update_electron_speed_vpa! +using ..electron_fluid_equations: electron_energy_equation!, + electron_energy_equation_no_r!, + add_electron_energy_equation_to_Jacobian!, + electron_energy_residual! +using ..electron_z_advection: electron_z_advection!, update_electron_speed_z!, + add_electron_z_advection_to_Jacobian! +using ..electron_vpa_advection: electron_vpa_advection!, update_electron_speed_vpa!, + add_electron_vpa_advection_to_Jacobian! using ..em_fields: update_phi! -using ..external_sources: total_external_electron_sources! +using ..external_sources: total_external_electron_sources!, + add_total_external_electron_source_to_Jacobian! using ..file_io: get_electron_io_info, write_electron_state, finish_electron_io -using ..krook_collisions: electron_krook_collisions! +using ..krook_collisions: electron_krook_collisions!, get_collision_frequency_ee, + get_collision_frequency_ei, + add_electron_krook_collisions_to_Jacobian! using ..moment_constraints: hard_force_moment_constraints!, - moment_constraints_on_residual! + moment_constraints_on_residual!, + electron_implicit_constraint_forcing!, + add_electron_implicit_constraint_forcing_to_Jacobian! using ..moment_kinetics_structs: scratch_pdf, scratch_electron_pdf, electron_pdf_substruct -using ..nonlinear_solvers: newton_solve! +using ..nonlinear_solvers using ..runge_kutta: rk_update_variable!, rk_loworder_solution!, local_error_norm, adaptive_timestep_update_t_params! using ..utils: get_minimum_CFL_z, get_minimum_CFL_vpa -using ..velocity_moments: integrate_over_vspace, calculate_electron_moment_derivatives! +using ..velocity_moments: integrate_over_vspace, calculate_electron_moment_derivatives!, + calculate_electron_moment_derivatives_no_r! 
+ +# Only needed so we can reference it in a docstring +import ..runge_kutta """ update_electron_pdf is a function that uses the electron kinetic equation @@ -68,12 +89,12 @@ OUTPUT: function update_electron_pdf!(scratch, pdf, moments, phi, r, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, t_params, collisions, composition, external_source_settings, num_diss_params, - max_electron_pdf_iterations, max_electron_sim_time; io_electron=nothing, - initial_time=nothing, residual_tolerance=nothing, evolve_ppar=false, - ion_dt=nothing) + nl_solver_params, max_electron_pdf_iterations, max_electron_sim_time; + io_electron=nothing, initial_time=nothing, residual_tolerance=nothing, + evolve_ppar=false, ion_dt=nothing, solution_method="backward_euler") # set the method to use to solve the electron kinetic equation - solution_method = "artificial_time_derivative" + #solution_method = "artificial_time_derivative" #solution_method = "shooting_method" #solution_method = "picard_iteration" # solve the electron kinetic equation using the specified method @@ -84,6 +105,13 @@ function update_electron_pdf!(scratch, pdf, moments, phi, r, z, vperp, vpa, z_sp external_source_settings, num_diss_params, max_electron_pdf_iterations, max_electron_sim_time; io_electron=io_electron, initial_time=initial_time, residual_tolerance=residual_tolerance, evolve_ppar=evolve_ppar, ion_dt=ion_dt) + elseif solution_method == "backward_euler" + return electron_backward_euler!(scratch, pdf, moments, phi, collisions, + composition, r, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, t_params, external_source_settings, + num_diss_params, nl_solver_params, max_electron_pdf_iterations, + max_electron_sim_time; io_electron=io_electron, initial_time=initial_time, + residual_tolerance=residual_tolerance, evolve_ppar=evolve_ppar, ion_dt=ion_dt) elseif solution_method == "shooting_method" dens = moments.electron.dens vthe = 
moments.electron.vth @@ -111,7 +139,7 @@ function update_electron_pdf!(scratch, pdf, moments, phi, r, z, vperp, vpa, z_sp dppar_dz, dqpar_dz, dvth_dz, z, vpa, vpa_spectral, scratch_dummy, max_electron_pdf_iterations) else - error("!!! invalid solution method specified !!!") + error("!!! invalid solution method '$solution_method' specified !!!") end return nothing end @@ -199,7 +227,7 @@ function update_electron_pdf_with_time_advance!(scratch, pdf, moments, phi, coll moments.neutral.dens, moments.neutral.uz, moments.neutral.pz, moments.electron, collisions, ion_dt, composition, external_source_settings.electron, - num_diss_params, z) + num_diss_params, r, z) end if !evolve_ppar @@ -337,14 +365,17 @@ function update_electron_pdf_with_time_advance!(scratch, pdf, moments, phi, coll end # Do a forward-Euler update of the electron pdf, and (if evove_ppar=true) the # electron parallel pressure. - electron_kinetic_equation_euler_update!(scratch[istage+1], scratch[istage], - moments, z, vperp, vpa, z_spectral, - vpa_spectral, z_advect, vpa_advect, - scratch_dummy, collisions, - composition, external_source_settings, - num_diss_params, t_params.dt[]; - evolve_ppar=evolve_ppar, - ion_dt=ion_dt) + @loop_r ir begin + @views electron_kinetic_equation_euler_update!( + scratch[istage+1].pdf_electron[:,:,:,ir], + scratch[istage+1].electron_ppar[:,ir], + scratch[istage].pdf_electron[:,:,:,ir], + scratch[istage].electron_ppar[:,ir], moments, z, vperp, vpa, + z_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + collisions, composition, external_source_settings, + num_diss_params, t_params, ir; evolve_ppar=evolve_ppar, + ion_dt=ion_dt) + end speedup_hack!(scratch[istage+1], scratch[istage], z_speedup_fac, z, vpa; evolve_ppar=evolve_ppar) @@ -433,10 +464,7 @@ function update_electron_pdf_with_time_advance!(scratch, pdf, moments, phi, coll end # update the time following the pdf update - @serial_region begin - t_params.t[] += t_params.previous_dt[] - end - 
_block_synchronize() + t_params.t[] += t_params.previous_dt[] residual = -1.0 if t_params.previous_dt[] > 0.0 @@ -589,6 +617,939 @@ function update_electron_pdf_with_time_advance!(scratch, pdf, moments, phi, coll return success end +""" +Update the electron distribution function using backward-Euler for an artificial time +advance of the electron kinetic equation until a steady-state solution is reached. + +Note that this function does not use the [`runge_kutta`](@ref) timestep functionality. +`t_params.previous_dt[]` is used to store the (adaptively updated) initial timestep of the +pseudotimestepping loop (initial value of `t_params.dt[]` within +`electron_backward_euler!()`). `t_params.dt[]` is adapted according to the iteration +counts of the Newton solver. +""" +function electron_backward_euler!(scratch, pdf, moments, phi, collisions, composition, r, + z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, + scratch_dummy, t_params, external_source_settings, num_diss_params, + nl_solver_params, max_electron_pdf_iterations, max_electron_sim_time; + io_electron=nothing, initial_time=nothing, residual_tolerance=nothing, + evolve_ppar=false, ion_dt=nothing) + + if max_electron_pdf_iterations === nothing && max_electron_sim_time === nothing + error("Must set one of max_electron_pdf_iterations and max_electron_sim_time") + end + + t_params.dt[] = t_params.previous_dt[] + + begin_r_z_region() + @loop_r_z ir iz begin + # update the electron thermal speed using the updated electron parallel pressure + moments.electron.vth[iz,ir] = sqrt(abs(2.0 * moments.electron.ppar[iz,ir] / + (moments.electron.dens[iz,ir] * + composition.me_over_mi))) + scratch[t_params.n_rk_stages+1].electron_ppar[iz,ir] = moments.electron.ppar[iz,ir] + end + calculate_electron_qpar_from_pdf!(moments.electron.qpar, moments.electron.ppar, + moments.electron.vth, + scratch[t_params.n_rk_stages+1].pdf_electron, vpa) + calculate_electron_moment_derivatives!(moments, +
(electron_density=moments.electron.dens, + electron_upar=moments.electron.upar, + electron_ppar=moments.electron.ppar), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, + composition.electron_physics) + + reduced_by_ion_dt = false + if ion_dt !== nothing + evolve_ppar = true + + # Use forward-Euler step (with `ion_dt` as the timestep) as initial guess for + # updated electron_ppar + ppar_guess = scratch[t_params.n_rk_stages+1].electron_ppar + electron_energy_equation!(ppar_guess, moments.electron.ppar, + moments.electron.dens, moments.electron.upar, + moments.ion.dens, moments.ion.upar, moments.ion.ppar, + moments.neutral.dens, moments.neutral.uz, + moments.neutral.pz, moments.electron, collisions, + ion_dt, composition, external_source_settings.electron, + num_diss_params, r, z) + + begin_r_z_region() + @loop_r_z ir iz begin + # update the electron thermal speed using the updated electron parallel pressure + moments.electron.vth[iz,ir] = sqrt(abs(2.0 * ppar_guess[iz,ir] / + (moments.electron.dens[iz,ir] * + composition.me_over_mi))) + end + calculate_electron_qpar_from_pdf!(moments.electron.qpar, ppar_guess, + moments.electron.vth, + scratch[t_params.n_rk_stages+1].pdf_electron, + vpa) + calculate_electron_moment_derivatives!(moments, + (electron_density=moments.electron.dens, + electron_upar=moments.electron.upar, + electron_ppar=ppar_guess), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, + composition.electron_physics) + end + + if !evolve_ppar + # ppar is not updated in the pseudo-timestepping loop below. So that we can read + # ppar from the scratch structs, copy moments.electron.ppar into all of them. 
+ moments_ppar = moments.electron.ppar + for istage ∈ 1:t_params.n_rk_stages+1 + scratch_ppar = scratch[istage].electron_ppar + @loop_r_z ir iz begin + scratch_ppar[iz,ir] = moments_ppar[iz,ir] + end + end + end + + if initial_time !== nothing + t_params.t[] = initial_time + # Make sure that output times are set relative to this initial_time (the values in + # t_params are set relative to 0.0). + moments_output_times = t_params.moments_output_times .+ initial_time + dfns_output_times = t_params.dfns_output_times .+ initial_time + else + initial_time = t_params.t[] + end + if io_electron === nothing && t_params.debug_io !== nothing + # Overwrite the debug output file with the output from this call to + # electron_backward_euler!(). + io_electron = get_electron_io_info(t_params.debug_io[1], "electron_debug") + do_debug_io = true + debug_io_nwrite = t_params.debug_io[3] + else + do_debug_io = false + end + + # Store the initial number of iterations in the solution of the electron kinetic + # equation + initial_step_counter = t_params.step_counter[] + t_params.step_counter[] += 1 + + begin_serial_region() + t_params.moments_output_counter[] += 1 + @serial_region begin + if io_electron !== nothing + write_electron_state(scratch, moments, t_params, io_electron, + t_params.moments_output_counter[], r, z, vperp, vpa) + end + end + electron_pdf_converged = false + # No parallelism in r for now - will need to add a specially adapted shared-memory + # parallelism scheme to allow it for 2D1V or 2D2V simulations.
+ for ir ∈ 1:r.n + # create several 0D dummy arrays for use in taking derivatives + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + + # initialise the electron pdf convergence flag to false + electron_pdf_converged = false + + first_step = true + # evolve (artificially) in time until the residual is less than the tolerance + while (!electron_pdf_converged + && ((max_electron_pdf_iterations !== nothing && t_params.step_counter[] - initial_step_counter < max_electron_pdf_iterations) + || (max_electron_sim_time !== nothing && t_params.t[] - initial_time < max_electron_sim_time)) + && t_params.dt[] > 0.0 && !isnan(t_params.dt[])) + + old_scratch = scratch[1] + new_scratch = scratch[t_params.n_rk_stages+1] + + # Set the initial values for the next step to the final values from the previous + # step. The initial guess for f_electron_new and electron_ppar_new are just the + # values from the old timestep, so no need to change those. 
+ begin_z_vperp_vpa_region() + f_electron_old = @view old_scratch.pdf_electron[:,:,:,ir] + f_electron_new = @view new_scratch.pdf_electron[:,:,:,ir] + @loop_z_vperp_vpa iz ivperp ivpa begin + f_electron_old[ivpa,ivperp,iz] = f_electron_new[ivpa,ivperp,iz] + end + electron_ppar_old = @view old_scratch.electron_ppar[:,ir] + electron_ppar_new = @view new_scratch.electron_ppar[:,ir] + if evolve_ppar + begin_z_region() + @loop_z iz begin + electron_ppar_old[iz] = electron_ppar_new[iz] + end + end + + # Calculate heat flux and derivatives using updated f_electron + @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], + electron_ppar_new, + moments.electron.vth[:,ir], + f_electron_new, vpa, ir) + @views calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=moments.electron.dens[:,ir], + electron_upar=moments.electron.upar[:,ir], + electron_ppar=electron_ppar_new), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + if nl_solver_params.preconditioner_type == "electron_split_lu" + if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval + nl_solver_params.solves_since_precon_update[] = 0 + + dt = t_params.dt[] + vth = @view moments.electron.vth[:,ir] + me = composition.me_over_mi + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = electron_ppar_new + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dupar_dz = @view moments.electron.dupar_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + dvth_dz = @view moments.electron.dvth_dz[:,ir] + dqpar_dz = @view moments.electron.dqpar_dz[:,ir] + source_amplitude = moments.electron.external_source_amplitude + source_density_amplitude = moments.electron.external_source_density_amplitude + source_momentum_amplitude = moments.electron.external_source_momentum_amplitude + source_pressure_amplitude = moments.electron.external_source_pressure_amplitude + 
+ # Note the region(s) used here must be the same as the region(s) used + # when the matrices are used in `split_precon!()`, so that the + # parallelisation is the same and each matrix is used on the same + # process that created it. + + # z-advection preconditioner + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + @loop_vperp_vpa ivperp ivpa begin + z_matrix = allocate_float(z.n, z.n) + z_matrix .= 0.0 + + z_speed = @view z_advect[1].speed[:,ivpa,ivperp,ir] + for ielement ∈ 1:z.nelement_local + imin = z.imin[ielement] - (ielement != 1) + imax = z.imax[ielement] + if ielement == 1 + z_matrix[imin,imin:imax] .+= z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement] + else + if z_speed[imin] < 0.0 + z_matrix[imin,imin:imax] .+= z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement] + elseif z_speed[imin] > 0.0 + # Do nothing + else + z_matrix[imin,imin:imax] .+= 0.5 .* z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement] + end + end + z_matrix[imin+1:imax-1,imin:imax] .+= z_spectral.lobatto.Dmat[2:end-1,:] ./ z.element_scale[ielement] + if ielement == z.nelement_local + z_matrix[imax,imin:imax] .+= z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement] + else + if z_speed[imax] < 0.0 + # Do nothing + elseif z_speed[imax] > 0.0 + z_matrix[imax,imin:imax] .+= z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement] + else + z_matrix[imax,imin:imax] .+= 0.5 .* z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement] + end + end + end + # Multiply by advection speed + for row ∈ 1:z.n + z_matrix[row,:] .*= dt * z_speed[row] + end + + # Diagonal entries + for row ∈ 1:z.n + z_matrix[row,row] += 1.0 + + # Terms from `add_contribution_from_pdf_term!()` + z_matrix[row,row] += dt * (0.5 * dqpar_dz[row] / ppar[row] + + vpa.grid[ivpa] * vth[row] * (ddens_dz[row] / dens[row] + - dvth_dz[row] / vth[row])) + end + if external_source_settings.electron.active + for row ∈ 1:z.n + # Source terms from 
`add_contribution_from_pdf_term!()` + z_matrix[row,row] += dt * (1.5 * source_density_amplitude[row] / dens[row] + - (0.5 * source_pressure_amplitude[row] + + source_momentum_amplitude[row]) / ppar[row] + ) + end + if external_source_settings.electron.source_type == "energy" + for row ∈ 1:z.n + # Contribution from `external_electron_source!()` + z_matrix[row,row] += dt * source_amplitude[row] + end + end + end + if collisions.krook.nuee0 > 0.0 || collisions.krook.nuei0 > 0.0 + for row ∈ 1:z.n + # Contribution from electron_krook_collisions!() + nu_ee = get_collision_frequency_ee(collisions, dens[row], vth[row]) + nu_ei = get_collision_frequency_ei(collisions, dens[row], vth[row]) + z_matrix[row,row] += dt * (nu_ee + nu_ei) + end + end + + nl_solver_params.preconditioners.z[ivpa,ivperp,ir] = lu(sparse(z_matrix)) + end + + if z.irank == 0 + ppar_matrix = allocate_float(z.n, z.n) + ppar_matrix .= 0.0 + + if composition.electron_physics == kinetic_electrons_with_temperature_equation + error("kinetic_electrons_with_temperature_equation not " + * "supported yet in preconditioner") + elseif composition.electron_physics != kinetic_electrons + error("Unsupported electron_physics=$(composition.electron_physics) " + * "in electron_backward_euler!() preconditioner.") + end + + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar + @views third_moment = @. 
0.5 * moments.electron.qpar[:,ir] / electron_ppar_new / vth + + # Note that as + # qpar = 2 * ppar * vth * third_moment + # = 2 * ppar^(3/2) / dens^(1/2) / me^(1/2) * third_moment + # we have that + # d(qpar)/dz = 2 * ppar^(3/2) / dens^(1/2) / me^(1/2) * d(third_moment)/dz + # - ppar^(3/2) / dens^(3/2) / me^(1/2) * third_moment * d(dens)/dz + # + 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(ppar)/dz + # so for the Jacobian + # d[d(qpar)/dz)]/d[ppar] + # = 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * d(third_moment)/dz + # - 3/2 * ppar^(1/2) / dens^(3/2) / me^(1/2) * third_moment * d(dens)/dz + # + 3/2 / ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(ppar)/dz + # + 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(.)/dz + dthird_moment_dz = z.scratch2 + derivative_z!(z.scratch2, third_moment, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + # Diagonal terms + for row ∈ 1:z.n + ppar_matrix[row,row] = 1.0 + + # 3*ppar*dupar_dz + ppar_matrix[row,row] += 3.0 * dt * dupar_dz[row] + + # terms from d(qpar)/dz + ppar_matrix[row,row] += + dt * (3.0 * sqrt(electron_ppar_new[row] / dens[row] / me) * dthird_moment_dz[row] + - 1.5 * sqrt(electron_ppar_new[row] / me) / dens[row]^1.5 * third_moment[row] * ddens_dz[row] + + 1.5 / sqrt(electron_ppar_new[row] / dens[row] / me) * third_moment[row] * dppar_dz[row]) + end + if ion_dt !== nothing + # Backward-Euler forcing term + for row ∈ 1:z.n + ppar_matrix[row,row] += dt / ion_dt + end + end + + + # d(.)/dz terms + # Note that the z-derivative matrix is local to this block, and + # for the preconditioner we do not include any distributed-MPI + # communication (we rely on the JFNK iteration to sort out the + # coupling between blocks). 
+ if !isa(z_spectral, gausslegendre_info) + error("Only gausslegendre_pseudospectral coordinate type is " + * "supported by electron_backward_euler!() " + * "preconditioner because we need differentiation" + * "matrices.") + end + z_deriv_matrix = z_spectral.D_matrix + for row ∈ 1:z.n + @. ppar_matrix[row,:] += + dt * (upar[row] + + 3.0 * sqrt(electron_ppar_new[row] / dens[row] / me) * third_moment[row]) * + z_deriv_matrix[row,:] + end + + if num_diss_params.electron.moment_dissipation_coefficient > 0.0 + error("z-diffusion of electron_ppar not yet supported in " + * "preconditioner") + end + if collisions.nu_ei > 0.0 + error("electron-ion collision terms for electron_ppar not yet " + * "supported in preconditioner") + end + if composition.n_neutral_species > 0 && collisions.charge_exchange_electron > 0.0 + error("electron 'charge exchange' terms for electron_ppar not yet " + * "supported in preconditioner") + end + if composition.n_neutral_species > 0 && collisions.ionization_electron > 0.0 + error("electron ionization terms for electron_ppar not yet " + * "supported in preconditioner") + end + + nl_solver_params.preconditioners.ppar[ir] = lu(sparse(ppar_matrix)) + else + ppar_matrix = allocate_float(0, 0) + ppar_matrix[] = 1.0 + end + end + + function split_precon!(x) + precon_ppar, precon_f = x + + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + z_precon_matrix = nl_solver_params.preconditioners.z[ivpa,ivperp,ir] + f_slice = @view precon_f[ivpa,ivperp,:] + @views z.scratch .= f_slice + ldiv!(z.scratch2, z_precon_matrix, z.scratch) + f_slice .= z.scratch2 + end + + begin_z_region() + ppar_precon_matrix = nl_solver_params.preconditioners.ppar[ir] + @loop_z iz begin + z.scratch[iz] = precon_ppar[iz] + end + + begin_serial_region() + @serial_region begin + ldiv!(precon_ppar, ppar_precon_matrix, z.scratch) + end + end + + left_preconditioner = identity + right_preconditioner = split_precon! 
+ elseif nl_solver_params.preconditioner_type == "electron_lu" + + if t_params.dt[] > 1.5 * nl_solver_params.precon_dt[] || + t_params.dt[] < 2.0/3.0 * nl_solver_params.precon_dt[] + + # dt has changed significantly, so update the preconditioner + nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval + end + + if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval +println("recalculating precon") + nl_solver_params.solves_since_precon_update[] = 0 + nl_solver_params.precon_dt[] = t_params.dt[] + + orig_lu, precon_matrix, input_buffer, output_buffer = + nl_solver_params.preconditioners[ir] + + fill_electron_kinetic_equation_Jacobian!( + precon_matrix, f_electron_new, electron_ppar_new, moments, + collisions, composition, z, vperp, vpa, z_spectral, + vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params, ion_dt, + ir, evolve_ppar) + + begin_serial_region() + if block_rank[] == 0 + if size(orig_lu) == (1, 1) + # Have not properly created the LU decomposition before, so + # cannot reuse it. + nl_solver_params.preconditioners[ir] = + (lu(sparse(precon_matrix)), precon_matrix, input_buffer, + output_buffer) + else + # LU decomposition was previously created. The Jacobian always + # has the same sparsity pattern, so by using `lu!()` we can + # reuse some setup. 
+ try + lu!(orig_lu, sparse(precon_matrix); check=false) + catch e + if !isa(e, ArgumentError) + rethrow(e) + end + println("Sparsity pattern of matrix changed, rebuilding " + * " LU from scratch") + orig_lu = lu(sparse(precon_matrix)) + end + nl_solver_params.preconditioners[ir] = + (orig_lu, precon_matrix, input_buffer, output_buffer) + end + else + nl_solver_params.preconditioners[ir] = + (orig_lu, precon_matrix, input_buffer, output_buffer) + end + end + + + function lu_precon!(x) + precon_ppar, precon_f = x + + precon_lu, _, input_buffer, output_buffer = + nl_solver_params.preconditioners[ir] + + begin_serial_region() + counter = 1 + @loop_z_vperp_vpa iz ivperp ivpa begin + input_buffer[counter] = precon_f[ivpa,ivperp,iz] + counter += 1 + end + @loop_z iz begin + input_buffer[counter] = precon_ppar[iz] + counter += 1 + end + + begin_serial_region() + @serial_region begin + ldiv!(output_buffer, precon_lu, input_buffer) + end + + begin_serial_region() + counter = 1 + @loop_z_vperp_vpa iz ivperp ivpa begin + precon_f[ivpa,ivperp,iz] = output_buffer[counter] + counter += 1 + end + @loop_z iz begin + precon_ppar[iz] = output_buffer[counter] + counter += 1 + end + + # Ensure values of precon_f and precon_ppar are consistent across + # distributed-MPI block boundaries. For precon_f take the upwind + # value, and for precon_ppar take the average. + f_lower_endpoints = @view scratch_dummy.buffer_vpavperpr_1[:,:,ir] + f_upper_endpoints = @view scratch_dummy.buffer_vpavperpr_2[:,:,ir] + receive_buffer1 = @view scratch_dummy.buffer_vpavperpr_3[:,:,ir] + receive_buffer2 = @view scratch_dummy.buffer_vpavperpr_4[:,:,ir] + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + f_lower_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,1] + f_upper_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,end] + end + # We upwind the z-derivatives in `electron_z_advection!()`, so would + # expect that upwinding the results here in z would make sense. 
+ # However, upwinding here makes convergence much slower (~10x), + # compared to picking the values from one side or other of the block + # boundary, or taking the average of the values on either side. + # Neither direction is special, so taking the average seems most + # sensible (although in an initial test it does not seem to converge + # faster than just picking one or the other). + # Maybe this could indicate that it is more important to have a fully + # self-consistent Jacobian inversion for the + # `electron_vpa_advection()` part rather than taking half(ish) of the + # values from one block and the other half(ish) from the other. + reconcile_element_boundaries_MPI_z_pdf_vpavperpz!( + precon_f, f_lower_endpoints, f_upper_endpoints, receive_buffer1, + receive_buffer2, z) + + begin_serial_region() + @serial_region begin + buffer_1[] = precon_ppar[1] + buffer_2[] = precon_ppar[end] + end + reconcile_element_boundaries_MPI!( + precon_ppar, buffer_1, buffer_2, buffer_3, buffer_4, z) + + return nothing + end + + left_preconditioner = identity + right_preconditioner = lu_precon! + elseif nl_solver_params.preconditioner_type == "none" + left_preconditioner = identity + right_preconditioner = identity + else + error("preconditioner_type=$(nl_solver_params.preconditioner_type) is not " + * "supported by electron_backward_euler!().") + end + + # Do a backward-Euler update of the electron pdf, and (if evolve_ppar=true) the + # electron parallel pressure. 
+ function residual_func!(residual, new_variables) + electron_ppar_residual, f_electron_residual = residual + electron_ppar_newvar, f_electron_newvar = new_variables + + # enforce the boundary condition(s) on the electron pdf + @views enforce_boundary_condition_on_electron_pdf!( + f_electron_newvar, phi, moments.electron.vth[:,ir], + moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + composition.me_over_mi; bc_constraints=false) + + if evolve_ppar + this_dens = moments.electron.dens + this_upar = moments.electron.upar + this_vth = moments.electron.vth + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + this_vth[iz,ir] = sqrt(abs(2.0 * electron_ppar_newvar[iz,ir] / + (this_dens[iz,ir] * + composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], + electron_ppar_newvar, + moments.electron.vth[:,ir], + f_electron_newvar, vpa, + ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=this_dens, + electron_upar=this_upar, + electron_ppar=electron_ppar_newvar), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + else + # Calculate heat flux and derivatives using new_variables + @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], + electron_ppar_newvar, + moments.electron.vth[:,ir], + f_electron_newvar, vpa, + ir) + # compute the z-derivative of the parallel electron heat flux + @views derivative_z!(moments.electron.dqpar_dz[:,ir], + moments.electron.qpar[:,ir], buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + end + + if evolve_ppar + begin_z_region() + @loop_z iz begin + electron_ppar_residual[iz] = electron_ppar_old[iz,ir] + end + else + begin_z_region() + @loop_z iz begin + 
electron_ppar_residual[iz] = 0.0 + end + end + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + f_electron_residual[ivpa,ivperp,iz] = f_electron_old[ivpa,ivperp,iz] + end + electron_kinetic_equation_euler_update!( + f_electron_residual, electron_ppar_residual, f_electron_newvar, + electron_ppar_newvar, moments, z, vperp, vpa, z_spectral, + vpa_spectral, z_advect, vpa_advect, scratch_dummy, collisions, + composition, external_source_settings, num_diss_params, t_params, + ir; evolve_ppar=evolve_ppar, ion_dt=ion_dt, + soft_force_constraints=true) + + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + f_electron_residual[ivpa,ivperp,iz] = f_electron_newvar[ivpa,ivperp,iz] - f_electron_residual[ivpa,ivperp,iz] + end + if evolve_ppar + begin_z_region() + @loop_z iz begin + electron_ppar_residual[iz] = electron_ppar_newvar[iz] - electron_ppar_residual[iz] + end + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. 
+ if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(f_electron_residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(f_electron_residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if z.bc ∈ ("wall", "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index. 
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], + moments.electron.upar[iz,ir], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] > -zero + f_electron_residual[ivpa,ivperp,iz] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], + moments.electron.upar[iz,ir], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + f_electron_residual[ivpa,ivperp,iz] = 0.0 + end + end + end + end + + return nothing + end + + residual = (scratch_dummy.implicit_buffer_z_1, scratch_dummy.implicit_buffer_vpavperpz_1) + delta_x = (scratch_dummy.implicit_buffer_z_2, + scratch_dummy.implicit_buffer_vpavperpz_2) + rhs_delta = (scratch_dummy.implicit_buffer_z_3, + scratch_dummy.implicit_buffer_vpavperpz_3) + v = (scratch_dummy.implicit_buffer_z_4, + scratch_dummy.implicit_buffer_vpavperpz_4) + w = (scratch_dummy.implicit_buffer_z_5, + scratch_dummy.implicit_buffer_vpavperpz_5) + + newton_success = newton_solve!((electron_ppar_new, f_electron_new), + residual_func!, residual, delta_x, rhs_delta, + v, w, nl_solver_params; + left_preconditioner=left_preconditioner, + right_preconditioner=right_preconditioner, + coords=(z=z, vperp=vperp, vpa=vpa)) + if newton_success + #println("Newton its ", nl_solver_params.max_nonlinear_iterations_this_step[], " ", t_params.dt[]) + # update the time following the pdf update + t_params.t[] += t_params.dt[] + + if first_step && !reduced_by_ion_dt + # Adjust t_params.previous_dt[] which gives the initial timestep for + # the electron pseudotimestepping loop. 
+ # If ion_dt ", t_params.previous_dt[]) + #elseif nl_solver_params.max_linear_iterations_this_step[] > max(0.4 * nl_solver_params.nonlinear_max_iterations, 5) + elseif nl_solver_params.max_linear_iterations_this_step[] > t_params.decrease_dt_iteration_threshold + # Step succeeded, but took a lot of iterations so decrease initial + # step size. + print("decreasing previous_dt due to iteration count ", t_params.previous_dt[]) + t_params.previous_dt[] /= t_params.max_increase_factor + println(" -> ", t_params.previous_dt[]) + #elseif nl_solver_params.max_linear_iterations_this_step[] < max(0.1 * nl_solver_params.nonlinear_max_iterations, 2) + elseif nl_solver_params.max_linear_iterations_this_step[] < t_params.increase_dt_iteration_threshold && (ion_dt === nothing || t_params.previous_dt[] < t_params.cap_factor_ion_dt * ion_dt) + # Only took a few iterations, so increase initial step size. + print("increasing previous_dt due to iteration count ", t_params.previous_dt[]) + if ion_dt === nothing + t_params.previous_dt[] *= t_params.max_increase_factor + else + t_params.previous_dt[] = min(t_params.previous_dt[] * t_params.max_increase_factor, t_params.cap_factor_ion_dt * ion_dt) + end + println(" -> ", t_params.previous_dt[]) + end + end + + # Adjust the timestep depending on the iteration count. + # Note nl_solver_params.max_linear_iterations_this_step[] gives the total + # number of iterations, so is a better measure of the total work done by + # the solver than the nonlinear iteration count, or the linear iterations + # per nonlinear iteration + #if nl_solver_params.max_linear_iterations_this_step[] > max(0.2 * nl_solver_params.nonlinear_max_iterations, 10) + if nl_solver_params.max_linear_iterations_this_step[] > t_params.decrease_dt_iteration_threshold && t_params.dt[] > t_params.previous_dt[] + # Step succeeded, but took a lot of iterations so decrease step size. 
+ t_params.dt[] /= t_params.max_increase_factor + elseif nl_solver_params.max_linear_iterations_this_step[] < t_params.increase_dt_iteration_threshold && (ion_dt === nothing || t_params.dt[] < t_params.cap_factor_ion_dt * ion_dt) + # Only took a few iterations, so increase step size. + if ion_dt === nothing + t_params.dt[] *= t_params.max_increase_factor + else + t_params.dt[] = min(t_params.dt[] * t_params.max_increase_factor, t_params.cap_factor_ion_dt * ion_dt) + end + end + + first_step = false + else + t_params.dt[] *= 0.5 + + # Force the preconditioner to be recalculated, because we have just + # changed `dt` by a fairly large amount. + nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval + + # Swap old_scratch and new_scratch so that the next step restarts from the + # same state + scratch[1] = new_scratch + scratch[t_params.n_rk_stages+1] = old_scratch + old_scratch = scratch[1] + new_scratch = scratch[t_params.n_rk_stages+1] + f_electron_old = @view old_scratch.pdf_electron[:,:,:,ir] + f_electron_new = @view new_scratch.pdf_electron[:,:,:,ir] + electron_ppar_old = @view old_scratch.electron_ppar[:,ir] + electron_ppar_new = @view new_scratch.electron_ppar[:,ir] + end + + apply_electron_bc_and_constraints_no_r!(f_electron_new, phi, moments, z, + vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, + num_diss_params, composition, ir) + + if !evolve_ppar + # update the electron heat flux + moments.electron.qpar_updated[] = false + @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], + electron_ppar_new, + moments.electron.vth[:,ir], + f_electron_new, vpa, ir) + + # compute the z-derivative of the parallel electron heat flux + @views derivative_z!(moments.electron.dqpar_dz[:,ir], + moments.electron.qpar[:,ir], buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + end + + residual = -1.0 + if newton_success + # Calculate residuals to decide if iteration is converged. 
+ # Might want an option to calculate the residual only after a certain number + # of iterations (especially during initialization when there are likely to be + # a large number of iterations required) to avoid the expense, and especially + # the global MPI.Bcast()? + begin_z_vperp_vpa_region() + residual = steady_state_residuals(new_scratch.pdf_electron, + old_scratch.pdf_electron, + t_params.dt[]; use_mpi=true, + only_max_abs=true) + if global_rank[] == 0 + residual = first(values(residual))[1] + end + if evolve_ppar + ppar_residual = + steady_state_residuals(new_scratch.electron_ppar, + old_scratch.electron_ppar, + t_params.dt[]; use_mpi=true, + only_max_abs=true) + if global_rank[] == 0 + ppar_residual = first(values(ppar_residual))[1] + residual = max(residual, ppar_residual) + end + end + if global_rank[] == 0 + if residual_tolerance === nothing + residual_tolerance = t_params.converged_residual_value + end + electron_pdf_converged = abs(residual) < residual_tolerance + end + electron_pdf_converged = MPI.Bcast(electron_pdf_converged, 0, comm_world) + end + + if (mod(t_params.step_counter[] - initial_step_counter,100) == 0) + begin_serial_region() + @serial_region begin + if z.irank == 0 && z.irank == z.nrank - 1 + println("iteration: ", t_params.step_counter[] - initial_step_counter, " time: ", t_params.t[], " dt_electron: ", t_params.dt[], " phi_boundary: ", phi[[1,end],1], " residual: ", residual) + elseif z.irank == 0 + println("iteration: ", t_params.step_counter[] - initial_step_counter, " time: ", t_params.t[], " dt_electron: ", t_params.dt[], " phi_boundary_lower: ", phi[1,1], " residual: ", residual) + end + end + end + if ((t_params.step_counter[] % t_params.nwrite_moments == 0) + || (do_debug_io && (t_params.step_counter[] % debug_io_nwrite == 0))) + + if r.n == 1 + # For now can only do I/O within the pseudo-timestepping loop when there + # is no r-dimension, because different points in r would take different + # number and length of timesteps to 
converge. + begin_serial_region() + t_params.moments_output_counter[] += 1 + @serial_region begin + if io_electron !== nothing + t_params.write_moments_output[] = false + write_electron_state(scratch, moments, t_params, io_electron, + t_params.moments_output_counter[], r, z, vperp, + vpa) + end + end + end + end + + reset_nonlinear_per_stage_counters!(nl_solver_params) + + t_params.step_counter[] += 1 + if electron_pdf_converged + break + end + end + if !electron_pdf_converged + # If electron solve failed to converge for some `ir`, the failure will be + # handled by restarting the ion timestep with a smaller dt, so no need to try + # to solve for further `ir` values. + break + end + end + # Update the 'pdf' arrays with the final result + begin_r_z_vperp_vpa_region() + final_scratch_pdf = scratch[t_params.n_rk_stages+1].pdf_electron + @loop_r_z_vperp_vpa ir iz ivperp ivpa begin + pdf[ivpa,ivperp,iz,ir] = final_scratch_pdf[ivpa,ivperp,iz,ir] + end + if evolve_ppar + # Update `moments.electron.ppar` with the final electron pressure + begin_r_z_region() + scratch_ppar = scratch[t_params.n_rk_stages+1].electron_ppar + moments_ppar = moments.electron.ppar + @loop_r_z ir iz begin + moments_ppar[iz,ir] = scratch_ppar[iz,ir] + end + end + begin_serial_region() + @serial_region begin + if !electron_pdf_converged || do_debug_io + if io_electron !== nothing && io_electron !== true + t_params.moments_output_counter[] += 1 + write_electron_state(scratch, moments, t_params, io_electron, + t_params.moments_output_counter[], r, z, vperp, vpa) + finish_electron_io(io_electron) + end + end + end + + if r.n > 1 + error("Limits on iteration count and simtime assume 1D simulations. 
" + * "Need to fix handling of t_params.t[] and t_params.step_counter[], " + * "and also t_params.max_step_count_this_ion_step[] and " + * "t_params.max_t_increment_this_ion_step[]") + else + t_params.max_step_count_this_ion_step[] = + max(t_params.step_counter[] - initial_step_counter, + t_params.max_step_count_this_ion_step[]) + t_params.max_t_increment_this_ion_step[] = + max(t_params.t[] - initial_time, + t_params.max_t_increment_this_ion_step[]) + end + + initial_dt_scale_factor = 0.1 + if t_params.previous_dt[] < initial_dt_scale_factor * t_params.dt[] + # If dt has increased a lot, we can probably try a larger initial dt for the next + # solve. + t_params.previous_dt[] = initial_dt_scale_factor * t_params.dt[] + end + + if ion_dt !== nothing && t_params.dt[] != t_params.previous_dt[] + # Reset dt in case it was reduced to be less than 0.5*ion_dt + t_params.dt[] = t_params.previous_dt[] + end + if !electron_pdf_converged + success = "kinetic-electrons" + else + success = "" + end + return success +end + """ implicit_electron_advance!() @@ -596,26 +1557,35 @@ Do an implicit solve which finds: the steady-state electron shape function \$g_e backward-Euler advanced electron pressure which is updated using \$g_e\$ at the new time-level. -Implicit electron solve includes r-dimension. For 1D runs this makes no difference. In 2D -it might or might not be necessary. If the r-dimension is not needed in the implicit -solve, we would need to work on the parallelisation. The simplest option would be a -non-parallelised outer loop over r, with each nonlinear solve being parallelised over -{z,vperp,vpa}. More efficient might be to add an equivalent to the 'anyv' parallelisation -used for the collision operator (e.g. 'anyzv'?) to allow the outer r-loop to be -parallelised. +The r-dimension is not parallelised. For 1D runs this makes no difference. In 2D it might +or might not be necessary. If r-dimension parallelisation is needed, it would need some +work. 
The simplest option would be a non-parallelised outer loop over r, with each +nonlinear solve being parallelised over {z,vperp,vpa}. More efficient might be to add an +equivalent to the 'anyv' parallelisation used for the collision operator (e.g. 'anyzv'?) +to allow the outer r-loop to be parallelised. """ function implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, moments, fields, collisions, composition, geometry, external_source_settings, num_diss_params, r, z, vperp, vpa, r_spectral, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, gyroavs, - scratch_dummy, dt, nl_solver_params) + scratch_dummy, t_params, ion_dt, nl_solver_params) electron_ppar_out = fvec_out.electron_ppar # Store the solved-for pdf in n_rk_stages+1, because this was the version that gets # written to output for the explicit-electron-timestepping version. pdf_electron_out = scratch_electron.pdf_electron + # If we just defined the residual for the electron distribution function solve to be + # 'dg/dt=0', then we would be asking the solver (roughly) to find g such that + # 'dg/dt 1 - begin_r_z_vperp_region() - @loop_r_z_vperp ir iz ivperp begin - @views enforce_v_boundary_condition_local!(f_electron_residual[:,ivperp,iz,ir], vpa.bc, - vpa_advect[1].speed[:,ivperp,iz,ir], - num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - vpa, vpa_spectral) + # Set residual to zero where pdf_electron is determined by boundary conditions. 
+ if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(f_electron_residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end end - end - if vperp.n > 1 - begin_r_z_vpa_region() - enforce_vperp_boundary_condition!(f_electron_residual, vperp.bc, vperp, vperp_spectral, - vperp_adv, vperp_diffusion) - end - if z.bc == "wall" && (z.irank == 0 || z.irank == z.nrank - 1) - # Wall boundary conditions. Note that as density, upar, ppar do not - # change in this implicit step, f_new, f_old, and residual should all - # be zero at exactly the same set of grid points, so it is reasonable - # to zero-out `residual` to impose the boundary condition. We impose - # this after subtracting f_old in case rounding errors, etc. mean that - # at some point f_old had a different boundary condition cut-off - # index. - begin_r_vperp_vpa_region() - v_unnorm = vpa.scratch - zero = 1.0e-14 - if z.irank == 0 - iz = 1 - @loop_r ir begin + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(f_electron_residual, vperp.bc, vperp, vperp_spectral, + vperp_adv, vperp_diffusion) + end + if z.bc ∈ ("wall", "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note that + # as density, upar, ppar do not change in this implicit step, f_new, + # f_old, and residual should all be zero at exactly the same set of grid + # points, so it is reasonable to zero-out `residual` to impose the + # boundary condition. We impose this after subtracting f_old in case + # rounding errors, etc. mean that at some point f_old had a different + # boundary condition cut-off index. 
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], fvec_in.electron_upar[iz,ir], true, true) @loop_vperp_vpa ivperp ivpa begin - if v_unnorm > -zero - f_electron_residual[ivpa,ivperp,iz,ir] .= 0.0 + if v_unnorm[ivpa] > -zero + f_electron_residual[ivpa,ivperp,iz] = 0.0 end end end - end - if z.irank == z.nrank - 1 - iz = z.n - @loop_r ir begin + if z.irank == z.nrank - 1 + iz = z.n v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], fvec_in.electron_upar[iz,ir], true, true) - @loop_vperp_vpa ivpa ivperp begin - if v_unnorm < zero - f_electron_residual[ivpa,ivperp,iz,ir] .= 0.0 + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + f_electron_residual[ivpa,ivperp,iz] = 0.0 end end end end + begin_z_region() + @loop_z iz begin + @views moment_constraints_on_residual!(f_electron_residual[:,:,iz], + f_electron_new[:,:,iz], + (evolve_density=true, + evolve_upar=true, + evolve_ppar=true), + vpa) + end + return nothing end - begin_r_z_region() - @loop_r_z ir iz begin - @views moment_constraints_on_residual!(f_electron_residual[:,:,iz,ir], f_electron_new[:,:,iz,ir], - (evolve_density=true, evolve_upar=true, evolve_ppar=true), - vpa) + + residual = (scratch_dummy.implicit_buffer_z_1, + scratch_dummy.implicit_buffer_vpavperpz_1) + delta_x = (scratch_dummy.implicit_buffer_z_2, + scratch_dummy.implicit_buffer_vpavperpz_2) + rhs_delta = (scratch_dummy.implicit_buffer_z_3, + scratch_dummy.implicit_buffer_vpavperpz_3) + v = (scratch_dummy.implicit_buffer_z_4, + scratch_dummy.implicit_buffer_vpavperpz_4) + w = (scratch_dummy.implicit_buffer_z_5, + scratch_dummy.implicit_buffer_vpavperpz_5) + + @views newton_success = newton_solve!((electron_ppar_out[:,ir], + pdf_electron_out[:,:,:,ir]), + residual_func!, residual, delta_x, + rhs_delta, v, w, nl_solver_params; + left_preconditioner=nothing, + right_preconditioner=nothing, + coords=(z=z,
vperp=vperp, vpa=vpa)) + if !newton_success + break end - return nothing end - residual = (scratch_dummy.implicit_buffer_zr_1, - scratch_dummy.implicit_buffer_vpavperpzr_1) - delta_x = (scratch_dummy.implicit_buffer_zr_2, - scratch_dummy.implicit_buffer_vpavperpzr_2) - rhs_delta = (scratch_dummy.implicit_buffer_zr_3, - scratch_dummy.implicit_buffer_vpavperpzr_3) - v = (scratch_dummy.implicit_buffer_zr_4, - scratch_dummy.implicit_buffer_vpavperpzr_4) - w = (scratch_dummy.implicit_buffer_zr_5, - scratch_dummy.implicit_buffer_vpavperpzr_5) - - newton_success = newton_solve!((electron_ppar_out, pdf_electron_out), residual_func!, - residual, delta_x, rhs_delta, v, w, nl_solver_params; - left_preconditioner=nothing, - right_preconditioner=nothing, - coords=(r=r, z=z, vperp=vperp, vpa=vpa)) - # Fill pdf.electron.norm non_scratch_pdf = pdf.electron.norm begin_r_z_vperp_vpa_region() @@ -863,10 +1851,44 @@ function apply_electron_bc_and_constraints!(this_scratch, phi, moments, z, vperp end end +function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vperp, + vpa, vperp_spectral, vpa_spectral, + vpa_advect, num_diss_params, composition, + ir) + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + f_electron[ivpa,ivperp,iz] = max(f_electron[ivpa,ivperp,iz], 0.0) + end + + # enforce the boundary condition(s) on the electron pdf + @views enforce_boundary_condition_on_electron_pdf!( + f_electron, phi, moments.electron.vth[:,ir], moments.electron.upar[:,ir], + z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + composition.me_over_mi) + + begin_z_region() + A = moments.electron.constraints_A_coefficient + B = moments.electron.constraints_B_coefficient + C = moments.electron.constraints_C_coefficient + skip_first = z.irank == 0 && z.bc != "periodic" + skip_last = z.irank == z.nrank - 1 && z.bc != "periodic" + @loop_z iz begin + if (iz == 1 && skip_first) || (iz == 
z.n && skip_last) + continue + end + (A[iz,ir], B[iz,ir], C[iz,ir]) = + @views hard_force_moment_constraints!(f_electron[:,:,iz], + (evolve_density=true, + evolve_upar=true, + evolve_ppar=true), vpa) + end +end + function enforce_boundary_condition_on_electron_pdf!(pdf, phi, vthe, upar, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_adv, moments, vpa_diffusion, - me_over_mi) + me_over_mi; bc_constraints=true) newton_tol = 1.0e-13 @@ -889,6 +1911,28 @@ function enforce_boundary_condition_on_electron_pdf!(pdf, phi, vthe, upar, z, vp if z.bc == "periodic" # Nothing more to do for z-periodic boundary conditions return nothing + elseif z.bc == "constant" + begin_r_vperp_vpa_region() + density_offset = 1.0 + vwidth = 1.0/sqrt(composition.me_over_mi) + dens = moments.electron.dens + if z.irank == 0 + speed = z_adv[1].speed + @loop_r_vperp_vpa ir ivperp ivpa begin + if speed[1,ivpa,ivperp,ir] > 0.0 + pdf[ivpa,ivperp,1,ir,is] = density_offset / dens[1,ir] * vthe[1,ir] * exp(-(speed[1,ivpa,ivperp,ir]^2 + vperp.grid[ivperp]^2)/vwidth^2) + end + end + end + if z.irank == z.nrank - 1 + speed = z_adv[is].speed + @loop_r_vperp_vpa ir ivperp ivpa begin + if speed[end,ivpa,ivperp,ir] > 0.0 + pdf[ivpa,ivperp,end,ir,is] = density_offset / dens[end,ir] * vthe[end,ir] * exp(-(speed[end,ivpa,ivperp,ir]^2 + vperp.grid[ivperp]^2)/vwidth^2) + end + end + end + return nothing end # first enforce the boundary condition at z_min. 
@@ -907,22 +1951,29 @@ function enforce_boundary_condition_on_electron_pdf!(pdf, phi, vthe, upar, z, vp function get_residual_and_coefficients_for_bc(a1, a1prime, a2, a2prime, b1, b1prime, c1, c1prime, c2, c2prime, d1, d1prime, e1, e1prime, e2, e2prime, u_over_vt) - alpha = a1 + 2.0 * a2 - alphaprime = a1prime + 2.0 * a2prime - beta = c1 + 2.0 * c2 - betaprime = c1prime + 2.0 * c2prime - gamma = u_over_vt^2 * alpha - 2.0 * u_over_vt * b1 + beta - gammaprime = u_over_vt^2 * alphaprime - 2.0 * u_over_vt * b1prime + betaprime - delta = u_over_vt^2 * beta - 2.0 * u_over_vt * d1 + e1 + 2.0 * e2 - deltaprime = u_over_vt^2 * betaprime - 2.0 * u_over_vt * d1prime + e1prime + 2.0 * e2prime - - A = (0.5 * beta - delta) / (beta * gamma - alpha * delta) - Aprime = (0.5 * betaprime - deltaprime - - (0.5 * beta - delta) * (gamma * betaprime + beta * gammaprime - delta * alphaprime - alpha * deltaprime) - / (beta * gamma - alpha * delta) - ) / (beta * gamma - alpha * delta) - C = (1.0 - alpha * A) / beta - Cprime = -(A * alphaprime + alpha * Aprime) / beta - (1.0 - alpha * A) * betaprime / beta^2 + if bc_constraints + alpha = a1 + 2.0 * a2 + alphaprime = a1prime + 2.0 * a2prime + beta = c1 + 2.0 * c2 + betaprime = c1prime + 2.0 * c2prime + gamma = u_over_vt^2 * alpha - 2.0 * u_over_vt * b1 + beta + gammaprime = u_over_vt^2 * alphaprime - 2.0 * u_over_vt * b1prime + betaprime + delta = u_over_vt^2 * beta - 2.0 * u_over_vt * d1 + e1 + 2.0 * e2 + deltaprime = u_over_vt^2 * betaprime - 2.0 * u_over_vt * d1prime + e1prime + 2.0 * e2prime + + A = (0.5 * beta - delta) / (beta * gamma - alpha * delta) + Aprime = (0.5 * betaprime - deltaprime + - (0.5 * beta - delta) * (gamma * betaprime + beta * gammaprime - delta * alphaprime - alpha * deltaprime) + / (beta * gamma - alpha * delta) + ) / (beta * gamma - alpha * delta) + C = (1.0 - alpha * A) / beta + Cprime = -(A * alphaprime + alpha * Aprime) / beta - (1.0 - alpha * A) * betaprime / beta^2 + else + A = 1.0 + Aprime = 0.0 + C = 0.0 + 
Cprime = 0.0 + end epsilon = A * b1 + C * d1 - u_over_vt epsilonprime = b1 * Aprime + A * b1prime + d1 * Cprime + C * d1prime @@ -932,7 +1983,7 @@ function enforce_boundary_condition_on_electron_pdf!(pdf, phi, vthe, upar, z, vp if z.irank == 0 if z.bc != "wall" - error("Options other than wall or z-periodic bc not implemented yet for electrons") + error("Options other than wall, constant or z-periodic bc not implemented yet for electrons") end @loop_r ir begin # Impose sheath-edge boundary condition, while also imposing moment @@ -1054,6 +2105,13 @@ function enforce_boundary_condition_on_electron_pdf!(pdf, phi, vthe, upar, z, vp # respect to vcut delta_v = - epsilon / epsilonprime + if vcut > vthe[1,ir] && epsilonprime < 0.0 + # epsilon should be increasing with vcut at epsilon=0, so if + # epsilonprime is negative, the solution is actually at a lower vcut - + # at larger vcut, epsilon will just tend to 0 but never reach it. + delta_v = -0.1 * vthe[1,ir] + end + # Prevent the step size from getting too big, to make Newton iteration # more robust. delta_v = min(delta_v, 0.1 * vthe[1,ir]) @@ -1314,6 +2372,13 @@ function enforce_boundary_condition_on_electron_pdf!(pdf, phi, vthe, upar, z, vp # respect to vcut delta_v = - epsilon / epsilonprime + if vcut > vthe[end,ir] && epsilonprime > 0.0 + # epsilon should be decreasing with vcut at epsilon=0, so if + # epsilonprime is positive, the solution is actually at a lower vcut - + # at larger vcut, epsilon will just tend to 0 but never reach it. + delta_v = -0.1 * vthe[end,ir] + end + # Prevent the step size from getting too big, to make Newton iteration # more robust.
delta_v = min(delta_v, 0.1 * vthe[end,ir]) @@ -1477,11 +2542,6 @@ function electron_adaptive_timestep_update!(scratch, t, t_params, moments, phi, error_norms = error_norm_type[] total_points = mk_int[] - # Read the current dt here, so we only need one _block_synchronize() call for this and - # the begin_s_r_z_vperp_vpa_region() - current_dt = t_params.dt[] - _block_synchronize() - # Test CFL conditions for advection in electron kinetic equation to give stability # limit for timestep # @@ -1561,9 +2621,8 @@ function electron_adaptive_timestep_update!(scratch, t, t_params, moments, phi, end adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, total_points, - current_dt, error_norm_method, "", 0.0, - composition; electron=true, - local_max_dt=local_max_dt) + error_norm_method, "", 0.0, composition; + electron=true, local_max_dt=local_max_dt) if t_params.previous_dt[] == 0.0 # Timestep failed, so reset scratch[t_params.n_rk_stages+1] equal to # scratch[1] to start the timestep over. @@ -1774,81 +2833,95 @@ function update_electron_pdf_with_picard_iteration!(pdf, dens, vthe, ppar, ddens end """ - electron_kinetic_equation_euler_update!(fvec, pdf, moments, z, vperp, vpa, - z_spectral, vpa_spectral, z_advect, - vpa_advect, scratch_dummy, collisions, - num_diss_params, dt; evolve_ppar=false) + electron_kinetic_equation_euler_update!(f_out, ppar_out, f_in, ppar_in, moments, + z, vperp, vpa, z_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, + collisions, composition, + external_source_settings, + num_diss_params, t_params, ir; + evolve_ppar=false, ion_dt=nothing) Do a forward-Euler update of the electron kinetic equation. When `evolve_ppar=true` is passed, also updates the electron parallel pressure. + +Note that this function operates on a single point in `r`, given by `ir`, and `f_out`, +`ppar_out`, `f_in`, and `ppar_in` should have no r-dimension. 
""" -function electron_kinetic_equation_euler_update!(fvec_out, fvec_in, moments, z, vperp, - vpa, z_spectral, vpa_spectral, z_advect, - vpa_advect, scratch_dummy, collisions, - composition, external_source_settings, - num_diss_params, dt; evolve_ppar=false, - ion_dt=nothing) +function electron_kinetic_equation_euler_update!(f_out, ppar_out, f_in, ppar_in, moments, + z, vperp, vpa, z_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, + collisions, composition, + external_source_settings, + num_diss_params, t_params, ir; + evolve_ppar=false, ion_dt=nothing, + soft_force_constraints=false) + dt = t_params.dt[] + # add the contribution from the z advection term - electron_z_advection!(fvec_out.pdf_electron, fvec_in.pdf_electron, - moments.electron.upar, moments.electron.vth, z_advect, z, - vpa.grid, z_spectral, scratch_dummy, dt) + @views electron_z_advection!(f_out, f_in, moments.electron.upar[:,ir], + moments.electron.vth[:,ir], z_advect, z, vpa.grid, + z_spectral, scratch_dummy, dt, ir) # add the contribution from the wpa advection term - electron_vpa_advection!(fvec_out.pdf_electron, fvec_in.pdf_electron, - moments.electron.dens, moments.electron.upar, - fvec_in.electron_ppar, moments, vpa_advect, vpa, vpa_spectral, - scratch_dummy, dt, external_source_settings.electron) + @views electron_vpa_advection!(f_out, f_in, moments.electron.dens[:,ir], + moments.electron.upar[:,ir], ppar_in, moments, + vpa_advect, vpa, vpa_spectral, scratch_dummy, dt, + external_source_settings.electron, ir) # add in the contribution to the residual from the term proportional to the pdf - add_contribution_from_pdf_term!(fvec_out.pdf_electron, fvec_in.pdf_electron, - fvec_in.electron_ppar, moments.electron.dens, - moments.electron.upar, moments, vpa.grid, z, dt, - external_source_settings.electron) + add_contribution_from_pdf_term!(f_out, f_in, ppar_in, moments.electron.dens[:,ir], + moments.electron.upar[:,ir], moments, vpa.grid, z, dt, + external_source_settings.electron, ir) 
# add in numerical dissipation terms - add_dissipation_term!(fvec_out.pdf_electron, fvec_in.pdf_electron, scratch_dummy, - z_spectral, z, vpa, vpa_spectral, num_diss_params, dt) + add_dissipation_term!(f_out, f_in, scratch_dummy, z_spectral, z, vpa, vpa_spectral, + num_diss_params, dt) if collisions.krook.nuee0 > 0.0 || collisions.krook.nuei0 > 0.0 # Add a Krook collision operator # Set dt=-1 as we update the residual here rather than adding an update to # 'fvec_out'. - electron_krook_collisions!(fvec_out.pdf_electron, fvec_in.pdf_electron, - moments.electron.dens, moments.electron.upar, - moments.ion.upar, moments.electron.vth, collisions, - vperp, vpa, dt) + @views electron_krook_collisions!(f_out, f_in, moments.electron.dens[:,ir], + moments.electron.upar[:,ir], + moments.ion.upar[:,ir], + moments.electron.vth[:,ir], collisions, vperp, + vpa, dt) end - total_external_electron_sources!(fvec_out.pdf_electron, fvec_in.pdf_electron, - moments.electron.dens, moments.electron.upar, moments, - composition, external_source_settings.electron, vperp, - vpa, dt) + @views total_external_electron_sources!(f_out, f_in, moments.electron.dens[:,ir], + moments.electron.upar[:,ir], moments, + composition, external_source_settings.electron, + vperp, vpa, dt, ir) + if soft_force_constraints + electron_implicit_constraint_forcing!(f_out, f_in, + t_params.constraint_forcing_rate, vpa, dt, + ir) + end if evolve_ppar - electron_energy_equation!(fvec_out.electron_ppar, fvec_in.electron_ppar, - moments.electron.dens, moments.electron.upar, - moments.ion.dens, moments.ion.upar, moments.ion.ppar, - moments.neutral.dens, moments.neutral.uz, - moments.neutral.pz, moments.electron, collisions, dt, - composition, external_source_settings.electron, - num_diss_params, z) + @views electron_energy_equation_no_r!( + ppar_out, ppar_in, moments.electron.dens[:,ir], + moments.electron.upar[:,ir], moments.ion.dens[:,ir,:], + moments.ion.upar[:,ir,:], moments.ion.ppar[:,ir,:], + 
moments.neutral.dens[:,ir,:], moments.neutral.uz[:,ir,:], + moments.neutral.pz[:,ir,:], moments.electron, collisions, dt, + composition, external_source_settings.electron, num_diss_params, z, ir) if ion_dt !== nothing # Add source term to turn steady state solution into a backward-Euler update of # electron_ppar with the ion timestep `ion_dt`. - ppar_out = fvec_out.electron_ppar ppar_previous_ion_step = moments.electron.ppar - begin_r_z_region() - @loop_r_z ir iz begin + begin_z_region() + @loop_z iz begin # At this point, ppar_out = ppar_in + dt*RHS(ppar_in). Here we add a # source/damping term so that in the steady state of the electron # pseudo-timestepping iteration, # RHS(ppar) - (ppar - ppar_previous_ion_step) / ion_dt = 0, # resulting in a backward-Euler step (as long as the pseudo-timestepping # loop converges). - ppar_out[iz,ir] += -dt * (ppar_out[iz,ir] - ppar_previous_ion_step[iz,ir]) / ion_dt + ppar_out[iz] += -dt * (ppar_in[iz] - ppar_previous_ion_step[iz,ir]) / ion_dt end end end @@ -1857,89 +2930,200 @@ function electron_kinetic_equation_euler_update!(fvec_out, fvec_in, moments, z, end """ -electron_kinetic_equation_residual! calculates the residual of the (time-independent) electron kinetic equation -INPUTS: - residual = dummy array to be filled with the residual of the electron kinetic equation -OUTPUT: - residual = updated residual of the electron kinetic equation + fill_electron_kinetic_equation_Jacobian!(jacobian_matrix, f, ppar, moments, + collisions, composition, z, vperp, vpa, + z_spectral, vperp_spectral, + vpa_spectral, z_advect, vpa_advect, + scratch_dummy, external_source_settings, + num_diss_params, t_params, ion_dt, + ir, evolve_ppar) + +Fill a pre-allocated matrix with the Jacobian matrix for the electron kinetic equation and (if +`evolve_ppar=true`) the electron energy equation.
""" -function electron_kinetic_equation_residual!(residual, max_term, single_term, pdf, dens, upar, vth, ppar, upar_ion, - ddens_dz, dppar_dz, dqpar_dz, dvth_dz, - z, vperp, vpa, z_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, - collisions, external_source_settings, - num_diss_params, dt_electron) +function fill_electron_kinetic_equation_Jacobian!(jacobian_matrix, f, ppar, moments, + collisions, composition, z, vperp, vpa, + z_spectral, vperp_spectral, + vpa_spectral, z_advect, vpa_advect, + scratch_dummy, external_source_settings, + num_diss_params, t_params, ion_dt, ir, + evolve_ppar) + dt = t_params.dt[] + + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + + vth = @view moments.electron.vth[:,ir] + me = composition.me_over_mi + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + qpar = @view moments.electron.qpar[:,ir] + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dupar_dz = @view moments.electron.dupar_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + dvth_dz = @view moments.electron.dvth_dz[:,ir] + dqpar_dz = @view moments.electron.dqpar_dz[:,ir] + + upar_ion = @view moments.ion.upar[:,ir,1] + + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar + third_moment = scratch_dummy.buffer_z_1 + dthird_moment_dz = scratch_dummy.buffer_z_2 + begin_z_region() + @loop_z iz begin + third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz] + end + derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) - # initialise the residual to zero - begin_r_vperp_vpa_region() - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - residual[ivpa,ivperp,iz,ir] = 0.0 - end - # calculate the contribution to the residual from the z advection term - electron_z_advection!(residual, pdf, upar, vth, z_advect, z, vpa.grid, 
z_spectral, scratch_dummy, -1.0) - #dt_max_zadv = simple_z_advection!(residual, pdf, vth, z, vpa.grid, dt_electron) - #single_term .= residual - #max_term .= abs.(residual) - #println("z_adv residual = ", maximum(abs.(single_term))) - #println("z_advection: ", sum(residual), " dqpar_dz: ", sum(abs.(dqpar_dz))) - #calculate_contribution_from_z_advection!(residual, pdf, vth, z, vpa.grid, z_spectral, scratch_dummy) - # add in the contribution to the residual from the wpa advection term - electron_vpa_advection!(residual, pdf, ppar, vth, dppar_dz, dqpar_dz, dvth_dz, - vpa_advect, vpa, vpa_spectral, scratch_dummy, -1.0, - external_source_settings.electron) - #dt_max_vadv = simple_vpa_advection!(residual, pdf, ppar, vth, dppar_dz, dqpar_dz, dvth_dz, vpa, dt_electron) - #@. single_term = residual - single_term - #max_term .= max.(max_term, abs.(single_term)) - #@. single_term = residual - #println("v_adv residual = ", maximum(abs.(single_term))) - #add_contribution_from_wpa_advection!(residual, pdf, vth, ppar, dppar_dz, dqpar_dz, dvth_dz, vpa, vpa_spectral) - # add in the contribution to the residual from the term proportional to the pdf - add_contribution_from_pdf_term!(residual, pdf, ppar, dens, moments, vpa.grid, z, -1.0, - external_source_settings.electron) - #@. single_term = residual - single_term - #max_term .= max.(max_term, abs.(single_term)) - #@. single_term = residual - #println("pdf_term residual = ", maximum(abs.(single_term))) - # @loop_vpa ivpa begin - # @loop_z iz begin - # println("LHS: ", residual[ivpa,1,iz,1], " vpa: ", vpa.grid[ivpa], " z: ", z.grid[iz], " dvth_dz: ", dvth_dz[iz,1], " type: ", 1) - # end - # println("") - # end - # println("") - # add in numerical dissipation terms - add_dissipation_term!(residual, pdf, scratch_dummy, z_spectral, z, vpa, vpa_spectral, - num_diss_params, -1.0) - #@. 
single_term = residual - single_term - #println("dissipation residual = ", maximum(abs.(single_term))) - #max_term .= max.(max_term, abs.(single_term)) - # add in particle and heat source term(s) - #@. single_term = residual - #add_source_term!(residual, vpa.grid, z.grid, dvth_dz) - #@. single_term = residual - single_term - #max_term .= max.(max_term, abs.(single_term)) - #stop() - # @loop_vpa ivpa begin - # @loop_z iz begin - # println("total_residual: ", residual[ivpa,1,iz,1], " vpa: ", vpa.grid[ivpa], " z: ", z.grid[iz], " dvth_dz: ", dvth_dz[iz,1], " type: ", 2) - # end - # println("") - # end - # stop() - #dt_max = min(dt_max_zadv, dt_max_vadv) + pdf_size = z.n * vperp.n * vpa.n + v_size = vperp.n * vpa.n - if collisions.krook_collision_frequency_prefactor_ee > 0.0 - # Add a Krook collision operator - # Set dt=-1 as we update the residual here rather than adding an update to - # 'fvec_out'. - electron_krook_collisions!(residual, pdf, dens, upar, upar_ion, vth, - collisions, vperp, vpa, -1.0) + # Initialise jacobian_matrix to the identity + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + v_remainder = (ivperp - 1) * vpa.n + ivpa + + jacobian_matrix[row,:] .= 0.0 + jacobian_matrix[row,row] += 1.0 end + begin_z_region() + @loop_z iz begin + # Rows corresponding to electron_ppar + row = pdf_size + iz - dt_max = dt_electron - #println("dt_max: ", dt_max, " dt_max_zadv: ", dt_max_zadv, " dt_max_vadv: ", dt_max_vadv) - return dt_max + jacobian_matrix[row,:] .= 0.0 + jacobian_matrix[row,row] += 1.0 + end + + z_speed = @view z_advect[1].speed[:,:,:,ir] + + add_electron_z_advection_to_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, me, z, vperp, vpa, z_spectral, + z_advect, scratch_dummy, dt, ir; ppar_offset=pdf_size) + add_electron_vpa_advection_to_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, + 
dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral, + vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir; + ppar_offset=pdf_size) + add_contribution_from_electron_pdf_term_to_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, + dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z, + vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir; ppar_offset=pdf_size) + add_electron_dissipation_term_to_Jacobian!( + jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, ir) + add_electron_krook_collisions_to_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, upar_ion, collisions, z, vperp, vpa, + z_speed, dt, ir; ppar_offset=pdf_size) + add_total_external_electron_source_to_Jacobian!( + jacobian_matrix, f, moments, me, z_speed, external_source_settings.electron, z, + vperp, vpa, dt, ir; ppar_offset=pdf_size) + add_electron_implicit_constraint_forcing_to_Jacobian!( + jacobian_matrix, f, z_speed, z, vperp, vpa, t_params.constraint_forcing_rate, dt, + ir) + # Always add the electron energy equation term, even if evolve_ppar=false, so that the + # Jacobian matrix always has the same shape, meaning that we can always reuse the LU + # factorization struct. + add_electron_energy_equation_to_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz, + dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral, + num_diss_params, dt, ir; ppar_offset=pdf_size) + if ion_dt !== nothing + add_ion_dt_forcing_of_electron_ppar_to_Jacobian!( + jacobian_matrix, z, dt, ion_dt, ir; ppar_offset=pdf_size) + end + + return nothing end +#""" +#electron_kinetic_equation_residual! 
calculates the residual of the (time-independent) electron kinetic equation +#INPUTS: +# residual = dummy array to be filled with the residual of the electron kinetic equation +#OUTPUT: +# residual = updated residual of the electron kinetic equation +#""" +#function electron_kinetic_equation_residual!(residual, max_term, single_term, pdf, dens, upar, vth, ppar, upar_ion, +# ddens_dz, dppar_dz, dqpar_dz, dvth_dz, +# z, vperp, vpa, z_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, +# collisions, external_source_settings, +# num_diss_params, dt_electron) +# +# # initialise the residual to zero +# begin_r_vperp_vpa_region() +# @loop_r_z_vperp_vpa ir iz ivperp ivpa begin +# residual[ivpa,ivperp,iz,ir] = 0.0 +# end +# # calculate the contribution to the residual from the z advection term +# electron_z_advection!(residual, pdf, upar, vth, z_advect, z, vpa.grid, z_spectral, scratch_dummy, -1.0) +# #dt_max_zadv = simple_z_advection!(residual, pdf, vth, z, vpa.grid, dt_electron) +# #single_term .= residual +# #max_term .= abs.(residual) +# #println("z_adv residual = ", maximum(abs.(single_term))) +# #println("z_advection: ", sum(residual), " dqpar_dz: ", sum(abs.(dqpar_dz))) +# #calculate_contribution_from_z_advection!(residual, pdf, vth, z, vpa.grid, z_spectral, scratch_dummy) +# # add in the contribution to the residual from the wpa advection term +# electron_vpa_advection!(residual, pdf, ppar, vth, dppar_dz, dqpar_dz, dvth_dz, +# vpa_advect, vpa, vpa_spectral, scratch_dummy, -1.0, +# external_source_settings.electron) +# #dt_max_vadv = simple_vpa_advection!(residual, pdf, ppar, vth, dppar_dz, dqpar_dz, dvth_dz, vpa, dt_electron) +# #@. single_term = residual - single_term +# #max_term .= max.(max_term, abs.(single_term)) +# #@. 
single_term = residual +# #println("v_adv residual = ", maximum(abs.(single_term))) +# #add_contribution_from_wpa_advection!(residual, pdf, vth, ppar, dppar_dz, dqpar_dz, dvth_dz, vpa, vpa_spectral) +# # add in the contribution to the residual from the term proportional to the pdf +# add_contribution_from_pdf_term!(residual, pdf, ppar, dens, moments, vpa.grid, z, -1.0, +# external_source_settings.electron) +# #@. single_term = residual - single_term +# #max_term .= max.(max_term, abs.(single_term)) +# #@. single_term = residual +# #println("pdf_term residual = ", maximum(abs.(single_term))) +# # @loop_vpa ivpa begin +# # @loop_z iz begin +# # println("LHS: ", residual[ivpa,1,iz,1], " vpa: ", vpa.grid[ivpa], " z: ", z.grid[iz], " dvth_dz: ", dvth_dz[iz,1], " type: ", 1) +# # end +# # println("") +# # end +# # println("") +# # add in numerical dissipation terms +# add_dissipation_term!(residual, pdf, scratch_dummy, z_spectral, z, vpa, vpa_spectral, +# num_diss_params, -1.0) +# #@. single_term = residual - single_term +# #println("dissipation residual = ", maximum(abs.(single_term))) +# #max_term .= max.(max_term, abs.(single_term)) +# # add in particle and heat source term(s) +# #@. single_term = residual +# #add_source_term!(residual, vpa.grid, z.grid, dvth_dz) +# #@. single_term = residual - single_term +# #max_term .= max.(max_term, abs.(single_term)) +# #stop() +# # @loop_vpa ivpa begin +# # @loop_z iz begin +# # println("total_residual: ", residual[ivpa,1,iz,1], " vpa: ", vpa.grid[ivpa], " z: ", z.grid[iz], " dvth_dz: ", dvth_dz[iz,1], " type: ", 2) +# # end +# # println("") +# # end +# # stop() +# #dt_max = min(dt_max_zadv, dt_max_vadv) +# +# if collisions.krook_collision_frequency_prefactor_ee > 0.0 +# # Add a Krook collision operator +# # Set dt=-1 as we update the residual here rather than adding an update to +# # 'fvec_out'. 
+# electron_krook_collisions!(residual, pdf, dens, upar, upar_ion, vth, +# collisions, vperp, vpa, -1.0) +# end +# +# dt_max = dt_electron +# #println("dt_max: ", dt_max, " dt_max_zadv: ", dt_max_zadv, " dt_max_vadv: ", dt_max_vadv) +# return dt_max +#end + function simple_z_advection!(advection_term, pdf, vth, z, vpa, dt_max_in) dt_max = dt_max_in # take the z derivative of the input pdf @@ -2026,29 +3210,49 @@ end function add_dissipation_term!(pdf_out, pdf_in, scratch_dummy, z_spectral, z, vpa, vpa_spectral, num_diss_params, dt) - dummy_zr1 = @view scratch_dummy.dummy_zrs[:,:,1] - dummy_zr2 = @view scratch_dummy.buffer_vpavperpzr_1[1,1,:,:] - buffer_r_1 = @view scratch_dummy.buffer_rs_1[:,1] - buffer_r_2 = @view scratch_dummy.buffer_rs_2[:,1] - buffer_r_3 = @view scratch_dummy.buffer_rs_3[:,1] - buffer_r_4 = @view scratch_dummy.buffer_rs_4[:,1] - # add in numerical dissipation terms - #@loop_vperp_vpa ivperp ivpa begin - # @views derivative_z!(dummy_zr1, pdf_in[ivpa,ivperp,:,:], buffer_r_1, buffer_r_2, buffer_r_3, - # buffer_r_4, z_spectral, z) - # @views derivative_z!(dummy_zr2, dummy_zr1, buffer_r_1, buffer_r_2, buffer_r_3, - # buffer_r_4, z_spectral, z) - # @. residual[ivpa,ivperp,:,:] -= num_diss_params.electron.z_dissipation_coefficient * dummy_zr2 - #end - begin_r_z_vperp_region() - @loop_r_z_vperp ir iz ivperp begin - #@views derivative!(vpa.scratch, pdf_in[:,ivperp,iz,ir], vpa, false) - #@views derivative!(vpa.scratch2, vpa.scratch, vpa, false) - #@. residual[:,ivperp,iz,ir] -= num_diss_params.electron.vpa_dissipation_coefficient * vpa.scratch2 - @views second_derivative!(vpa.scratch, pdf_in[:,ivperp,iz,ir], vpa, vpa_spectral) - @. 
pdf_out[:,ivperp,iz,ir] += dt * num_diss_params.electron.vpa_dissipation_coefficient * vpa.scratch + if num_diss_params.electron.vpa_dissipation_coefficient ≤ 0.0 + return nothing + end + + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views second_derivative!(vpa.scratch, pdf_in[:,ivperp,iz], vpa, vpa_spectral) + @. pdf_out[:,ivperp,iz] += dt * num_diss_params.electron.vpa_dissipation_coefficient * vpa.scratch + end + return nothing +end + +function add_electron_dissipation_term_to_Jacobian!(jacobian_matrix, f, num_diss_params, + z, vperp, vpa, vpa_spectral, z_speed, + dt, ir; f_offset=0) + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) + @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n + + vpa_dissipation_coefficient = num_diss_params.electron.vpa_dissipation_coefficient + + if vpa_dissipation_coefficient ≤ 0.0 + return nothing + end + + v_size = vperp.n * vpa.n + vpa_dense_second_deriv_matrix = vpa_spectral.dense_second_deriv_matrix + + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset + + # Terms from add_dissipation_term!() + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] -= dt * vpa_dissipation_coefficient * vpa_dense_second_deriv_matrix[ivpa,icolvpa] + end end - #stop() + return nothing end @@ -2251,35 +3455,34 @@ end # add contribution to the residual coming from the term proporational to the pdf function add_contribution_from_pdf_term!(pdf_out, pdf_in, ppar, dens, upar, moments, vpa, - z, dt, electron_source_settings) - vth = moments.electron.vth - ddens_dz = moments.electron.ddens_dz - dvth_dz = moments.electron.dvth_dz - dqpar_dz = moments.electron.dqpar_dz - 
begin_r_z_vperp_vpa_region() - @loop_r_z ir iz begin - this_dqpar_dz = dqpar_dz[iz,ir] - this_ppar = ppar[iz,ir] - this_vth = vth[iz,ir] - this_ddens_dz = ddens_dz[iz,ir] - this_dens = dens[iz,ir] - this_dvth_dz = dvth_dz[iz,ir] - this_vth = vth[iz,ir] + z, dt, electron_source_settings, ir) + vth = @view moments.electron.vth[:,ir] + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dvth_dz = @view moments.electron.dvth_dz[:,ir] + dqpar_dz = @view moments.electron.dqpar_dz[:,ir] + begin_z_vperp_vpa_region() + @loop_z iz begin + this_dqpar_dz = dqpar_dz[iz] + this_ppar = ppar[iz] + this_vth = vth[iz] + this_ddens_dz = ddens_dz[iz] + this_dens = dens[iz] + this_dvth_dz = dvth_dz[iz] + this_vth = vth[iz] @loop_vperp_vpa ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] += + pdf_out[ivpa,ivperp,iz] += dt * (-0.5 * this_dqpar_dz / this_ppar - vpa[ivpa] * this_vth * (this_ddens_dz / this_dens - this_dvth_dz / this_vth)) * - pdf_in[ivpa,ivperp,iz,ir] - #pdf_out[ivpa, ivperp, :, :] -= (-0.5 * dqpar_dz[:, :] / ppar[:, :]) * pdf_in[ivpa, ivperp, :, :] + pdf_in[ivpa,ivperp,iz] end end for index ∈ eachindex(electron_source_settings) if electron_source_settings[index].active - @views source_density_amplitude = moments.electron.external_source_density_amplitude[:, :, index] - @views source_momentum_amplitude = moments.electron.external_source_momentum_amplitude[:, :, index] - @views source_pressure_amplitude = moments.electron.external_source_pressure_amplitude[:, :, index] - @loop_r_z ir iz begin + @views source_density_amplitude = moments.electron.external_source_density_amplitude[:, ir, index] + @views source_momentum_amplitude = moments.electron.external_source_momentum_amplitude[:, ir, index] + @views source_pressure_amplitude = moments.electron.external_source_pressure_amplitude[:, ir, index] + @loop_z iz begin term = dt * (1.5 * source_density_amplitude[iz,ir] / dens[iz,ir] - (0.5 * source_pressure_amplitude[iz,ir] + source_momentum_amplitude[iz,ir]) / ppar[iz,ir]) @@ 
-2293,6 +3496,134 @@ function add_contribution_from_pdf_term!(pdf_out, pdf_in, ppar, dens, upar, mome return nothing end +function add_contribution_from_electron_pdf_term_to_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, + dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z, + vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir; f_offset=0, ppar_offset=0) + + if f_offset == ppar_offset + error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar " + * "cannot be in same place in state vector.") + end + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) + @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n + @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n + + source_density_amplitude = moments.electron.external_source_density_amplitude + source_momentum_amplitude = moments.electron.external_source_momentum_amplitude + source_pressure_amplitude = moments.electron.external_source_pressure_amplitude + z_deriv_matrix = z_spectral.D_matrix_csr + v_size = vperp.n * vpa.n + + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset + v_remainder = (ivperp - 1) * vpa.n + ivpa + + # Terms from `add_contribution_from_pdf_term!()` + # (0.5/p*dq/dz + w_∥*vth*(1/n*dn/dz - 1/vth*dvth/dz))*g + # + # q = 2*p*vth*∫dw_∥ w_∥^3 g + # = 2*p^(3/2)*sqrt(2/n/me)*∫dw_∥ w_∥^3 g + # dq/dz = 3*sqrt(2*p/n/me)*∫dw_∥ w_∥^3 g * dp/dz + # - p^(3/2)*sqrt(2/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + # + 2*p*vth*∫dw_∥ w_∥^3 dg/dz + # 0.5/p*dq/dz = 1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g * dp/dz + # - 0.5*sqrt(2*p/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + # + sqrt(2*p/n/me)*∫dw_∥ w_∥^3 dg/dz + # = 1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g * dp/dz + # - 
0.5*sqrt(2*p/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + # + vth*∫dw_∥ w_∥^3 dg/dz + # d(0.5/p*dq/dz[irowz])/d(g[icolvpa,icolvperp,icolz]) = + # (1.5*sqrt(2/p/n/me)*dp/dz - 0.5*sqrt(2*p/me)/n^(3/2)*dn/dz) * delta(irowz,icolz) * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + # + vth * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[irowz,icolz] + # d(0.5/p*dq/dz[irowz])/d(p[icolz]) = + # (-3/4*sqrt(2/n/me)/p^(3/2)*∫dw_∥ w_∥^3 g * dp/dz - 1/4*sqrt(2/me)/sqrt(p)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + 1/2*sqrt(2/n/me)/sqrt(p)*∫dw_∥ w_∥^3 dg/dz)[irowz] * delta(irowz,icolz) + # + (1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g)[irowz] * z_deriv_matrix[irowz,icolz] + # + # dvth/dz = d/dz(sqrt(2*p/n/me)) + # = 1/n/me/sqrt(2*p/n/me)*dp/dz - p/n^2/me/sqrt(2*p/n/me)*dn/dz + # = 1/n/me/vth*dp/dz - p/n^2/me/vth*dn/dz + # = 1/n/me/vth*dp/dz - 1/2*vth/n*dn/dz + # ⇒ vth*(1/n*dn/dz - 1/vth*dvth/dz) + # = (vth/n*dn/dz - dvth/dz) + # = (vth/n*dn/dz - 1/n/me/vth*dp/dz + 1/2*vth/n*dn/dz) + # = (3/2*vth/n*dn/dz - 1/n/me/vth*dp/dz) + # = (3/2*sqrt(2*p/me)/n^(3/2)*dn/dz - 1/sqrt(2*p*n*me)*dp/dz) + # d(vth*(1/n*dn/dz - 1/vth*dvth/dz)[irowz])/d(ppar[icolz]) = + # (3/4*sqrt(2/me)/p^(1/2)/n^(3/2)*dn/dz + 1/2/sqrt(2*n*me)/p^(3/2)*dp/dz)[irowz] * delta(irowz,icolz) + # -1/sqrt(2*p*n*me)[irowz] * z_deriv_matrix[irowz,icolz] + # + jacobian_matrix[row,row] += dt * (0.5 * dqpar_dz[iz] / ppar[iz] + + vpa.grid[ivpa] * vth[iz] * (ddens_dz[iz] / dens[iz] + - dvth_dz[iz] / vth[iz])) + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] += + dt * f[ivpa,ivperp,iz] * + (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz] - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + end + z_deriv_row_startind = z_deriv_matrix.rowptr[iz] + z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1 + z_deriv_colinds = @view z_deriv_matrix.colval[z_deriv_row_startind:z_deriv_row_endind] + 
z_deriv_row_nonzeros = @view z_deriv_matrix.nzval[z_deriv_row_startind:z_deriv_row_endind] + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] += + dt * f[ivpa,ivperp,iz] * vth[iz] * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry + end + for index ∈ eachindex(external_source_settings.electron) + electron_source = external_source_settings.electron[index] + if electron_source.active + # Source terms from `add_contribution_from_pdf_term!()` + jacobian_matrix[row,row] += dt * (1.5 * source_density_amplitude[iz,ir,index] / dens[iz] + - (0.5 * source_pressure_amplitude[iz,ir,index] + + source_momentum_amplitude[iz,ir,index]) / ppar[iz] + ) + end + end + jacobian_matrix[row,ppar_offset+iz] += + dt * f[ivpa,ivperp,iz] * + (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz] + - 0.25*sqrt(2.0/ppar[iz]/me)/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz] + + 0.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dthird_moment_dz[iz] + + vpa.grid[ivpa] * (0.75*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz] + + 0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz])) + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) + col = ppar_offset + icolz + jacobian_matrix[row,col] += dt * f[ivpa,ivperp,iz] * + (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz] + - vpa.grid[ivpa]/sqrt(2.0*ppar[iz]*dens[iz]*me)) * z_deriv_entry + end + end + + return nothing +end + +function add_ion_dt_forcing_of_electron_ppar_to_Jacobian!(jacobian_matrix, z, dt, ion_dt, + ir; ppar_offset=0) + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) + @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n + + begin_z_region() + @loop_z iz begin + # Rows corresponding to electron_ppar + row = ppar_offset + iz + + # Backward-Euler forcing term + jacobian_matrix[row,row] += dt / ion_dt + end + + 
return nothing +end + # function check_electron_pdf_convergence!(electron_pdf_converged, pdf_new, pdf) # # check to see if the electron pdf has converged to within the specified tolerance # # NB: the convergence criterion is based on the average relative difference between the diff --git a/moment_kinetics/src/electron_vpa_advection.jl b/moment_kinetics/src/electron_vpa_advection.jl index 5f8abe6b5..6fa6b34e8 100644 --- a/moment_kinetics/src/electron_vpa_advection.jl +++ b/moment_kinetics/src/electron_vpa_advection.jl @@ -4,9 +4,12 @@ module electron_vpa_advection export electron_vpa_advection! export update_electron_speed_vpa! +export add_electron_vpa_advection_to_Jacobian! using ..looping +using ..boundary_conditions: skip_f_electron_bc_points_in_Jacobian using ..calculus: derivative!, second_derivative! +using ..gauss_legendre: gausslegendre_info """ calculate the wpa-advection term for the electron kinetic equation @@ -14,39 +17,33 @@ calculate the wpa-advection term for the electron kinetic equation """ function electron_vpa_advection!(pdf_out, pdf_in, density, upar, ppar, moments, advect, vpa, spectral, scratch_dummy, dt, - electron_source_settings) - begin_r_z_vperp_region() + electron_source_settings, ir) + begin_z_vperp_region() # create a reference to a scratch_dummy array to store the wpa-derivative of the electron pdf - dpdf_dvpa = scratch_dummy.buffer_vpavperpzr_1 - #d2pdf_dvpa2 = scratch_dummy.buffer_vpavperpzr_2 - begin_r_z_vperp_region() + dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] + #d2pdf_dvpa2 = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] + # get the updated speed along the wpa direction using the current pdf @views update_electron_speed_vpa!(advect[1], density, upar, ppar, moments, vpa.grid, - electron_source_settings) - # update adv_fac -- note that there is no factor of dt here because - # in some cases the electron kinetic equation is solved as a steady-state equation iteratively - @loop_r_z_vperp ir iz ivperp begin + 
electron_source_settings, ir) + # update adv_fac + @loop_z_vperp iz ivperp begin @views @. advect[1].adv_fac[:,ivperp,iz,ir] = -advect[1].speed[:,ivperp,iz,ir] end #calculate the upwind derivative of the electron pdf w.r.t. wpa - @loop_r_z_vperp ir iz ivperp begin - @views derivative!(dpdf_dvpa[:,ivperp,iz,ir], pdf_in[:,ivperp,iz,ir], vpa, + @loop_z_vperp iz ivperp begin + @views derivative!(dpdf_dvpa[:,ivperp,iz], pdf_in[:,ivperp,iz], vpa, advect[1].adv_fac[:,ivperp,iz,ir], spectral) end - #@loop_r_z_vperp ir iz ivperp begin - # @views second_derivative!(d2pdf_dvpa2[:,ivperp,iz,ir], pdf_in[:,ivperp,iz,ir], vpa, spectral) + #@loop_z_vperp iz ivperp begin + # @views second_derivative!(d2pdf_dvpa2[:,ivperp,iz], pdf_in[:,ivperp,iz], vpa, spectral) #end # calculate the advection term - @loop_r_z_vperp ir iz ivperp begin - @. pdf_out[:,ivperp,iz,ir] += dt * advect[1].adv_fac[:,ivperp,iz,ir] * dpdf_dvpa[:,ivperp,iz,ir] - #@. pdf_out[:,ivperp,iz,ir] -= advect[1].adv_fac[:,ivperp,iz,ir] * dpdf_dvpa[:,ivperp,iz,ir] + 0.0001*d2pdf_dvpa2[:,ivperp,iz,ir] + @loop_z_vperp iz ivperp begin + @. pdf_out[:,ivperp,iz] += dt * advect[1].adv_fac[:,ivperp,iz,ir] * dpdf_dvpa[:,ivperp,iz] + #@. 
pdf_out[:,ivperp,iz] -= advect[1].adv_fac[:,ivperp,iz,ir] * dpdf_dvpa[:,ivperp,iz] + 0.0001*d2pdf_dvpa2[:,ivperp,iz] end - #@loop_vpa ivpa begin - # println("electron_vpa_advection: ", pdf_out[ivpa,1,10,1], " vpa: ", vpa.grid[ivpa], " dpdf_dvpa: ", dpdf_dvpa[ivpa,1,10,1], - # " pdf: ", pdf[ivpa,1,10,1]) - #end - #exit() return nothing end @@ -54,31 +51,29 @@ end calculate the electron advection speed in the wpa-direction at each grid point """ function update_electron_speed_vpa!(advect, density, upar, ppar, moments, vpa, - electron_source_settings) - vth = moments.electron.vth - dppar_dz = moments.electron.dppar_dz - dqpar_dz = moments.electron.dqpar_dz - dvth_dz = moments.electron.dvth_dz + electron_source_settings, ir) + vth = @view moments.electron.vth[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + dqpar_dz = @view moments.electron.dqpar_dz[:,ir] + dvth_dz = @view moments.electron.dvth_dz[:,ir] # calculate the advection speed in wpa - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - # TMP FOR TESTING - #advect.speed[ivpa,ivperp,iz,ir] = vth[iz,ir] * dppar_dz[iz,ir] / (2 * ppar[iz,ir]) - advect.speed[ivpa,ivperp,iz,ir] = ((vth[iz,ir] * dppar_dz[iz,ir] + vpa[ivpa] * dqpar_dz[iz,ir]) - / (2 * ppar[iz,ir]) - vpa[ivpa]^2 * dvth_dz[iz,ir]) + @loop_z_vperp_vpa iz ivperp ivpa begin + advect.speed[ivpa,ivperp,iz,ir] = ((vth[iz] * dppar_dz[iz] + vpa[ivpa] * dqpar_dz[iz]) + / (2 * ppar[iz]) - vpa[ivpa]^2 * dvth_dz[iz]) end for index ∈ eachindex(electron_source_settings) if electron_source_settings[index].active - @views source_density_amplitude = moments.electron.external_source_density_amplitude[:, :, index] - @views source_momentum_amplitude = moments.electron.external_source_momentum_amplitude[:, :, index] - @views source_pressure_amplitude = moments.electron.external_source_pressure_amplitude[:, :, index] - @loop_r_z ir iz begin - term1 = source_density_amplitude[iz,ir] * upar[iz,ir]/(density[iz,ir]*vth[iz,ir]) + @views source_density_amplitude = 
moments.electron.external_source_density_amplitude[:, ir, index] + @views source_momentum_amplitude = moments.electron.external_source_momentum_amplitude[:, ir, index] + @views source_pressure_amplitude = moments.electron.external_source_pressure_amplitude[:, ir, index] + @loop_z iz begin + term1 = source_density_amplitude[iz] * upar[iz]/(density[iz]*vth[iz]) term2_over_vpa = - -0.5 * (source_pressure_amplitude[iz,ir] + - 2.0 * upar[iz,ir] * source_momentum_amplitude[iz,ir]) / - ppar[iz,ir] + - 0.5 * source_density_amplitude[iz,ir] / density[iz,ir] + -0.5 * (source_pressure_amplitude[iz] + + 2.0 * upar[iz] * source_momentum_amplitude[iz]) / + ppar[iz] + + 0.5 * source_density_amplitude[iz] / density[iz] @loop_vperp_vpa ivperp ivpa begin advect.speed[ivpa,ivperp,iz,ir] += term1 + vpa[ivpa] * term2_over_vpa end @@ -87,5 +82,197 @@ function update_electron_speed_vpa!(advect, density, upar, ppar, moments, vpa, end return nothing end +# Alternative version with loop over r is used for adaptive timestep update +function update_electron_speed_vpa!(advect, density, upar, ppar, moments, vpa, + electron_source_settings) + @loop_r ir begin + @views update_electron_speed_vpa!(advect, density[:,ir], upar[:,ir], ppar[:,ir], + moments, vpa, electron_source_settings, ir) + end + return nothing +end + +function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, ppar, + vth, third_moment, ddens_dz, dppar_dz, + dthird_moment_dz, moments, me, z, vperp, + vpa, z_spectral, vpa_spectral, + vpa_advect, z_speed, scratch_dummy, + external_source_settings, dt, ir; + f_offset=0, ppar_offset=0) + if f_offset == ppar_offset + error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. 
f and ppar " + * "cannot be in same place in state vector.") + end + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") + @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big") + + v_size = vperp.n * vpa.n + source_density_amplitude = @view moments.electron.external_source_density_amplitude[:,ir,:] + source_momentum_amplitude = @view moments.electron.external_source_momentum_amplitude[:,ir,:] + source_pressure_amplitude = @view moments.electron.external_source_pressure_amplitude[:,ir,:] + + dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] + begin_z_vperp_region() + update_electron_speed_vpa!(vpa_advect[1], dens, upar, ppar, moments, vpa.grid, + external_source_settings.electron, ir) + @loop_z_vperp iz ivperp begin + @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir] + end + #calculate the upwind derivative of the electron pdf w.r.t. wpa + @loop_z_vperp iz ivperp begin + @views derivative!(dpdf_dvpa[:,ivperp,iz], f[:,ivperp,iz], vpa, + vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral) + end + + if !isa(vpa_spectral, gausslegendre_info) + error("Only gausslegendre_pseudospectral vpa-coordinate type is supported by " + * "add_electron_vpa_advection_to_Jacobian!() preconditioner because we " + * "need differentiation matrices.") + end + + z_deriv_matrix = z_spectral.D_matrix_csr + vpa_Dmat = vpa_spectral.lobatto.Dmat + vpa_element_scale = vpa.element_scale + + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset + + ielement_vpa = vpa.ielement[ivpa] + igrid_vpa = vpa.igrid[ivpa] + icolumn_min_vpa = vpa.imin[ielement_vpa] - (ielement_vpa != 1) + f_offset + icolumn_max_vpa = vpa.imax[ielement_vpa] + f_offset 
+ + vpa_speed = vpa_advect[1].speed[ivpa,ivperp,iz,ir] + + # Contributions from + # (1/2*vth/p*dp/dz + 1/2*w_∥/p*dq/dz - w_∥^2*dvth/dz + # + source_density_amplitude*u/n/vth + # - w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p + # + w_∥*1/2*source_density_amplitude/n) * dg/dw_∥ + if ielement_vpa == 1 && igrid_vpa == 1 + jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa] + elseif ielement_vpa == vpa.nelement_local && igrid_vpa == vpa.ngrid + jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa] + elseif igrid_vpa == vpa.ngrid + # Note igrid_vpa is only ever 1 when ielement_vpa==1, because + # of the way element boundaries are counted. The column limits below include f_offset, like icolumn_min_vpa/icolumn_max_vpa. + icolumn_min_vpa_next = vpa.imin[ielement_vpa+1] - 1 + f_offset + icolumn_max_vpa_next = vpa.imax[ielement_vpa+1] + f_offset + if vpa_speed < 0.0 + jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+= + dt * vpa_speed * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa+1] + elseif vpa_speed > 0.0 + jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa] + else + jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * 0.5 * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa] + jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+= + dt * vpa_speed * 0.5 * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa+1] + end + else + 
jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * vpa_Dmat[igrid_vpa,:] ./ vpa_element_scale[ielement_vpa] + end + # q = 2*p*vth*∫dw_∥ w_∥^3 g + # = 2*p^(3/2)*sqrt(2/n/me)*∫dw_∥ w_∥^3 g + # dq/dz = 3*sqrt(2*p/n/me)*∫dw_∥ w_∥^3 g * dp/dz + # - p^(3/2)*sqrt(2/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + # + 2*p*vth*∫dw_∥ w_∥^3 dg/dz + # w_∥*0.5/p*dq/dz = w_∥*1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g * dp/dz + # - w_∥*0.5*sqrt(2*p/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + # + w_∥*sqrt(2*p/n/me)*∫dw_∥ w_∥^3 dg/dz + # = w_∥*1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g * dp/dz + # - w_∥*0.5*sqrt(2*p/me)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + # + w_∥*vth*∫dw_∥ w_∥^3 dg/dz + # d(w_∥*0.5/p*dq/dz[irowz])/d(g[icolvpa,icolvperp,icolz]) = + # w_∥*(1.5*sqrt(2/p/n/me)*dp/dz - 0.5*sqrt(2*p/me)/n^(3/2)*dn/dz) * delta(irowz,icolz) * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + # + w_∥*vth * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[irowz,icolz] + # d(w_∥*0.5/p*dq/dz[irowz])/d(p[icolz]) = + # (-w_∥*3/4*sqrt(2/n/me)/p^(3/2)*∫dw_∥ w_∥^3 g * dp/dz - w_∥*1/4*sqrt(2/me)/sqrt(p)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + w_∥*1/2*sqrt(2/n/me)/sqrt(p)*∫dw_∥ w_∥^3 dg/dz)[irowz] * delta(irowz,icolz) + # + w_∥*(1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g)[irowz] * z_deriv_matrix[irowz,icolz] + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * + vpa.grid[ivpa] * (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz] + - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + end + z_deriv_row_startind = z_deriv_matrix.rowptr[iz] + z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1 + z_deriv_colinds = @view z_deriv_matrix.colval[z_deriv_row_startind:z_deriv_row_endind] + z_deriv_row_nonzeros = @view z_deriv_matrix.nzval[z_deriv_row_startind:z_deriv_row_endind] + for 
(icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * + vpa.grid[ivpa] * vth[iz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry + end + jacobian_matrix[row,ppar_offset+iz] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] * + (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz] + - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz] + + 0.5*sqrt(2.0/dens[iz]/me/ppar[iz])*dthird_moment_dz[iz]) + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) + col = ppar_offset + icolz + jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] * 1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz] * z_deriv_entry + end + # (1/2*vth/p*dp/dz - w_∥^2*dvth/dz + # + source_density_amplitude*u/n/vth + # - w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p + # + w_∥*1/2*source_density_amplitude/n) + # = (1/2*sqrt(2/p/n)*dp/dz - w_∥^2*dvth/dz + # + source_density_amplitude*u/sqrt(2*p*n) + # - w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p + # + w_∥*1/2*source_density_amplitude/n) + # + # dvth/dz = d/dz(sqrt(2*p/n/me)) + # = 1/n/me/sqrt(2*p/n/me)*dp/dz - p/n^2/me/sqrt(2*p/n/me)*dn/dz + # = 1/sqrt(2*p*n*me)*dp/dz - 1/2*sqrt(2*p/n/me)/n*dn/dz + # d(dvth/dz[irowz])/d(ppar[icolz]) = + # (-1/2/sqrt(2*n*me)/p^(3/2)*dp/dz - 1/4*sqrt(2/me)/p^(1/2)/n^(3/2)*dn/dz)[irowz] * delta(irowz,icolz) + # +1/sqrt(2*p*n*me)[irowz] * z_deriv_matrix[irowz,icolz] + # + # ⇒ d((1/2*vth/p*dp/dz - w_∥^2*dvth/dz + # + source_density_amplitude*u/n/vth + # - w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p + # + w_∥*1/2*source_density_amplitude/n)[irowz]/d(ppar[icolz]) + # = (-1/4*sqrt(2/n/me)/p^(3/2)*dp/dz + # - w_∥^2*(-1/2/sqrt(2*n*me)/p^(3/2)*dp/dz - 
1/4*sqrt(2/me)/p^(1/2)/n^(3/2)*dn/dz) + # - 1/2*source_density_amplitude*u/sqrt(2*n)/p^(3/2) + # + w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p^2)[irowz] * delta(irowz,icolz) + # + (1/2*sqrt(2/p/n/me) - w_∥^2/sqrt(2*p*n*me))[irowz] * z_deriv_matrix[irowz,icolz] + jacobian_matrix[row,ppar_offset+iz] += dt * ( + -0.25*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*dppar_dz[iz] + - vpa.grid[ivpa]^2*(-0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz] - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz]) + ) * dpdf_dvpa[ivpa,ivperp,iz] + for index ∈ eachindex(external_source_settings.electron) + electron_source = external_source_settings.electron[index] + if electron_source.active + jacobian_matrix[row,ppar_offset+iz] += dt * ( + -0.5*source_density_amplitude[iz,index]*upar[iz]/sqrt(2.0*dens[iz])/ppar[iz]^1.5 + + vpa.grid[ivpa]*0.5*(source_pressure_amplitude[iz,index] + + 2.0*upar[iz]*source_momentum_amplitude[iz,index])/ppar[iz]^2 + ) * dpdf_dvpa[ivpa,ivperp,iz] + end + end + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) + col = ppar_offset + icolz + jacobian_matrix[row,col] += dt * ( + 0.5*sqrt(2.0/ppar[iz]/dens[iz]/me) + - vpa.grid[ivpa]^2/sqrt(2.0*ppar[iz]*dens[iz]*me) + ) * dpdf_dvpa[ivpa,ivperp,iz] * z_deriv_entry + end + end + + return nothing +end end diff --git a/moment_kinetics/src/electron_z_advection.jl b/moment_kinetics/src/electron_z_advection.jl index b0b8f26aa..c79a934da 100644 --- a/moment_kinetics/src/electron_z_advection.jl +++ b/moment_kinetics/src/electron_z_advection.jl @@ -4,44 +4,51 @@ module electron_z_advection export electron_z_advection! export update_electron_speed_z! +export add_electron_z_advection_to_Jacobian! using ..advection: advance_f_df_precomputed! +using ..boundary_conditions: skip_f_electron_bc_points_in_Jacobian using ..chebyshev: chebyshev_info +using ..gauss_legendre: gausslegendre_info using ..looping -using ..derivatives: derivative_z! 
-using ..calculus: second_derivative!, derivative! +using ..derivatives: derivative_z_pdf_vpavperpz! +using ..calculus: second_derivative! """ calculate the z-advection term for the electron kinetic equation = wpa * vthe * df/dz """ function electron_z_advection!(pdf_out, pdf_in, upar, vth, advect, z, vpa, spectral, - scratch_dummy, dt) - begin_r_vperp_vpa_region() + scratch_dummy, dt, ir) + begin_vperp_vpa_region() # create a pointer to a scratch_dummy array to store the z-derivative of the electron pdf - dpdf_dz = scratch_dummy.buffer_vpavperpzr_1 - d2pdf_dz2 = scratch_dummy.buffer_vpavperpzr_2 - begin_r_vperp_vpa_region() + dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] + d2pdf_dz2 = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] + begin_vperp_vpa_region() # get the updated speed along the z direction using the current pdf - @views update_electron_speed_z!(advect[1], upar, vth, vpa) + @views update_electron_speed_z!(advect[1], upar, vth, vpa, ir) # update adv_fac -- note that there is no factor of dt here because # in some cases the electron kinetic equation is solved as a steady-state equation iteratively - @loop_r_vperp_vpa ir ivperp ivpa begin + @loop_vperp_vpa ivperp ivpa begin @views advect[1].adv_fac[:,ivpa,ivperp,ir] = -advect[1].speed[:,ivpa,ivperp,ir] end #calculate the upwind derivative - derivative_z!(dpdf_dz, pdf_in, - advect, scratch_dummy.buffer_vpavperpr_1, - scratch_dummy.buffer_vpavperpr_2, scratch_dummy.buffer_vpavperpr_3, - scratch_dummy.buffer_vpavperpr_4, scratch_dummy.buffer_vpavperpr_5, - scratch_dummy.buffer_vpavperpr_6, spectral, z) - #@loop_r_vperp_vpa ir ivperp ivpa begin - # @views second_derivative!(d2pdf_dz2[ivpa,ivperp,:,ir], pdf[ivpa,ivperp,:,ir], z, spectral) + @views derivative_z_pdf_vpavperpz!( + dpdf_dz, pdf_in, advect[1].adv_fac[:,:,:,ir], + scratch_dummy.buffer_vpavperpr_1[:,:,ir], + scratch_dummy.buffer_vpavperpr_2[:,:,ir], + scratch_dummy.buffer_vpavperpr_3[:,:,ir], + 
scratch_dummy.buffer_vpavperpr_4[:,:,ir], + scratch_dummy.buffer_vpavperpr_5[:,:,ir], + scratch_dummy.buffer_vpavperpr_6[:,:,ir], spectral, z) + #@loop_vperp_vpa ivperp ivpa begin + # @views second_derivative!(d2pdf_dz2[ivpa,ivperp,:], pdf_in[ivpa,ivperp,:], z, spectral) #end # calculate the advection term - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] += dt * advect[1].adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz,ir] - #pdf_out[ivpa,ivperp,iz,ir] += dt * advect[1].adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz,ir] + 0.0001*d2pdf_dz2[ivpa,ivperp,iz,ir] + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + pdf_out[ivpa,ivperp,iz] += dt * advect[1].adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz] + #pdf_out[ivpa,ivperp,iz] += dt * advect[1].adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz] + 0.0001*d2pdf_dz2[ivpa,ivperp,iz] end return nothing end @@ -49,12 +56,115 @@ end """ calculate the electron advection speed in the z-direction at each grid point """ -function update_electron_speed_z!(advect, upar, vth, vpa) +function update_electron_speed_z!(advect, upar, vth, vpa, ir) # the electron advection speed in z is v_par = w_par * v_the - @loop_r_vperp_vpa ir ivperp ivpa begin - #@. @views advect.speed[:,ivpa,ivperp,ir] = vpa[ivpa] * vth[:,ir] - @. @views advect.speed[:,ivpa,ivperp,ir] = vpa[ivpa] * vth[:,ir] + upar[:,ir] + @loop_vperp_vpa ivperp ivpa begin + #@. @views advect.speed[:,ivpa,ivperp,ir] = vpa[ivpa] * vth + @. 
@views advect.speed[:,ivpa,ivperp,ir] = vpa[ivpa] * vth + upar + end + return nothing +end +# Alternative version with loop over r is used for adaptive timestep update +function update_electron_speed_z!(advect, upar, vth, vpa) + @loop_r ir begin + @views update_electron_speed_z!(advect, upar[:,ir], vth[:,ir], vpa, ir) + end + return nothing +end + +function add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, ppar, vth, + me, z, vperp, vpa, z_spectral, z_advect, + scratch_dummy, dt, ir; f_offset=0, + ppar_offset=0) + if f_offset == ppar_offset + error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar " + * "cannot be in same place in state vector.") + end + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") + @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big") + + v_size = vperp.n * vpa.n + + dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed_array = @view z_advect[1].speed[:,:,:,ir] # r-slice written by update_electron_speed_z!() just above + + @loop_vperp_vpa ivperp ivpa begin + @views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_speed_array[:,ivpa,ivperp] + end + #calculate the upwind derivative + @views derivative_z_pdf_vpavperpz!(dpdf_dz, f, z_advect[1].adv_fac[:,:,:,ir], + scratch_dummy.buffer_vpavperpr_1[:,:,ir], + scratch_dummy.buffer_vpavperpr_2[:,:,ir], + scratch_dummy.buffer_vpavperpr_3[:,:,ir], + scratch_dummy.buffer_vpavperpr_4[:,:,ir], + scratch_dummy.buffer_vpavperpr_5[:,:,ir], + scratch_dummy.buffer_vpavperpr_6[:,:,ir], + z_spectral, z) + + if !isa(z_spectral, gausslegendre_info) + error("Only gausslegendre_pseudospectral z-coordinate type is supported by " + * "add_electron_z_advection_to_Jacobian!() preconditioner because we need " + * "differentiation matrices.") + end + z_Dmat = z_spectral.lobatto.Dmat + z_element_scale = z.element_scale + + 
begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, + z_speed_array) + continue + end + + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset + v_remainder = (ivperp - 1) * vpa.n + ivpa + f_offset + + ielement_z = z.ielement[iz] + igrid_z = z.igrid[iz] + icolumn_min_z = z.imin[ielement_z] - (ielement_z != 1) + icolumn_max_z = z.imax[ielement_z] + + z_speed = z_speed_array[iz,ivpa,ivperp] + + # Contributions from (w_∥*vth + upar)*dg/dz + if ielement_z == 1 && igrid_z == 1 + jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= + dt * z_speed * z_Dmat[1,:] ./ z_element_scale[ielement_z] + elseif ielement_z == z.nelement_local && igrid_z == z.ngrid + jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= + dt * z_speed * z_Dmat[end,:] ./ z_element_scale[ielement_z] + elseif igrid_z == z.ngrid + # Note igrid_z is only ever 1 when ielement_z==1, because + # of the way element boundaries are counted. 
+ icolumn_min_z_next = z.imin[ielement_z+1] - 1 + icolumn_max_z_next = z.imax[ielement_z+1] + if z_speed < 0.0 + jacobian_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+= + dt * z_speed * z_Dmat[1,:] ./ z_element_scale[ielement_z+1] + elseif z_speed > 0.0 + jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= + dt * z_speed * z_Dmat[end,:] ./ z_element_scale[ielement_z] + else + jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= + dt * z_speed * 0.5 * z_Dmat[end,:] ./ z_element_scale[ielement_z] + jacobian_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+= + dt * z_speed * 0.5 * z_Dmat[1,:] ./ z_element_scale[ielement_z+1] + end + else + jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= + dt * z_speed * z_Dmat[igrid_z,:] ./ z_element_scale[ielement_z] + end + # vth = sqrt(2*p/n/me) + # so d(vth)/d(ppar) = 1/n/me/sqrt(2*p/n/me) = 1/n/me/vth + # and d(w_∥*vth*dg/dz)/d(ppar) = 1/n/me/vth*w_∥*dg/dz + jacobian_matrix[row,ppar_offset+iz] += dt / dens[iz] / me / vth[iz] * vpa.grid[ivpa] * dpdf_dz[ivpa,ivperp,iz] end + return nothing end diff --git a/moment_kinetics/src/external_sources.jl b/moment_kinetics/src/external_sources.jl index 4b8658582..ca990f9e6 100644 --- a/moment_kinetics/src/external_sources.jl +++ b/moment_kinetics/src/external_sources.jl @@ -15,11 +15,13 @@ export setup_external_sources!, external_ion_source!, external_neutral_source!, external_ion_source_controller!, external_neutral_source_controller!, initialize_external_source_amplitude!, initialize_external_source_controller_integral!, + add_external_electron_source_to_Jacobian!, total_external_ion_sources!, total_external_neutral_sources!, total_external_ion_source_controllers!, total_external_neutral_source_controllers!, 
external_electron_source!, total_external_electron_sources! using ..array_allocation: allocate_float, allocate_shared_float +using ..boundary_conditions: skip_f_electron_bc_points_in_Jacobian using ..calculus using ..communication using ..coordinates @@ -315,13 +317,13 @@ function setup_external_sources!(input_dict, r, z, electron_physics) PI_controller_amplitude, controller_source_profile, PI_density_target_ir, PI_density_target_iz, PI_density_target_rank) end - function get_settings_electrons(ion_settings) + function get_settings_electrons(i, ion_settings) # Note most settings for the electron source are copied from the ion source, # because we require that the particle sources are the same for ions and # electrons. `source_T` can be set independently, and when using # `source_type="energy"`, the `source_strength` could also be set. input = set_defaults_and_check_section!( - input_dict, "electron_source"; + input_dict, "electron_source_$i"; source_strength=ion_settings.source_strength, source_T=ion_settings.source_T, ) @@ -337,8 +339,8 @@ function setup_external_sources!(input_dict, r, z, electron_physics) input["source_strength"] = ion_settings.source_strength end return electron_source_data(input["source_strength"], input["source_T"], - ion_settings.active, ion_settings.r_amplitude, - ion_settings.z_amplitude, ion_settings.source_type) + ion_settings.active, ion_settings.r_amplitude, + ion_settings.z_amplitude, ion_settings.source_type) end # put all ion sources into ion_source_data struct vector @@ -359,9 +361,9 @@ function setup_external_sources!(input_dict, r, z, electron_physics) electron_sources = electron_source_data[] if electron_physics ∈ (braginskii_fluid, kinetic_electrons, kinetic_electrons_with_temperature_equation) - electron_sources = [get_settings_electrons(this_source) for this_source ∈ ion_sources] + electron_sources = [get_settings_electrons(i, this_source) for (i,this_source) ∈ enumerate(ion_sources)] else - electron_sources = 
[get_settings_electrons(get_settings_ions(1, false))] + electron_sources = [get_settings_electrons(1, get_settings_ions(1, false))] end # put all neutral sources into neutral_source_data struct vector @@ -920,18 +922,20 @@ function external_ion_source!(pdf, fvec, moments, ion_source, index, vperp, vpa, end """ - total_external_electron_sources!(pdf, fvec, moments, electron_sources, vperp, vpa, dt, scratch_dummy) + total_external_electron_sources!(pdf_out, pdf_in, electron_density, electron_upar, + moments, composition, electron_sources, vperp, + vpa, dt, ir) Contribute all of the electron sources to the electron pdf, one by one. """ function total_external_electron_sources!(pdf_out, pdf_in, electron_density, electron_upar, moments, composition, electron_sources, vperp, - vpa, dt) + vpa, dt, ir) for index ∈ eachindex(electron_sources) if electron_sources[index].active external_electron_source!(pdf_out, pdf_in, electron_density, electron_upar, moments, composition, electron_sources[index], index, - vperp, vpa, dt) + vperp, vpa, dt, ir) end end return nothing @@ -940,18 +944,21 @@ end """ external_electron_source!(pdf_out, pdf_in, electron_density, electron_upar, moments, composition, electron_source, index, vperp, - vpa, dt) + vpa, dt, ir) Add external source term to the electron kinetic equation. + +Note that this function operates on a single point in `r`, given by `ir`, and `pdf_out`, +`pdf_in`, `electron_density`, and `electron_upar` should have no r-dimension. 
""" function external_electron_source!(pdf_out, pdf_in, electron_density, electron_upar, - moments, composition, electron_source, index, vperp, - vpa, dt) - begin_r_z_vperp_region() + moments, composition, electron_source, index, + vperp, vpa, dt, ir) + begin_z_vperp_region() me_over_mi = composition.me_over_mi - @views source_amplitude = moments.electron.external_source_amplitude[:, :, index] + @views source_amplitude = moments.electron.external_source_amplitude[:,ir,index] source_T = electron_source.source_T if vperp.n == 1 vth_factor = 1.0 / sqrt(source_T / me_over_mi) @@ -961,18 +968,18 @@ function external_electron_source!(pdf_out, pdf_in, electron_density, electron_u vpa_grid = vpa.grid vperp_grid = vperp.grid - vth = moments.electron.vth - @loop_r_z ir iz begin - this_vth = vth[iz,ir] - this_upar = electron_upar[iz,ir] - this_prefactor = dt * this_vth / electron_density[iz,ir] * vth_factor * - source_amplitude[iz,ir] + vth = @view moments.electron.vth[:,ir] + @loop_z iz begin + this_vth = vth[iz] + this_upar = electron_upar[iz] + this_prefactor = dt * this_vth / electron_density[iz] * vth_factor * + source_amplitude[iz] @loop_vperp_vpa ivperp ivpa begin # Factor of 1/sqrt(π) (for 1V) or 1/π^(3/2) (for 2V/3V) is absorbed by the # normalisation of F vperp_unnorm = vperp_grid[ivperp] * this_vth vpa_unnorm = vpa_grid[ivpa] * this_vth + this_upar - pdf_out[ivpa,ivperp,iz,ir] += + pdf_out[ivpa,ivperp,iz] += this_prefactor * exp(-(vperp_unnorm^2 + vpa_unnorm^2) * me_over_mi / source_T) end @@ -980,33 +987,98 @@ function external_electron_source!(pdf_out, pdf_in, electron_density, electron_u if electron_source.source_type == "energy" # Take particles out of pdf so source does not change density - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] -= dt * source_amplitude[iz,ir] * - pdf_in[ivpa,ivperp,iz,ir] + @loop_z_vperp_vpa iz ivperp ivpa begin + pdf_out[ivpa,ivperp,iz] -= dt * source_amplitude[iz] * + pdf_in[ivpa,ivperp,iz] end end return 
nothing end
+"""
+    add_total_external_electron_source_to_Jacobian!(
+        jacobian_matrix, f, moments, me, z_speed, electron_sources, z, vperp, vpa, dt,
+        ir; f_offset=0, ppar_offset=0)
+
+Add the Jacobian contributions of all of the electron sources to `jacobian_matrix`, one
+by one, by calling `add_external_electron_source_to_Jacobian!()` for every entry of
+`electron_sources`. Sources that are not active are skipped inside that function.
+
+All arguments, including the `f_offset` and `ppar_offset` keywords, are forwarded
+unchanged; `index` is the position of the source in `electron_sources`.
+"""
+function add_total_external_electron_source_to_Jacobian!(
+        jacobian_matrix, f, moments, me, z_speed, electron_sources, z, vperp, vpa, dt, ir;
+        f_offset=0, ppar_offset=0)
+    for index ∈ eachindex(electron_sources)
+        add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, me,
+                                                  z_speed, electron_sources[index], index,
+                                                  z, vperp, vpa, dt, ir;
+                                                  f_offset=f_offset,
+                                                  ppar_offset=ppar_offset)
+    end
+    return nothing
+end
+
-"""
-    total_external_neutral_sources!(pdf, fvec, moments, neutral_sources, vperp, vpa, dt, scratch_dummy)
+"""
+    add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, me,
+                                              z_speed, electron_source, index, z,
+                                              vperp, vpa, dt, ir; f_offset=0,
+                                              ppar_offset=0)
+
+Add to `jacobian_matrix` the contributions corresponding to the term added by
+`external_electron_source!()` for the source `electron_source` (number `index` in the
+list of electron sources), at the single radial point `ir`.
+
+The row for the point `(ivpa, ivperp, iz)` of the electron distribution function is
+`f_offset + (iz - 1) * vperp.n * vpa.n + (ivperp - 1) * vpa.n + ivpa`, and the column for
+the electron parallel pressure at `iz` is `ppar_offset + iz`. Two kinds of entries are
+added:
+* if `electron_source.source_type == "energy"`, `dt * source_amplitude[iz]` on the
+  diagonal of each `f` row;
+* for every source type, `-dt` times the derivative with respect to `ppar[iz]` of the
+  Maxwellian source term, in the `ppar` column of each `f` row.
+
+Points for which `skip_f_electron_bc_points_in_Jacobian()` returns `true` are left
+untouched. If the source is not active the function returns without modifying
+`jacobian_matrix`. `f` is not read by this function.
+
+Throws an error if `f_offset == ppar_offset`, or (unless bounds checks are elided) if
+`jacobian_matrix` is not square or is too small to contain the `f` and `ppar` blocks.
+"""
+function add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, me,
+                                                   z_speed, electron_source, index, z,
+                                                   vperp, vpa, dt, ir; f_offset=0,
+                                                   ppar_offset=0)
+    if f_offset == ppar_offset
+        error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
+              * "cannot be in same place in state vector.")
+    end
+    # `@boundscheck` only marks an expression as elidable under `@inbounds`; it does not
+    # throw by itself, so each condition has to raise explicitly when it fails.
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian matrix is not square")
+    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
+    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big")
 
-Contribute all of the neutral sources to the neutral pdf, one by one.
-"""
-function total_external_neutral_sources!(pdf, fvec, moments, neutral_sources,
-                                         vzeta, vr, vz, dt)
-    for index ∈ eachindex(neutral_sources)
-        if neutral_sources[index].active
-            external_neutral_source!(pdf, fvec, moments, neutral_sources[index],
-                                     index, vzeta, vr, vz, dt)
+    if !electron_source.active
+        return nothing
+    end
+
+    source_amplitude = @view moments.electron.external_source_amplitude[:,ir,index]
+    source_T = electron_source.source_T
+    dens = @view moments.electron.dens[:,ir]
+    upar = @view moments.electron.upar[:,ir]
+    ppar = @view moments.electron.ppar[:,ir]
+    vth = @view moments.electron.vth[:,ir]
+    if vperp.n == 1
+        vth_factor = 1.0 / sqrt(source_T / me)
+    else
+        # NOTE(review): this evaluates to (source_T / me)^(-0.75); confirm that it matches
+        # the `vth_factor` used by `external_electron_source!()` when `vperp.n > 1`.
+        vth_factor = 1.0 / sqrt(source_T / me)^1.5
+    end
+    vperp_grid = vperp.grid
+    vpa_grid = vpa.grid
+    v_size = vperp.n * vpa.n
+
+    begin_z_vperp_vpa_region()
+    if electron_source.source_type == "energy"
+        @loop_z_vperp_vpa iz ivperp ivpa begin
+            if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa,
+                                                     z_speed)
+                continue
+            end
+
+            # Rows corresponding to pdf_electron
+            row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset
+
+            # Contribution from `external_electron_source!()`
+            jacobian_matrix[row,row] += dt * source_amplitude[iz]
         end
     end
+    @loop_z_vperp_vpa iz ivperp ivpa begin
+        if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed)
+            continue
+        end
+
+        # Rows corresponding to pdf_electron
+        row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset
+
+        # Contributions from
+        #   -vth/n*vth_factor*source_amplitude*exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T)
+        # Using
+        #   d(vth[irowz])/d(ppar[icolz]) = 1/2*vth/ppar * delta(irowz,icolz)
+        #
+        #   d(exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T)[irowz])/d(ppar[icolz])
+        #     = -2*(w_⟂^2*vth+(w_∥*vth+u)*w_∥)*me/source_T * 1/2*vth/ppar * exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T) * delta(irowz,icolz)
+        #     = -(w_⟂^2*vth+(w_∥*vth+u)*w_∥)*me/source_T * vth/ppar * exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T) * delta(irowz,icolz)
+        # Note d((w_⟂*vth)^2)/d(vth) = 2*w_⟂^2*vth, so the w_⟂ term carries a factor of vth
+        # just like the w_∥ term does.
+        jacobian_matrix[row,ppar_offset+iz] +=
+            -dt * vth[iz] / dens[iz] * vth_factor * source_amplitude[iz] *
+            (0.5/ppar[iz] - (vperp_grid[ivperp]^2*vth[iz] + (vpa_grid[ivpa]*vth[iz] + upar[iz])*vpa_grid[ivpa])*me/source_T*vth[iz]/ppar[iz]) *
+            exp(-((vperp_grid[ivperp]*vth[iz])^2 + (vpa_grid[ivpa]*vth[iz] + upar[iz])^2) * me / source_T)
+    end
+    return nothing
 end
- """ external_neutral_source!(pdf, fvec, moments, neutral_source_settings, vzeta, vr, vz, dt) diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl index 7d4b5f2e1..97c31d54e 100644 --- a/moment_kinetics/src/gauss_legendre.jl +++ b/moment_kinetics/src/gauss_legendre.jl @@ -25,6 +25,7 @@ using FastGaussQuadrature using LegendrePolynomials: Pl, dnPl using LinearAlgebra: mul!, lu, LU using SparseArrays: sparse, AbstractSparseArray +using SparseMatricesCSR using ..type_definitions: mk_float, mk_int using ..array_allocation: allocate_float import ..calculus: elementwise_derivative!, mass_matrix_solve!
@@ -82,7 +83,7 @@ struct gausslegendre_base_info Y31::Array{mk_float,3} end -struct gausslegendre_info{TSparse, TLU, TLmat, TLmatLU} <: weak_discretization_info +struct gausslegendre_info{TSparse, TSparseCSR, TLU, TLmat, TLmatLU} <: weak_discretization_info lobatto::gausslegendre_base_info radau::gausslegendre_base_info # global (1D) mass matrix @@ -94,6 +95,13 @@ struct gausslegendre_info{TSparse, TLU, TLmat, TLmatLU} <: weak_discretization_i K_matrix::TSparse # global (1D) weak Laplacian derivative matrix L_matrix::TSparse + # global (1D) strong first derivative matrix + D_matrix::TSparse + # global (1D) strong first derivative matrix in Compressed Sparse Row (CSR) format + D_matrix_csr::TSparseCSR + # global (1D) weak second derivative matrix, with inverse mass matrix included (so + # matrix is dense) + dense_second_deriv_matrix::Array{mk_float,2} # global (1D) weak Laplacian derivative matrix with boundary conditions - might be # `nothing` if boundary conditions are not supported L_matrix_with_bc::TLmat @@ -119,12 +127,15 @@ function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true) mass_matrix = allocate_float(coord.n,coord.n) K_matrix = allocate_float(coord.n,coord.n) L_matrix = allocate_float(coord.n,coord.n) + D_matrix = allocate_float(coord.n,coord.n) dirichlet_bc = (coord.bc in ["zero", "constant"]) # and further options in future periodic_bc = (coord.bc == "periodic") setup_global_weak_form_matrix!(mass_matrix, lobatto, radau, coord, "M"; periodic_bc=periodic_bc) setup_global_weak_form_matrix!(K_matrix, lobatto, radau, coord, "K_with_BC_terms"; periodic_bc=periodic_bc) setup_global_weak_form_matrix!(L_matrix, lobatto, radau, coord, "L_with_BC_terms") + setup_global_strong_form_matrix!(D_matrix, lobatto, radau, coord, "D"; periodic_bc=periodic_bc) + dense_second_deriv_matrix = inv(mass_matrix) * K_matrix mass_matrix_lu = lu(sparse(mass_matrix)) if dirichlet_bc || periodic_bc L_matrix_with_bc = allocate_float(coord.n,coord.n) @@ -138,7 
+149,7 @@ function setup_gausslegendre_pseudospectral(coord; collision_operator_dim=true) Qmat = allocate_float(coord.ngrid,coord.ngrid) - return gausslegendre_info(lobatto,radau,mass_matrix,sparse(S_matrix),sparse(K_matrix),sparse(L_matrix),L_matrix_with_bc, + return gausslegendre_info(lobatto,radau,mass_matrix,sparse(S_matrix),sparse(K_matrix),sparse(L_matrix),sparse(D_matrix),convert(SparseMatrixCSR{1,mk_float,mk_int},D_matrix),dense_second_deriv_matrix,L_matrix_with_bc, mass_matrix_lu,L_matrix_lu,Qmat) end @@ -898,7 +909,7 @@ function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2}, @. QQ_global[iminl:imaxl,iminl:imaxl] += QQ_j[:,:] k = 1 end - + if dirichlet_bc # Make matrix diagonal for first/last grid points so it does not change the values # there @@ -940,6 +951,79 @@ function setup_global_weak_form_matrix!(QQ_global::Array{mk_float,2}, return nothing end
+"""
+    setup_global_strong_form_matrix!(QQ_global, lobatto, radau, coord, option;
+                                     periodic_bc=false)
+
+Assemble the element-local matrices selected by `option` (see `get_QQ_local!()`) into
+the global array `QQ_global`, for later evaluating the strong form of the required 1D
+equation. `QQ_global` is set to zero before the assembly starts.
+
+The elemental matrices are assembled without imposing boundary conditions on the first
+and final rows of the matrix. This means that the operators constructed by this function
+can only be used for differentiation, and not for solving 1D ODEs.
+
+The rows belonging to grid points shared between two elements are averaged (instead of
+simply added), to be consistent with the `derivative_elements_to_full_grid!()` function
+in calculus.jl.
+
+When `periodic_bc=true` the first row of the first element is averaged into the final
+row of `QQ_global`, and the first row of `QQ_global` is set to `1` in its first column
+and `-1` in its last column.
+
+Throws an error if `periodic_bc=true` while `coord.nrank != 1`.
+"""
+function setup_global_strong_form_matrix!(QQ_global::Array{mk_float,2},
+                                          lobatto::gausslegendre_base_info,
+                                          radau::gausslegendre_base_info,
+                                          coord,option; periodic_bc=false)
+    # Scratch storage for the local matrix of the element currently being assembled
+    QQ_j = allocate_float(coord.ngrid,coord.ngrid)
+
+    ngrid = coord.ngrid
+    imin = coord.imin
+    imax = coord.imax
+    @. QQ_global = 0.0
+
+    # fill in first element
+    j = 1
+    # N.B. QQ varies with ielement for vperp, but not vpa
+    # a radau element is used for the vperp grid (see get_QQ_local!())
+    get_QQ_local!(QQ_j,j,lobatto,radau,coord,option)
+    if periodic_bc && coord.nrank != 1
+        error("periodic boundary conditions not supported when dimension is distributed")
+    end
+    if periodic_bc && coord.nrank == 1
+        QQ_global[imax[end], imin[j]:imax[j]] .+= QQ_j[1,:] ./ 2.0
+        # NOTE(review): this makes the first row the difference between the first and
+        # last grid values rather than a derivative - confirm that callers expect this.
+        QQ_global[1,1] += 1.0
+        QQ_global[1,end] += -1.0
+    else
+        QQ_global[imin[j],imin[j]:imax[j]] .+= QQ_j[1,:]
+    end
+    for k in 2:imax[j]-imin[j]
+        QQ_global[k,imin[j]:imax[j]] .+= QQ_j[k,:]
+    end
+    # The final row of the first element is shared with the next element, or, for a
+    # periodic grid with a single element, with the first row that was added to the final
+    # row of QQ_global above. In both cases only half of it is added, so that the shared
+    # row is an average. (`periodic_bc` implies `coord.nrank == 1` here because of the
+    # error check above.)
+    if coord.nelement_local > 1 || periodic_bc
+        QQ_global[imax[j],imin[j]:imax[j]] .+= QQ_j[ngrid,:]./2.0
+    else
+        QQ_global[imax[j],imin[j]:imax[j]] .+= QQ_j[ngrid,:]
+    end
+    # remaining elements recalling definitions of imax and imin
+    for j in 2:coord.nelement_local
+        get_QQ_local!(QQ_j,j,lobatto,radau,coord,option)
+        #lower boundary assembly on element
+        QQ_global[imin[j]-1,imin[j]-1:imax[j]] .+= QQ_j[1,:]./2.0
+        for k in 2:imax[j]-imin[j]+1
+            QQ_global[k+imin[j]-2,imin[j]-1:imax[j]] .+= QQ_j[k,:]
+        end
+        # upper boundary assembly on element
+        if j == coord.nelement_local
+            if periodic_bc && coord.nrank == 1
+                QQ_global[imax[j],imin[j]-1:imax[j]] .+= QQ_j[ngrid,:] / 2.0
+            else
+                QQ_global[imax[j],imin[j]-1:imax[j]] .+= QQ_j[ngrid,:]
+            end
+        else
+            QQ_global[imax[j],imin[j]-1:imax[j]] .+= QQ_j[ngrid,:]./2.0
+        end
+    end
+
+    return nothing
+end
+
 function get_QQ_local!(QQ::Array{mk_float,2},ielement, lobatto::gausslegendre_base_info, radau::gausslegendre_base_info, @@ -967,6 +1051,8 @@ function get_QQ_local!(QQ::Array{mk_float,2},ielement, get_LL_local!(QQ,ielement,lobatto,radau,coord) elseif option == "L_with_BC_terms" get_LL_local!(QQ,ielement,lobatto,radau,coord,explicit_BC_terms=true) + elseif option == "D" + get_DD_local!(QQ,ielement,lobatto,radau,coord) end return nothing end @@ -1136,6 +1222,18 @@ function get_LL_local!(QQ,ielement, return nothing end
+# Strong-form differentiation matrix
+# Fills QQ with the reference-element first derivative matrix `Dmat` divided by
+# `coord.element_scale[ielement]`. `radau.Dmat` is used for the first element of the
+# "vperp" coordinate on rank 0, and `lobatto.Dmat` in every other case.
+function get_DD_local!(QQ, ielement, lobatto::gausslegendre_base_info,
+                       radau::gausslegendre_base_info, coord)
+    scale_factor = coord.element_scale[ielement]
+    if coord.name == "vperp" && ielement == 1 && coord.irank == 0
+        @. QQ = radau.Dmat / scale_factor
+    else
+        @. QQ = lobatto.Dmat / scale_factor
+    end
+    return nothing
+end
+
 # mass matrix without vperp factor (matrix N) # only useful for the vperp coordinate function get_MN_local!(QQ,ielement, diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index 0c9ae84d7..2ba8ccd4f 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -26,6 +26,7 @@ using ..electron_kinetic_equation: implicit_electron_advance! using ..em_fields: update_phi! using ..file_io: setup_electron_io, write_electron_state, finish_electron_io using ..load_data: reload_electron_data! +using ..moment_constraints: hard_force_moment_constraints! using ..moment_kinetics_structs: scratch_pdf, pdf_substruct, electron_pdf_substruct, pdf_struct, moments_struct, boundary_distributions_struct using ..nonlinear_solvers: nl_solver_info @@ -266,7 +267,7 @@ function initialize_electrons!(pdf, moments, fields, geometry, composition, r, z external_source_settings, scratch_dummy, scratch, scratch_electron, nl_solver_params, t_params, t_input, num_diss_params, advection_structs, io_input, input_dict; - restart_electron_physics) + restart_electron_physics, skip_electron_solve=false) moments.electron.dens_updated[] = false # initialise the electron density profile @@ -363,23 +364,21 @@ function initialize_electrons!(pdf, moments, fields, geometry, composition, r, z # # q at the boundaries tells us dTe/dz for Braginskii electrons nu_ei = collisions.electron_fluid.nu_ei + dTe_dz_lower = Ref{mk_float}(0.0) if z.irank == 0 - dTe_dz_lower = @.
-moments.electron.qpar[1,:] * 2.0 / 3.16 / - moments.electron.ppar[1,:] * - composition.me_over_mi * nu_ei - else - dTe_dz_lower = nothing + dTe_dz_lower[] = @. -moments.electron.qpar[1,:] * 2.0 / 3.16 / + moments.electron.ppar[1,:] * + composition.me_over_mi * nu_ei end - dTe_dz_lower = MPI.bcast(dTe_dz_lower, z.comm; root=0) + MPI.Bcast!(dTe_dz_lower, z.comm; root=0) + dTe_dz_upper = Ref{mk_float}(0.0) if z.irank == z.nrank - 1 - dTe_dz_upper = @. -moments.electron.qpar[end,:] * 2.0 / 3.16 / - moments.electron.ppar[end,:] * - composition.me_over_mi * nu_ei - else - dTe_dz_upper = nothing + dTe_dz_upper[] = @. -moments.electron.qpar[end,:] * 2.0 / 3.16 / + moments.electron.ppar[end,:] * + composition.me_over_mi * nu_ei end - dTe_dz_upper = MPI.bcast(dTe_dz_upper, z.comm; root=(z.nrank - 1)) + MPI.Bcast!(dTe_dz_upper, z.comm; root=(z.nrank - 1)) # The temperature should already be equal to the 'Boltzmann electron' # Te, so we just need to add a cubic that vanishes at ±Lz/2 @@ -400,9 +399,9 @@ function initialize_electrons!(pdf, moments, fields, geometry, composition, r, z # 2*B - 3*2*B = -4*B = dTe/dz_upper + dTe/dz_lower Lz = z.L zg = z.grid - C = @. (dTe_dz_upper - dTe_dz_lower) / 2.0 / Lz + C = @. (dTe_dz_upper[] - dTe_dz_lower[]) / 2.0 / Lz A = @. -C * Lz^2 / 4 - B = @. -(dTe_dz_lower + dTe_dz_upper) / 4.0 + B = @. -(dTe_dz_lower[] + dTe_dz_upper[]) / 4.0 D = @. -4.0 * B / Lz^2 @loop_r ir begin @. 
moments.electron.temp[:,ir] += A[ir] + B[ir]*zg + C[ir]*zg^2 + @@ -451,11 +450,7 @@ function initialize_electrons!(pdf, moments, fields, geometry, composition, r, z scratch[1].electron_ppar .= moments.electron.ppar scratch[1].electron_pperp .= 0.0 #moments.electron.pperp scratch[1].electron_temp .= moments.electron.temp - if t_params.electron === nothing - n_rk_stages = length(scratch) - 1 - else - n_rk_stages = t_params.electron.n_rk_stages - end + n_rk_stages = t_params.n_rk_stages scratch[n_rk_stages+1].electron_density .= moments.electron.dens scratch[n_rk_stages+1].electron_upar .= moments.electron.upar scratch[n_rk_stages+1].electron_ppar .= moments.electron.ppar @@ -476,7 +471,8 @@ function initialize_electrons!(pdf, moments, fields, geometry, composition, r, z advection_structs.electron_vpa_advect, scratch_dummy, collisions, composition, geometry, external_source_settings, num_diss_params, gyroavs, nl_solver_params, t_params, - t_input["electron_t_input"], io_input, input_dict) + t_input["electron_t_input"], io_input, input_dict; + skip_electron_solve=skip_electron_solve) return nothing end @@ -573,7 +569,7 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field scratch_dummy, collisions, composition, geometry, external_source_settings, num_diss_params, gyroavs, nl_solver_params, t_params, t_input, io_input, - input_dict) + input_dict; skip_electron_solve) # now that the initial electron pdf is given, the electron parallel heat flux should be updated # if using kinetic electrons @@ -707,24 +703,28 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field previous_runs_info, "initial_electron") - # Can't let this counter stay set to 0 - t_params.electron.dfns_output_counter[] = max(t_params.electron.dfns_output_counter[], 1) - success = - @views update_electron_pdf!(scratch_electron, pdf.electron.norm, moments, - fields.phi, r, z, vperp, vpa, z_spectral, - vperp_spectral, vpa_spectral, z_advect, - 
vpa_advect, scratch_dummy, t_params.electron, - collisions, composition, - external_source_settings, num_diss_params, - max_electron_pdf_iterations, - max_electron_sim_time; - io_electron=io_initial_electron, - initial_time=code_time, - residual_tolerance=t_input["initialization_residual_value"], - evolve_ppar=true) - if success != "" - error("!!!max number of iterations for electron pdf update exceeded!!!\n" - * "Stopping at $(Dates.format(now(), dateformat"H:MM:SS"))") + if !skip_electron_solve + # Can't let this counter stay set to 0 + t_params.electron.dfns_output_counter[] = max(t_params.electron.dfns_output_counter[], 1) + success = + @views update_electron_pdf!(scratch_electron, pdf.electron.norm, + moments, fields.phi, r, z, vperp, vpa, + z_spectral, vperp_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, + t_params.electron, collisions, + composition, external_source_settings, + num_diss_params, + nl_solver_params.electron_advance, + max_electron_pdf_iterations, + max_electron_sim_time; + io_electron=io_initial_electron, + initial_time=code_time, + residual_tolerance=t_input["initialization_residual_value"], + evolve_ppar=true) + if success != "" + error("!!!max number of iterations for electron pdf update exceeded!!!\n" + * "Stopping at $(Dates.format(now(), dateformat"H:MM:SS"))") + end end # Now run without evolve_ppar=true to get pdf_electron fully to steady state, @@ -732,7 +732,9 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field if global_rank[] == 0 println("Initializing electrons - evolving pdf_electron only to steady state") end - if t_params.implicit_electron_advance + if skip_electron_solve + success = "" + elseif t_params.implicit_electron_advance # Create new nl_solver_info ojbect with higher maximum iterations for # initialisation. 
initialisation_nl_solver_params = @@ -744,6 +746,9 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field nl_solver_params.electron_advance.linear_restart, nl_solver_params.electron_advance.linear_max_restarts, nl_solver_params.electron_advance.H, + nl_solver_params.electron_advance.c, + nl_solver_params.electron_advance.s, + nl_solver_params.electron_advance.g, nl_solver_params.electron_advance.V, nl_solver_params.electron_advance.linear_initial_guess, nl_solver_params.electron_advance.n_solves, @@ -752,9 +757,12 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field nl_solver_params.electron_advance.global_n_solves, nl_solver_params.electron_advance.global_nonlinear_iterations, nl_solver_params.electron_advance.global_linear_iterations, - nl_solver_params.electron_advance.stage_counter, + nl_solver_params.electron_advance.solves_since_precon_update, + nl_solver_params.electron_advance.precon_dt, nl_solver_params.electron_advance.serial_solve, nl_solver_params.electron_advance.max_nonlinear_iterations_this_step, + nl_solver_params.electron_advance.max_linear_iterations_this_step, + nl_solver_params.electron_advance.preconditioner_type, nl_solver_params.electron_advance.preconditioner_update_interval, nl_solver_params.electron_advance.preconditioners, ) @@ -768,8 +776,8 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field num_diss_params, r, z, vperp, vpa, r_spectral, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, - gyroavs, scratch_dummy, 0.0, - initialisation_nl_solver_params) + gyroavs, scratch_dummy, t_params.electron, + 0.0, initialisation_nl_solver_params) else success = update_electron_pdf!(scratch_electron, pdf.electron.norm, moments, @@ -778,9 +786,11 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field vpa_advect, scratch_dummy, t_params.electron, collisions, composition, external_source_settings, num_diss_params, + 
nl_solver_params.electron_advance, max_electron_pdf_iterations, max_electron_sim_time; - io_electron=io_initial_electron) + io_electron=io_initial_electron, + evolve_ppar=true, ion_dt=t_params.dt[]) end if success != "" error("!!!max number of iterations for electron pdf update exceeded!!!\n" @@ -1629,13 +1639,17 @@ function init_electron_pdf_over_density_and_boundary_phi!(pdf, phi, density, upa @loop_r ir begin # Initialise an unshifted Maxwellian as a first step @loop_z iz begin - vpa_over_vth = @. vpa.scratch3 = vpa.grid + upar[iz,ir] / vth[iz,ir] @loop_vperp ivperp begin - @. pdf[:,ivperp,iz,ir] = exp(-vpa_over_vth^2) + @. pdf[:,ivperp,iz,ir] = exp(-vpa.grid^2) end end end end + + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(pdf, moments, vpa) + + return nothing end function init_pdf_moments_manufactured_solns!(pdf, moments, vz, vr, vzeta, vpa, vperp, z, r, n_ion_species, n_neutral_species, geometry,composition) diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index c4fffaaec..f427c3a83 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -34,21 +34,22 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero n_variables::mk_int nstep::mk_int end_time::mk_float - t::MPISharedArray{mk_float,1} - dt::MPISharedArray{mk_float,1} - previous_dt::MPISharedArray{mk_float,1} - next_output_time::MPISharedArray{mk_float,1} - dt_before_output::MPISharedArray{mk_float,1} - dt_before_last_fail::MPISharedArray{mk_float,1} + t::Base.RefValue{mk_float} + dt::Base.RefValue{mk_float} + previous_dt::Base.RefValue{mk_float} + dt_before_output::Base.RefValue{mk_float} + dt_before_last_fail::Base.RefValue{mk_float} CFL_prefactor::mk_float - step_to_moments_output::MPISharedArray{Bool,1} - step_to_dfns_output::MPISharedArray{Bool,1} - write_moments_output::MPISharedArray{Bool,1} - write_dfns_output::MPISharedArray{Bool,1} - 
step_counter::Ref{mk_int} - moments_output_counter::Ref{mk_int} - dfns_output_counter::Ref{mk_int} - failure_counter::Ref{mk_int} + step_to_moments_output::Base.RefValue{Bool} + step_to_dfns_output::Base.RefValue{Bool} + write_moments_output::Base.RefValue{Bool} + write_dfns_output::Base.RefValue{Bool} + step_counter::Base.RefValue{mk_int} + max_step_count_this_ion_step::Base.RefValue{mk_int} + max_t_increment_this_ion_step::Base.RefValue{mk_float} + moments_output_counter::Base.RefValue{mk_int} + dfns_output_counter::Base.RefValue{mk_int} + failure_counter::Base.RefValue{mk_int} failure_caused_by::Vector{mk_int} limit_caused_by::Vector{mk_int} nwrite_moments::mk_int @@ -77,6 +78,10 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero implicit_ion_advance::Bool implicit_vpa_advection::Bool implicit_electron_ppar::Bool + constraint_forcing_rate::mk_float + decrease_dt_iteration_threshold::mk_int + increase_dt_iteration_threshold::mk_int + cap_factor_ion_dt::mk_float write_after_fixed_step_count::Bool error_sum_zero::Terrorsum split_operators::Bool @@ -90,7 +95,7 @@ end """ """ -mutable struct advance_info +struct advance_info vpa_advection::Bool vperp_advection::Bool z_advection::Bool diff --git a/moment_kinetics/src/ionization.jl b/moment_kinetics/src/ionization.jl index 0620f0717..c4380c208 100644 --- a/moment_kinetics/src/ionization.jl +++ b/moment_kinetics/src/ionization.jl @@ -42,39 +42,36 @@ function ion_ionization_collisions_1V!(f_out, fvec_in, vz, vpa, vperp, z, r, vz_ # values of dz/dt; as charge exchange and ionization collisions require # the evaluation of the pdf for species s' to obtain the update for species s, # will thus have to interpolate between the different vpa grids - if moments.evolve_ppar || moments.evolve_upar - if !moments.evolve_upar - # if evolve_ppar = true and evolve_upar = false, vpa coordinate is - # vpahat_s = vpa/vth_s; - # we have f_{s'}(vpahat_{s'}) = f_{s'}(vpahat_s * vth_s / vth_{s'}); - # to get 
f_{s'}(vpahat_s), need to obtain vpahat_s grid locations - # in terms of the vpahat_{s'} coordinate: - # (vpahat_s)_j = (vpahat_{s'})_j * vth_{s'} / vth_{s} - @. vpa.scratch = vpa.grid / vth_ratio - elseif !moments.evolve_ppar - # if evolve_ppar = false and evolve_upar = true, vpa coordinate is - # wpa_s = vpa-upar_s; - # we have f_{s'}(wpa_{s'}) = f_{s'}((wpa_s + upar_s - upar_{s'}; - # to get f_{s'}(wpa_s), need to obtain wpa_s grid locations - # in terms of the wpa_{s'} coordinate: - # (wpa_s)_j = (wpa_{s'})_j + upar_{s'} - upar_{s} - @. vpa.scratch = vpa.grid + fvec_in.upar[iz,ir,is] - fvec_in.uz_neutral[iz,ir,isn] - else - # if evolve_ppar = true and evolve_upar = true, vpa coordinate is - # wpahat_s = (vpa-upar_s)/vth_s; - # we have f_{s'}(wpahat_{s'}) = f_{s'}((wpahat_s * vth_s + upar_s - upar_{s'}) / vth_{s'}); - # to get f_{s'}(wpahat_s), need to obtain wpahat_s grid locations - # in terms of the wpahat_{s'} coordinate: - # (wpahat_{s'})_j = ((wpahat_{s})_j * vth_{s} + upar_{s} - upar_{s'}) / vth_{s'} - @. vpa.scratch = (vpa.grid * moments.ion.vth[iz,ir,is] + fvec_in.upar[iz,ir,is] - fvec_in.uz_neutral[iz,ir,isn]) / moments.neutral.vth[iz,ir,isn] - end - # interpolate to the new grid (passed in as vpa.scratch) - # and return interpolated values in vpa.scratch2 - @views interpolate_to_grid_vpa!(vpa.scratch2, vpa.scratch, fvec_in.pdf_neutral[:,1,1,iz,ir,isn], vz, vz_spectral) + if moments.evolve_upar && moments.evolve_ppar + # if evolve_ppar = true and evolve_upar = true, vpa coordinate is + # wpahat_s = (vpa-upar_s)/vth_s; + # we have f_{s'}(wpahat_{s'}) = f_{s'}((wpahat_s * vth_s + upar_s - upar_{s'}) / vth_{s'}); + # to get f_{s'}(wpahat_s), need to obtain wpahat_s grid locations + # in terms of the wpahat_{s'} coordinate: + # (wpahat_{s'})_j = ((wpahat_{s})_j * vth_{s} + upar_{s} - upar_{s'}) / vth_{s'} + new_grid = @. 
vpa.scratch = (vpa.grid * moments.ion.vth[iz,ir,is] + fvec_in.upar[iz,ir,is] - fvec_in.uz_neutral[iz,ir,isn]) / moments.neutral.vth[iz,ir,isn] + elseif !moments.evolve_upar + # if evolve_ppar = true and evolve_upar = false, vpa coordinate is + # vpahat_s = vpa/vth_s; + # we have f_{s'}(vpahat_{s'}) = f_{s'}(vpahat_s * vth_s / vth_{s'}); + # to get f_{s'}(vpahat_s), need to obtain vpahat_s grid locations + # in terms of the vpahat_{s'} coordinate: + # (vpahat_s)_j = (vpahat_{s'})_j * vth_{s'} / vth_{s} + new_grid = @. vpa.scratch = vpa.grid / vth_ratio + elseif !moments.evolve_ppar + # if evolve_ppar = false and evolve_upar = true, vpa coordinate is + # wpa_s = vpa-upar_s; + # we have f_{s'}(wpa_{s'}) = f_{s'}((wpa_s + upar_s - upar_{s'}; + # to get f_{s'}(wpa_s), need to obtain wpa_s grid locations + # in terms of the wpa_{s'} coordinate: + # (wpa_s)_j = (wpa_{s'})_j + upar_{s'} - upar_{s} + new_grid = @. vpa.scratch = vpa.grid + fvec_in.upar[iz,ir,is] - fvec_in.uz_neutral[iz,ir,isn] else - # no need to interpolate if neither upar or ppar evolved separately from pdf - vpa.scratch2 .= fvec_in.pdf_neutral[:,1,1,iz,ir,isn] + new_grid = vpa.grid end + # interpolate to the new grid (passed in as vpa.scratch) + # and return interpolated values in vpa.scratch2 + @views interpolate_to_grid_vpa!(vpa.scratch2, vpa.scratch, fvec_in.pdf_neutral[:,1,1,iz,ir,isn], vz, vz_spectral) ionization = collisions.reactions.ionization_frequency @loop_vpa ivpa begin f_out[ivpa,1,iz,ir,is] += diff --git a/moment_kinetics/src/krook_collisions.jl b/moment_kinetics/src/krook_collisions.jl index 3fb386961..766fe7d39 100644 --- a/moment_kinetics/src/krook_collisions.jl +++ b/moment_kinetics/src/krook_collisions.jl @@ -3,9 +3,11 @@ module krook_collisions export setup_krook_collisions_input, get_collision_frequency_ii, get_collision_frequency_ee, - get_collision_frequency_ei, krook_collisions!, electron_krook_collisions! 
+ get_collision_frequency_ei, krook_collisions!, electron_krook_collisions!, + add_electron_krook_collisions_to_Jacobian! using ..looping +using ..boundary_conditions: skip_f_electron_bc_points_in_Jacobian using ..input_structs: krook_collisions_input, set_defaults_and_check_section! using ..reference_parameters: get_reference_collision_frequency_ii, get_reference_collision_frequency_ee, @@ -268,10 +270,13 @@ end """ Add Krook collision operator for electrons + +Note that this function operates on a single point in `r`, so `pdf_out`, `pdf_in`, +`dens_in`, `upar_in`, `upar_ion_in`, and `vth_in` should have no r-dimension. """ function electron_krook_collisions!(pdf_out, pdf_in, dens_in, upar_in, upar_ion_in, vth_in, collisions, vperp, vpa, dt) - begin_r_z_region() + begin_z_region() # For now, electrons are always fully moment-kinetic evolve_density = true @@ -287,9 +292,9 @@ function electron_krook_collisions!(pdf_out, pdf_in, dens_in, upar_in, upar_ion_ if evolve_ppar && evolve_upar # Compared to evolve_upar version, grid is already normalized by vth, and multiply # through by vth, remembering pdf is already multiplied by vth - @loop_r_z ir iz begin - n = dens_in[iz,ir] - vth = vth_in[iz,ir] + @loop_z iz begin + n = dens_in[iz] + vth = vth_in[iz] nu_ee = get_collision_frequency_ee(collisions, n, vth) nu_ei = get_collision_frequency_ei(collisions, n, vth) @@ -298,13 +303,13 @@ function electron_krook_collisions!(pdf_out, pdf_in, dens_in, upar_in, upar_ion_ # coordinate. # For now, assume there is only one ion species rather than bothering to # calculate an average ion flow speed, or sum over ion species here. - @. vpa.scratch = vpa.grid + (upar_ion_in[iz,ir,1] - upar_in[iz,ir]) / vth + @. 
vpa.scratch = vpa.grid + (upar_ion_in[iz,1] - upar_in[iz]) / vth @loop_vperp_vpa ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] -= dt * ( - nu_ee * (pdf_in[ivpa,ivperp,iz,ir] + pdf_out[ivpa,ivperp,iz] -= dt * ( + nu_ee * (pdf_in[ivpa,ivperp,iz] - exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)) - + nu_ei * (pdf_in[ivpa,ivperp,iz,ir] + + nu_ei * (pdf_in[ivpa,ivperp,iz] - exp(-vpa.scratch[ivpa]^2 - vperp.grid[ivperp]^2)) ) end @@ -312,33 +317,33 @@ function electron_krook_collisions!(pdf_out, pdf_in, dens_in, upar_in, upar_ion_ elseif evolve_ppar # Compared to full-f collision operater, multiply through by vth, remembering pdf # is already multiplied by vth, and grid is already normalized by vth - @loop_r_z ir iz begin - n = dens_in[iz,ir] - vth = vth_in[iz,ir] + @loop_z iz begin + n = dens_in[iz] + vth = vth_in[iz] nu_ee = get_collision_frequency_ee(collisions, n, vth) nu_ei = get_collision_frequency_ei(collisions, n, vth) @loop_vperp_vpa ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] -= dt * ( - nu_ee * (pdf_in[ivpa,ivperp,iz,ir] - - exp(-((vpa.grid[ivpa] - upar_in[iz,ir])/vth)^2 + pdf_out[ivpa,ivperp,iz] -= dt * ( + nu_ee * (pdf_in[ivpa,ivperp,iz] + - exp(-((vpa.grid[ivpa] - upar_in[iz])/vth)^2 - (vperp.grid[ivperp]/vth)^2)) # e-i collisions push electrons towards a Maxwellian drifting at the ion # parallel flow, so need a corresponding normalised parallel velocity # coordinate. # For now, assume there is only one ion species rather than bothering to # calculate an average ion flow speed, or sum over ion species here. 
- + nu_ei * (pdf_in[ivpa,ivperp,iz,ir] - - exp(-((vpa.grid[ivpa] - upar_ion_in[iz,ir,1])/vth)^2 + + nu_ei * (pdf_in[ivpa,ivperp,iz] + - exp(-((vpa.grid[ivpa] - upar_ion_in[iz,1])/vth)^2 - (vperp.grid[ivperp]/vth)^2)) ) end end elseif evolve_upar # Compared to evolve_density version, grid is already shifted by upar - @loop_r_z ir iz begin - n = dens_in[iz,ir] - vth = vth_in[iz,ir] + @loop_z iz begin + n = dens_in[iz] + vth = vth_in[iz] nu_ee = get_collision_frequency_ee(collisions, n, vth) nu_ei = get_collision_frequency_ei(collisions, n, vth) @@ -347,14 +352,14 @@ function electron_krook_collisions!(pdf_out, pdf_in, dens_in, upar_in, upar_ion_ # coordinate. # For now, assume there is only one ion species rather than bothering to # calculate an average ion flow speed, or sum over ion species here. - @. vpa.scratch = vpa.grid + (upar_ion_in[iz,ir,1] - upar_in[iz,ir]) + @. vpa.scratch = vpa.grid + (upar_ion_in[iz,1] - upar_in[iz]) @loop_vperp_vpa ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] -= dt * ( - nu_ee * (pdf_in[ivpa,ivperp,iz,ir] + pdf_out[ivpa,ivperp,iz] -= dt * ( + nu_ee * (pdf_in[ivpa,ivperp,iz] - 1.0 / vth * exp(-(vpa.grid[ivpa] / vth)^2 - (vperp.grid[ivperp] / vth)^2)) - + nu_ei * (pdf_in[ivpa,ivperp,iz,ir] + + nu_ei * (pdf_in[ivpa,ivperp,iz] - 1.0 / vth * exp(-(vpa.scratch[ivpa] / vth)^2 - (vperp.grid[ivperp] / vth)^2)) ) @@ -363,33 +368,33 @@ function electron_krook_collisions!(pdf_out, pdf_in, dens_in, upar_in, upar_ion_ elseif evolve_density # Compared to full-f collision operater, divide through by density, remembering # that pdf is already normalized by density - @loop_r_z ir iz begin - n = dens_in[iz,ir] - vth = vth_in[iz,ir] + @loop_z iz begin + n = dens_in[iz] + vth = vth_in[iz] nu_ee = get_collision_frequency_ee(collisions, n, vth) nu_ei = get_collision_frequency_ei(collisions, n, vth) @loop_vperp_vpa ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] -= dt * ( - nu_ee * (pdf_in[ivpa,ivperp,iz,ir] + pdf_out[ivpa,ivperp,iz] -= dt * ( + nu_ee * 
(pdf_in[ivpa,ivperp,iz] - 1.0 / vth - * exp(-((vpa.grid[ivpa] - upar_in[iz,ir]) / vth)^2 + * exp(-((vpa.grid[ivpa] - upar_in[iz]) / vth)^2 - (vperp.grid[ivperp]/vth)^2)) # e-i collisions push electrons towards a Maxwellian drifting at the ion # parallel flow, so need a corresponding normalised parallel velocity # coordinate. # For now, assume there is only one ion species rather than bothering to # calculate an average ion flow speed, or sum over ion species here. - + nu_ei * (pdf_in[ivpa,ivperp,iz,ir] + + nu_ei * (pdf_in[ivpa,ivperp,iz] - 1.0 / vth - * exp(-((vpa.grid[ivpa] - upar_ion_in[iz,ir,1]) / vth)^2 + * exp(-((vpa.grid[ivpa] - upar_ion_in[iz,1]) / vth)^2 - (vperp.grid[ivperp]/vth)^2)) ) end end else - @loop_r_z ir iz begin - n = dens_in[iz,ir] - vth = vth_in[iz,ir] + @loop_z iz begin + n = dens_in[iz] + vth = vth_in[iz] if vperp.n == 1 vth_prefactor = 1.0 / vth else @@ -398,19 +403,19 @@ function electron_krook_collisions!(pdf_out, pdf_in, dens_in, upar_in, upar_ion_ nu_ee = get_collision_frequency_ee(collisions, n, vth) nu_ei = get_collision_frequency_ei(collisions, n, vth) @loop_vperp_vpa ivperp ivpa begin - pdf_out[ivpa,ivperp,iz,ir] -= dt * ( - nu_ee * (pdf_in[ivpa,ivperp,iz,ir] + pdf_out[ivpa,ivperp,iz] -= dt * ( + nu_ee * (pdf_in[ivpa,ivperp,iz] - n * vth_prefactor - * exp(-((vpa.grid[ivpa] - upar_in[iz,ir])/vth)^2 + * exp(-((vpa.grid[ivpa] - upar_in[iz])/vth)^2 - (vperp.grid[ivperp]/vth)^2)) # e-i collisions push electrons towards a Maxwellian drifting at the ion # parallel flow, so need a corresponding normalised parallel velocity # coordinate. # For now, assume there is only one ion species rather than bothering to # calculate an average ion flow speed, or sum over ion species here. 
- + nu_ee * (pdf_in[ivpa,ivperp,iz,ir] + + nu_ee * (pdf_in[ivpa,ivperp,iz] - n * vth_prefactor - * exp(-((vpa.grid[ivpa] - upar_ion_in[iz,ir,1])/vth)^2 + * exp(-((vpa.grid[ivpa] - upar_ion_in[iz,1])/vth)^2 - (vperp.grid[ivperp]/vth)^2)) ) end @@ -420,4 +425,56 @@ function electron_krook_collisions!(pdf_out, pdf_in, dens_in, upar_in, upar_ion_ return nothing end +function add_electron_krook_collisions_to_Jacobian!(jacobian_matrix, f, dens, upar, ppar, + vth, upar_ion, collisions, z, vperp, + vpa, z_speed, dt, ir; f_offset=0, + ppar_offset) + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) + @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n + + if collisions.krook.nuee0 ≤ 0.0 && collisions.krook.nuei0 ≤ 0.0 + return nothing + end + + v_size = vperp.n * vpa.n + + using_reference_parameters = (collisions.krook.frequency_option == "reference_parameters") + + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset + + # Contribution from electron_krook_collisions!() + nu_ee = get_collision_frequency_ee(collisions, dens[iz], vth[iz]) + nu_ei = get_collision_frequency_ei(collisions, dens[iz], vth[iz]) + jacobian_matrix[row,row] += dt * (nu_ee + nu_ei) + + fM_i = exp(-(vpa.grid[ivpa] + (upar_ion[iz] - upar[iz])/vth[iz])^2 - vperp.grid[ivperp]^2) + # d(f_M(u_i)[irowz])/d(ppar[icolz]) + # = -2*(vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)*(-1/2/vth/ppar)*f_M(u_i) * delta(irow,icolz) + # = (vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)/vth/ppar*f_M(u_i) * delta(irow,icolz) + jacobian_matrix[row,ppar_offset+iz] += + -dt * nu_ei * (vpa.grid[ivpa]+(upar_ion[iz]-upar[iz])/vth[iz])*(upar_ion[iz]-upar[iz])/vth[iz]/ppar[iz]*fM_i + + if using_reference_parameters + # Both collision frequencies are proportional to 
n/vth^3=n^(5/2)*(me/2/p)^3/2, + # so + # d(nu[irowz])/d(ppar[icolz]) = -3/2*nu/ppar * delta(irowz,icolz) + # d(-(vpa.grid+(upar_ion-upar)/vth)^2[irowz])/d(ppar[icoliz] + # = -(vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)/vth/ppar * delta(irow,icolz) + jacobian_matrix[row,ppar_offset+iz] += + -dt * 1.5 / ppar[iz] * + (nu_ee * (f[ivpa,ivperp,iz] - exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)) + + nu_ei * (f[ivpa,ivperp,iz] - fM_i)) + end + end + + return nothing +end + end # krook_collisions diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index 66fbe2d7d..8d32077fd 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -632,10 +632,10 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, restart_prefix_iblock, time_index, composition, geometry, r, z, vpa, vperp, vzeta, vr, vz) code_time = 0.0 - dt = nothing - dt_before_last_fail = nothing - electron_dt = nothing - electron_dt_before_last_fail = nothing + dt = Ref(-Inf) + dt_before_last_fail = Ref(Inf) + electron_dt = Ref(-Inf) + electron_dt_before_last_fail = Ref(Inf) previous_runs_info = nothing restart_electron_physics = nothing begin_serial_region() @@ -701,17 +701,14 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.ion.dens_updated .= true moments.ion.upar .= reload_moment("parallel_flow", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.ion.upar_updated .= true moments.ion.ppar .= reload_moment("parallel_pressure", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.ion.ppar_updated .= true moments.ion.pperp .= reload_moment("perpendicular_pressure", dynamic, time_index, r, z, r_range, z_range, 
restart_r, restart_r_spectral, restart_z, @@ -720,7 +717,6 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.ion.qpar_updated .= true moments.ion.vth .= reload_moment("thermal_speed", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, @@ -775,16 +771,17 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, end end - if "external_source_controller_integral" ∈ get_variable_keys(dynamic) && - length(moments.ion.external_source_controller_integral) == 1 - moments.ion.external_source_controller_integral .= - load_slice(dynamic, "external_source_controller_integral", time_index) - elseif length(moments.ion.external_source_controller_integral) > 1 - moments.ion.external_source_controller_integral .= - reload_moment("external_source_controller_integral", dynamic, - time_index, r, z, r_range, z_range, restart_r, - restart_r_spectral, restart_z, restart_z_spectral, - interpolation_needed) + if "external_source_controller_integral" ∈ get_variable_keys(dynamic) + if length(moments.ion.external_source_controller_integral) == 1 + moments.ion.external_source_controller_integral .= + load_slice(dynamic, "external_source_controller_integral", time_index) + else + moments.ion.external_source_controller_integral .= + reload_moment("external_source_controller_integral", dynamic, + time_index, r, z, r_range, z_range, restart_r, + restart_r_spectral, restart_z, restart_z_spectral, + interpolation_needed) + end end pdf.ion.norm .= reload_ion_pdf(dynamic, time_index, moments, r, z, vperp, vpa, r_range, @@ -822,28 +819,24 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.electron.dens_updated[] = true moments.electron.upar .= 
reload_electron_moment("electron_parallel_flow", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.electron.upar_updated[] = true moments.electron.ppar .= reload_electron_moment("electron_parallel_pressure", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.electron.ppar_updated[] = true moments.electron.qpar .= reload_electron_moment("electron_parallel_heat_flux", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.electron.qpar_updated[] = true moments.electron.vth .= reload_electron_moment("electron_thermal_speed", dynamic, time_index, r, z, r_range, z_range, restart_r, @@ -883,12 +876,13 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, restart_electron_evolve_ppar = true, true, true electron_evolve_density, electron_evolve_upar, electron_evolve_ppar = true, true, true - if "electron_physics" ∈ keys(restart_input) - restart_electron_physics = enum_from_string(electron_physics_type, - restart_input["electron_physics"]) - else - restart_electron_physics = boltzmann_electron_response - end + # Input is written to output files with all defaults filled in, and + # restart_input is read from a previous output file. + # restart_input["composition"]["electron_physics"] should always exist, even + # if it was set from a default, so we do not have to check the keys to see + # whether it exists. 
+ restart_electron_physics = enum_from_string(electron_physics_type, + restart_input["composition"]["electron_physics"]) if pdf.electron !== nothing && restart_electron_physics ∈ (kinetic_electrons, kinetic_electrons_with_temperature_equation) @@ -912,25 +906,21 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.neutral.dens_updated .= true moments.neutral.uz .= reload_moment("uz_neutral", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.neutral.uz_updated .= true moments.neutral.pz .= reload_moment("pz_neutral", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.neutral.pz_updated .= true moments.neutral.qz .= reload_moment("qz_neutral", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, restart_z, restart_z_spectral, interpolation_needed) - moments.neutral.qz_updated .= true moments.neutral.vth .= reload_moment("thermal_speed_neutral", dynamic, time_index, r, z, r_range, z_range, restart_r, restart_r_spectral, @@ -1020,32 +1010,63 @@ function reload_evolving_fields!(pdf, moments, fields, boundary_distributions, # If "dt" is not present, the file being restarted from is an older # one that did not have an adaptive timestep, so just leave the value # of "dt" from the input file. - dt = load_slice(dynamic, "dt", time_index) + dt[] = load_slice(dynamic, "dt", time_index) end if "dt_before_last_fail" ∈ keys(dynamic) # If "dt_before_last_fail" is not present, the file being # restarted from is an older one that did not have an adaptive # timestep, so just leave the value of "dt_before_last_fail" from # the input file. 
- dt_before_last_fail = load_slice(dynamic, "dt_before_last_fail", + dt_before_last_fail[] = load_slice(dynamic, "dt_before_last_fail", time_index) end if "electron_dt" ∈ keys(dynamic) - electron_dt = load_slice(dynamic, "electron_dt", time_index) + # The algorithm for electron pseudo-timestepping actually starts each + # solve using t_params.electron.previous_dt[], so "electron_previous_dt" + # is the thing to load. + electron_dt[] = load_slice(dynamic, "electron_previous_dt", time_index) end if "electron_dt_before_last_fail" ∈ keys(dynamic) - electron_dt_before_last_fail = + electron_dt_before_last_fail[] = load_slice(dynamic, "electron_dt_before_last_fail", time_index) end finally close(fid) end end + moments.ion.dens_updated .= true + moments.ion.upar_updated .= true + moments.ion.ppar_updated .= true + moments.ion.qpar_updated .= true + moments.electron.dens_updated[] = true + moments.electron.upar_updated[] = true + moments.electron.ppar_updated[] = true + moments.electron.qpar_updated[] = true + moments.neutral.dens_updated .= true + moments.neutral.uz_updated .= true + moments.neutral.pz_updated .= true + moments.neutral.qz_updated .= true restart_electron_physics = MPI.bcast(restart_electron_physics, 0, comm_block[]) + MPI.Bcast!(dt, comm_block[]) + MPI.Bcast!(dt_before_last_fail, comm_block[]) + MPI.Bcast!(electron_dt, comm_block[]) + MPI.Bcast!(electron_dt_before_last_fail, comm_block[]) + + if dt[] == -Inf + dt = nothing + else + dt = dt[] + end + if electron_dt[] == -Inf + electron_dt = nothing + else + electron_dt = electron_dt[] + end - return code_time, dt, dt_before_last_fail, electron_dt, electron_dt_before_last_fail, - previous_runs_info, time_index, restart_electron_physics + return code_time, dt, dt_before_last_fail[], electron_dt, + electron_dt_before_last_fail[], previous_runs_info, time_index, + restart_electron_physics end """ @@ -4451,8 +4472,10 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t 
begin_serial_region() # Only need some struct with a 'speed' variable advect = (speed=@view(speed[:,:,:,:,it]),) - @views update_electron_speed_z!(advect, upar[:,:,it], vth[:,:,it], - run_info.vpa.grid) + for ir ∈ 1:run_info.r.n + @views update_electron_speed_z!(advect, upar[:,ir,it], vth[:,ir,it], + run_info.vpa.grid, ir) + end end # Horrible hack so that we can get the speed back without rearranging the @@ -4531,9 +4554,12 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t external_source_density_amplitude=external_source_density_amplitude[:,:,:,it], external_source_momentum_amplitude=external_source_momentum_amplitude[:,:,:,it], external_source_pressure_amplitude=external_source_pressure_amplitude[:,:,:,it]),) - @views update_electron_speed_vpa!(advect, density[:,:,it], upar[:,:,it], - ppar[:,:,it], moments, run_info.vpa.grid, - run_info.external_source_settings.electron) + for ir ∈ 1:run_info.r.n + @views update_electron_speed_vpa!(advect, density[:,ir,it], upar[:,ir,it], + ppar[:,ir,it], moments, run_info.vpa.grid, + run_info.external_source_settings.electron, + ir) + end end variable = speed @@ -4725,7 +4751,7 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t electron_steps_per_output = get_variable(run_info, "electron_steps_per_output"; kwargs...) electron_failures_per_output = get_variable(run_info, "electron_failures_per_output"; kwargs...) electron_successful_steps_per_output = electron_steps_per_output - electron_failures_per_output - electron_pseudotime = get_variable("electron_cumulative_pseudotime"; kwargs...) + electron_pseudotime = get_variable(run_info, "electron_cumulative_pseudotime"; kwargs...) 
delta_t = copy(electron_pseudotime) for i ∈ length(delta_t):-1:2 diff --git a/moment_kinetics/src/moment_constraints.jl b/moment_kinetics/src/moment_constraints.jl index f8c0a2274..d0fea0ecd 100644 --- a/moment_kinetics/src/moment_constraints.jl +++ b/moment_kinetics/src/moment_constraints.jl @@ -5,12 +5,15 @@ function. """ module moment_constraints +using ..boundary_conditions: skip_f_electron_bc_points_in_Jacobian using ..communication: _block_synchronize using ..looping using ..type_definitions: mk_float using ..velocity_moments: integrate_over_vspace, update_qpar! -export hard_force_moment_constraints!, hard_force_moment_constraints_neutral! +export hard_force_moment_constraints!, hard_force_moment_constraints_neutral!, + electron_implicit_constraint_forcing!, + add_electron_implicit_constraint_forcing_to_Jacobian! """ hard_force_moment_constraints!(f, moments, vpa) @@ -86,6 +89,16 @@ function hard_force_moment_constraints!(f, moments, vpa) return A, B, C end +function hard_force_moment_constraints!(f::AbstractArray{mk_float,4}, moments, vpa) + A = moments.electron.constraints_A_coefficient + B = moments.electron.constraints_B_coefficient + C = moments.electron.constraints_C_coefficient + begin_r_z_region() + @loop_r_z ir iz begin + A[iz,ir], B[iz,ir], C[iz,ir] = + hard_force_moment_constraints!(@view(f[:,:,iz,ir]), moments, vpa) + end +end function hard_force_moment_constraints!(f::AbstractArray{mk_float,5}, moments, vpa) A = moments.ion.constraints_A_coefficient B = moments.ion.constraints_B_coefficient @@ -229,4 +242,90 @@ function moment_constraints_on_residual!(residual::AbstractArray{T,N}, return A, B, C end +""" + electron_implicit_constraint_forcing!(f_out, f_in, constraint_forcing_rate, vpa, + dt, ir) + +Add terms to the electron kinetic equation that force the moment constraints to be +approximately satisfied. Needed to avoid large errors when taking large, implicit +timesteps that do not guarantee accurate time evolution. 
+""" +function electron_implicit_constraint_forcing!(f_out, f_in, constraint_forcing_rate, vpa, + dt, ir) + begin_z_region() + vpa_grid = vpa.grid + @loop_z iz begin + @views zeroth_moment = integrate_over_vspace(f_in[:,1,iz], vpa.wgts) + @views first_moment = integrate_over_vspace(f_in[:,1,iz], vpa.grid, vpa.wgts) + @views second_moment = integrate_over_vspace(f_in[:,1,iz], vpa.grid, 2, vpa.wgts) + + @loop_vperp_vpa ivperp ivpa begin + f_out[ivpa,ivperp,iz] += + dt * constraint_forcing_rate * + ((1.0 - zeroth_moment) + - first_moment*vpa_grid[ivpa] + + (0.5 - second_moment)*vpa_grid[ivpa]^2) * f_in[ivpa,ivperp,iz] + end + end + + return nothing +end + +""" + add_electron_implicit_constraint_forcing_to_Jacobian!(jacobian_matrix, f, + z_speed, z, vperp, vpa, + constraint_forcing_rate, + dt, ir; f_offset=0) + +Add the contributions corresponding to [`electron_implicit_constraint_forcing!`](@ref) to +`jacobian_matrix`. +""" +function add_electron_implicit_constraint_forcing_to_Jacobian!(jacobian_matrix, f, + z_speed, z, vperp, vpa, + constraint_forcing_rate, + dt, ir; f_offset=0) + vpa_grid = vpa.grid + vpa_wgts = vpa.wgts + v_size = vperp.n * vpa.n + + zeroth_moment = z.scratch_shared + first_moment = z.scratch_shared2 + second_moment = z.scratch_shared3 + begin_z_region() + @loop_z iz begin + @views zeroth_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_wgts) + @views first_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, vpa_wgts) + @views second_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, 2, vpa_wgts) + end + + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset + + # Diagonal terms + jacobian_matrix[row,row] += -dt * constraint_forcing_rate * + ((1.0 - zeroth_moment[iz]) + - first_moment[iz]*vpa_grid[ivpa] + + (0.5 - 
second_moment[iz])*vpa_grid[ivpa]^2) + + # Integral terms + # d(∫dw_∥ w_∥^n g[irow])/d(g[icol]) = vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^n + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] += dt * constraint_forcing_rate * + (1.0 + + vpa_grid[icolvpa]*vpa_grid[ivpa] + + vpa_grid[icolvpa]^2*vpa_grid[ivpa]^2) * + vpa_wgts[icolvpa]/sqrt(π) * f[ivpa,ivperp,iz] + end + end + + return nothing +end + end diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl index 86810a66f..8565a046b 100644 --- a/moment_kinetics/src/moment_kinetics.jl +++ b/moment_kinetics/src/moment_kinetics.jl @@ -39,6 +39,7 @@ include("geo.jl") include("gyroaverages.jl") include("velocity_moments.jl") include("velocity_grid_transforms.jl") +include("boundary_conditions.jl") include("electron_fluid_equations.jl") include("em_fields.jl") include("bgk.jl") @@ -48,7 +49,6 @@ include("moment_constraints.jl") include("fokker_planck_test.jl") include("fokker_planck_calculus.jl") include("fokker_planck.jl") -include("boundary_conditions.jl") include("advection.jl") include("vpa_advection.jl") include("z_advection.jl") @@ -214,7 +214,8 @@ parallel loop ranges, and are only used by the tests in `debug_test/`. 
function setup_moment_kinetics(input_dict::AbstractDict; restart::Union{Bool,AbstractString}=false, restart_time_index::mk_int=-1, debug_loop_type::Union{Nothing,NTuple{N,Symbol} where N}=nothing, - debug_loop_parallel_dims::Union{Nothing,NTuple{N,Symbol} where N}=nothing) + debug_loop_parallel_dims::Union{Nothing,NTuple{N,Symbol} where N}=nothing, + skip_electron_solve::Bool=false) setup_start_time = now() @@ -348,7 +349,8 @@ function setup_moment_kinetics(input_dict::AbstractDict; dt_before_last_fail, electron_dt, electron_dt_before_last_fail, collisions, species, geometry, boundary_distributions, external_source_settings, num_diss_params, manufactured_solns_input, advection_structs, io_input, - restarting, restart_electron_physics, input_dict) + restarting, restart_electron_physics, input_dict; + skip_electron_solve=skip_electron_solve) # This is the closest we can get to the end time of the setup before writing it to the # output file diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl index 4016c47ff..9f49aef52 100644 --- a/moment_kinetics/src/moment_kinetics_input.jl +++ b/moment_kinetics/src/moment_kinetics_input.jl @@ -160,10 +160,11 @@ function mk_input(input_dict=OptionsDict(); save_inputs_to_txt=false, ignore_MPI minimum_dt=0.0, maximum_dt=Inf, implicit_braginskii_conduction=true, - implicit_electron_advance=true, + implicit_electron_advance=false, implicit_ion_advance=false, implicit_vpa_advection=false, - implicit_electron_ppar=false, + implicit_electron_ppar=true, + constraint_forcing_rate=0.0, write_after_fixed_step_count=false, write_error_diagnostics=false, write_steady_state_diagnostics=false, @@ -200,11 +201,15 @@ function mk_input(input_dict=OptionsDict(); save_inputs_to_txt=false, ignore_MPI last_fail_proximity_factor=timestepping_section["last_fail_proximity_factor"], minimum_dt=timestepping_section["minimum_dt"] * sqrt(composition.me_over_mi), maximum_dt=timestepping_section["maximum_dt"] * 
sqrt(composition.me_over_mi), + constraint_forcing_rate=1.0e6, write_after_fixed_step_count=false, write_error_diagnostics=false, write_steady_state_diagnostics=false, high_precision_error_sum=timestepping_section["high_precision_error_sum"], initialization_residual_value=1.0, + decrease_dt_iteration_threshold=100, + increase_dt_iteration_threshold=20, + cap_factor_ion_dt=10.0, no_restart=false, debug_io=false, ) diff --git a/moment_kinetics/src/moment_kinetics_structs.jl b/moment_kinetics/src/moment_kinetics_structs.jl index a1ef580e2..18b5b0189 100644 --- a/moment_kinetics/src/moment_kinetics_structs.jl +++ b/moment_kinetics/src/moment_kinetics_structs.jl @@ -157,23 +157,23 @@ struct moments_electron_substruct # this is the particle density dens::MPISharedArray{mk_float,2} # flag that keeps track of if the density needs updating before use - dens_updated::Ref{Bool} + dens_updated::Base.RefValue{Bool} # this is the parallel flow upar::MPISharedArray{mk_float,2} # flag that keeps track of whether or not upar needs updating before use - upar_updated::Ref{Bool} + upar_updated::Base.RefValue{Bool} # this is the parallel pressure ppar::MPISharedArray{mk_float,2} # flag that keeps track of whether or not ppar needs updating before use - ppar_updated::Ref{Bool} + ppar_updated::Base.RefValue{Bool} # this is the temperature temp::MPISharedArray{mk_float,2} # flag that keeps track of whether or not temp needs updating before use - temp_updated::Ref{Bool} + temp_updated::Base.RefValue{Bool} # this is the parallel heat flux qpar::MPISharedArray{mk_float,2} # flag that keeps track of whether or not qpar needs updating before use - qpar_updated::Ref{Bool} + qpar_updated::Base.RefValue{Bool} # this is the thermal speed based on the parallel temperature Tpar = ppar/dens: vth = sqrt(2*Tpar/m) vth::MPISharedArray{mk_float,2} # this is the parallel friction force between ions and electrons diff --git a/moment_kinetics/src/nonlinear_solvers.jl 
b/moment_kinetics/src/nonlinear_solvers.jl index 5726da782..89fb4ddf4 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -22,11 +22,12 @@ Useful references: [3] https://en.wikipedia.org/wiki/Generalized_minimal_residual_method [4] https://www.rikvoorhaar.com/blog/gmres [5] E. Carson , J. Liesen, Z. Strakoš, "Towards understanding CG and GMRES through examples", Linear Algebra and its Applications 692, 241–291 (2024), https://doi.org/10.1016/j.laa.2024.04.003. +[6] Q. Zou, "GMRES algorithms over 35 years", Applied Mathematics and Computation 445, 127869 (2023), https://doi.org/10.1016/j.amc.2023.127869 """ module nonlinear_solvers export setup_nonlinear_solve, gather_nonlinear_solver_counters!, - reset_nonlinear_per_stage_counters, newton_solve! + reset_nonlinear_per_stage_counters!, newton_solve! using ..array_allocation: allocate_float, allocate_shared_float using ..communication @@ -36,12 +37,11 @@ using ..looping using ..type_definitions: mk_float, mk_int using LinearAlgebra -using MINPACK using MPI using SparseArrays using StatsBase: mean -struct nl_solver_info{TH,TV,Tlig,Tprecon} +struct nl_solver_info{TH,TV,Tcsg,Tlig,Tprecon} rtol::mk_float atol::mk_float nonlinear_max_iterations::mk_int @@ -50,17 +50,23 @@ struct nl_solver_info{TH,TV,Tlig,Tprecon} linear_restart::mk_int linear_max_restarts::mk_int H::TH + c::Tcsg + s::Tcsg + g::Tcsg V::TV linear_initial_guess::Tlig - n_solves::Ref{mk_int} - nonlinear_iterations::Ref{mk_int} - linear_iterations::Ref{mk_int} - global_n_solves::Ref{mk_int} - global_nonlinear_iterations::Ref{mk_int} - global_linear_iterations::Ref{mk_int} - stage_counter::Ref{mk_int} + n_solves::Base.RefValue{mk_int} + nonlinear_iterations::Base.RefValue{mk_int} + linear_iterations::Base.RefValue{mk_int} + global_n_solves::Base.RefValue{mk_int} + global_nonlinear_iterations::Base.RefValue{mk_int} + global_linear_iterations::Base.RefValue{mk_int} + 
solves_since_precon_update::Base.RefValue{mk_int} + precon_dt::Base.RefValue{mk_float} serial_solve::Bool - max_nonlinear_iterations_this_step::Ref{mk_int} + max_nonlinear_iterations_this_step::Base.RefValue{mk_int} + max_linear_iterations_this_step::Base.RefValue{mk_int} + preconditioner_type::String preconditioner_update_interval::mk_int preconditioners::Tprecon end @@ -76,7 +82,7 @@ for example a preconditioner object for each point in that outer loop. """ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); default_rtol=1.0e-5, default_atol=1.0e-12, serial_solve=false, - electron_ppar_pdf_solve=false, preconditioner_type=nothing) + electron_ppar_pdf_solve=false, preconditioner_type="none") nl_solver_section = set_defaults_and_check_section!( input_dict, "nonlinear_solver"; rtol=default_rtol, @@ -105,17 +111,29 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa if serial_solve H = allocate_float(linear_restart + 1, linear_restart) + c = allocate_float(linear_restart + 1) + s = allocate_float(linear_restart + 1) + g = allocate_float(linear_restart + 1) V = allocate_float(reverse(coord_sizes)..., linear_restart+1) H .= 0.0 + c .= 0.0 + s .= 0.0 + g .= 0.0 V .= 0.0 elseif electron_ppar_pdf_solve H = allocate_shared_float(linear_restart + 1, linear_restart) - V_ppar = allocate_shared_float(coords.z.n, coords.r.n, linear_restart+1) + c = allocate_shared_float(linear_restart + 1) + s = allocate_shared_float(linear_restart + 1) + g = allocate_shared_float(linear_restart + 1) + V_ppar = allocate_shared_float(coords.z.n, linear_restart+1) V_pdf = allocate_shared_float(reverse(coord_sizes)..., linear_restart+1) begin_serial_region() @serial_region begin H .= 0.0 + c .= 0.0 + s .= 0.0 + g .= 0.0 V_ppar .= 0.0 V_pdf .= 0.0 end @@ -123,11 +141,17 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa V = (V_ppar, V_pdf) else H = allocate_shared_float(linear_restart + 1, linear_restart) + c = 
allocate_shared_float(linear_restart + 1) + s = allocate_shared_float(linear_restart + 1) + g = allocate_shared_float(linear_restart + 1) V = allocate_shared_float(reverse(coord_sizes)..., linear_restart+1) begin_serial_region() @serial_region begin H .= 0.0 + c .= 0.0 + s .= 0.0 + g .= 0.0 V .= 0.0 end end @@ -137,8 +161,26 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa # These will be calculated properly within the time loop. preconditioners = fill(lu(sparse(1.0*I, total_size_coords, total_size_coords)), reverse(outer_coord_sizes)) - else + elseif preconditioner_type == "electron_split_lu" + preconditioners = (z=fill(lu(sparse(1.0*I, coords.z.n, coords.z.n)), + tuple(coords.vpa.n, reverse(outer_coord_sizes)...)), + vpa=fill(lu(sparse(1.0*I, coords.vpa.n, coords.vpa.n)), + tuple(coords.z.n, reverse(outer_coord_sizes)...)), + ppar=fill(lu(sparse(1.0*I, coords.z.n, coords.z.n)), + reverse(outer_coord_sizes)), + ) + elseif preconditioner_type == "electron_lu" + pdf_plus_ppar_size = total_size_coords + coords.z.n + preconditioners = fill((lu(sparse(1.0*I, 1, 1)), + allocate_shared_float(pdf_plus_ppar_size, pdf_plus_ppar_size), + allocate_shared_float(pdf_plus_ppar_size), + allocate_shared_float(pdf_plus_ppar_size), + ), + reverse(outer_coord_sizes)) + elseif preconditioner_type == "none" preconditioners = nothing + else + error("Unrecognised preconditioner_type=$preconditioner_type") end linear_initial_guess = zeros(linear_restart) @@ -146,23 +188,30 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa return nl_solver_info(nl_solver_input.rtol, nl_solver_input.atol, nl_solver_input.nonlinear_max_iterations, nl_solver_input.linear_rtol, nl_solver_input.linear_atol, - linear_restart, nl_solver_input.linear_max_restarts, H, V, - linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), - Ref(0), Ref(0), serial_solve, Ref(0), + linear_restart, nl_solver_input.linear_max_restarts, H, c, s, g, + V, 
linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), + Ref(0), Ref(nl_solver_input.preconditioner_update_interval), + Ref(0.0), serial_solve, Ref(0), Ref(0), preconditioner_type, nl_solver_input.preconditioner_update_interval, preconditioners) end """ - reset_nonlinear_per_stage_counters(nl_solver_params::Union{nl_solver_info,Nothing}) + reset_nonlinear_per_stage_counters!(nl_solver_params::Union{nl_solver_info,Nothing}) Reset the counters that hold per-step totals or maximums in `nl_solver_params`. + +Also increment `nl_solver_params.solves_since_precon_update[]`. """ -function reset_nonlinear_per_stage_counters(nl_solver_params::Union{nl_solver_info,Nothing}) +function reset_nonlinear_per_stage_counters!(nl_solver_params::Union{nl_solver_info,Nothing}) if nl_solver_params === nothing return nothing end nl_solver_params.max_nonlinear_iterations_this_step[] = 0 + nl_solver_params.max_linear_iterations_this_step[] = 0 + + # Also increment solves_since_precon_update + nl_solver_params.solves_since_precon_update[] += 1 return nothing end @@ -186,9 +235,9 @@ function gather_nonlinear_solver_counters!(nl_solver_params) end if nl_solver_params.vpa_advection !== nothing # Solves are run in serial on separate processes, so need a global Allreduce - nl_solver_params.vpa_advection.global_n_solves[] = MPI.Allreduce(nl_solver_params.vpa_advection.n_solves[], +, comm_world) - nl_solver_params.vpa_advection.global_nonlinear_iterations[] = MPI.Allreduce(nl_solver_params.vpa_advection.nonlinear_iterations[], +, comm_world) - nl_solver_params.vpa_advection.global_linear_iterations[] = MPI.Allreduce(nl_solver_params.vpa_advection.linear_iterations[], +, comm_world) + MPI.Allreduce!(nl_solver_params.vpa_advection.n_solves[], +, comm_world) + MPI.Allreduce!(nl_solver_params.vpa_advection.nonlinear_iterations[], +, comm_world) + MPI.Allreduce!(nl_solver_params.vpa_advection.linear_iterations[], +, comm_world) end end @@ -255,21 +304,35 @@ is not necessary to have a very tight `linear_rtol` for the
GMRES solve. function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params; left_preconditioner=nothing, right_preconditioner=nothing, coords) + # This wrapper function constructs the `solver_type` from coords, so that the body of + # the inner `newton_solve!()` can be fully type-stable + solver_type = Val(Symbol((c for c ∈ keys(coords))...)) + return newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, + nl_solver_params, solver_type; left_preconditioner=left_preconditioner, + right_preconditioner=right_preconditioner, coords=coords) +end +function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, + nl_solver_params, solver_type::Val; left_preconditioner=nothing, + right_preconditioner=nothing, coords) rtol = nl_solver_params.rtol atol = nl_solver_params.atol - distributed_norm = get_distributed_norm(coords, rtol, atol, x) - distributed_dot = get_distributed_dot(coords, rtol, atol, x) - parallel_map = get_parallel_map(coords) - parallel_delta_x_calc = get_parallel_delta_x_calc(coords) + if left_preconditioner === nothing + left_preconditioner = identity + end + if right_preconditioner === nothing + right_preconditioner = identity + end + + norm_params = (coords, nl_solver_params.rtol, nl_solver_params.atol, x) residual_func!(residual, x) - residual_norm = distributed_norm(residual) + residual_norm = distributed_norm(solver_type, residual, norm_params...) 
counter = 0 linear_counter = 0 - parallel_map(()->0.0, delta_x) + parallel_map(solver_type, ()->0.0, delta_x) close_counter = -1 close_linear_counter = -1 @@ -279,30 +342,21 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, counter += 1 #println("\nNewton ", counter) - if left_preconditioner === nothing - left_preconditioner = identity - end - if right_preconditioner === nothing - right_preconditioner = identity - end - # Solve (approximately?): # J δx = -RHS(x) - parallel_map(()->0.0, delta_x) - linear_its = linear_solve!(x, residual_func!, residual, delta_x, v, w; - coords=coords, rtol=nl_solver_params.linear_rtol, + parallel_map(solver_type, ()->0.0, delta_x) + linear_its = linear_solve!(x, residual_func!, residual, delta_x, v, w, + solver_type, norm_params; coords=coords, + rtol=nl_solver_params.linear_rtol, atol=nl_solver_params.linear_atol, restart=nl_solver_params.linear_restart, max_restarts=nl_solver_params.linear_max_restarts, left_preconditioner=left_preconditioner, right_preconditioner=right_preconditioner, - H=nl_solver_params.H, V=nl_solver_params.V, - rhs_delta=rhs_delta, + H=nl_solver_params.H, c=nl_solver_params.c, + s=nl_solver_params.s, g=nl_solver_params.g, + V=nl_solver_params.V, rhs_delta=rhs_delta, initial_guess=nl_solver_params.linear_initial_guess, - distributed_norm=distributed_norm, - distributed_dot=distributed_dot, - parallel_map=parallel_map, - parallel_delta_x_calc=parallel_delta_x_calc, serial_solve=nl_solver_params.serial_solve) linear_counter += linear_its @@ -313,14 +367,14 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, # during the line search, which might make it fail to converge). So calculate the # updated value in the buffer `w` until the line search is completed, and only # then copy it into `x`. 
- parallel_map((x) -> x, w, x) - parallel_map((x,delta_x) -> x + delta_x, w, x, delta_x) + parallel_map(solver_type, (x) -> x, w, x) + parallel_map(solver_type, (x,delta_x) -> x + delta_x, w, x, delta_x) residual_func!(residual, w) # For the Newton iteration, we want the norm divided by the (sqrt of the) number # of grid points, so we can use a tolerance that is independent of the size of the # grid. This is unlike the norms needed in `linear_solve!()`. - residual_norm = distributed_norm(residual) + residual_norm = distributed_norm(solver_type, residual, norm_params...) if isnan(residual_norm) error("NaN in Newton iteration at iteration $counter") end @@ -329,32 +383,39 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, # decrease residual_norm s = 0.5 while s > 1.0e-2 - parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) + parallel_map(solver_type, (x,delta_x,s) -> x + s * delta_x, w, x, delta_x, s) residual_func!(residual, x) - residual_norm = distributed_norm(residual) + residual_norm = distributed_norm(solver_type, residual, norm_params...) if residual_norm ≤ previous_residual_norm break end s *= 0.5 end + #if residual_norm > previous_residual_norm + # # Failed to find a point that decreases the residual, so try a negative + # # step + # s = -1.0e-5 + # parallel_map(solver_type, (x,delta_x,s) -> x + s * delta_x, w, x, delta_x, s) + # residual_func!(residual, x) + # residual_norm = distributed_norm(solver_type, residual, norm_params...) + # if residual_norm > previous_residual_norm + # # That didn't work either, so just take the full step and hope for + # # convergence later + # parallel_map(solver_type, (x,delta_x,s) -> x + s * delta_x, w, x, delta_x, s) + # residual_func!(residual, x) + # residual_norm = distributed_norm(solver_type, residual, norm_params...) 
+ # end + #end if residual_norm > previous_residual_norm - # Failed to find a point that decreases the residual, so try a negative - # step - s = -1.0e-5 - parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) + # Line search didn't work, so just take the full step and hope for + # convergence later + parallel_map(solver_type, (x,delta_x,s) -> x + s * delta_x, w, x, delta_x, s) residual_func!(residual, x) - residual_norm = distributed_norm(residual) - if residual_norm > previous_residual_norm - # That didn't work either, so just take the full step and hope for - # convergence later - parallel_map((x,delta_x) -> x + s * delta_x, w, x, delta_x) - residual_func!(residual, x) - residual_norm = distributed_norm(residual) - end + residual_norm = distributed_norm(solver_type, residual, norm_params...) end end - parallel_map((w) -> w, x, w) + parallel_map(solver_type, (w) -> w, x, w) previous_residual_norm = residual_norm #println("Newton residual ", residual_norm, " ", linear_its, " $rtol $atol") @@ -375,6 +436,8 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, nl_solver_params.linear_iterations[] += linear_counter nl_solver_params.max_nonlinear_iterations_this_step[] = max(counter, nl_solver_params.max_nonlinear_iterations_this_step[]) + nl_solver_params.max_linear_iterations_this_step[] = + max(linear_counter, nl_solver_params.max_linear_iterations_this_step[]) # println("Newton iterations: ", counter) # println("Final residual: ", residual_norm) # println("Total linear iterations: ", linear_counter) @@ -388,36 +451,8 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, return success end -""" - get_distributed_norm(coords, rtol, atol, x) - -Get a 'distributed_norm' function that acts on arrays with dimensions given by the -entries in `coords`. 
-""" -function get_distributed_norm(coords, rtol, atol, x) - dims = keys(coords) - if dims == (:z,) - this_norm = distributed_norm_z - elseif dims == (:vpa,) - this_norm = distributed_norm_vpa - elseif dims == (:r, :z, :vperp, :vpa) - # Intended for implicit solve combining electron_ppar and pdf_electron, so will - # not work for a single variable. - this_norm = distributed_norm_r_z_vperp_vpa - elseif dims == (:s, :r, :z, :vperp, :vpa) - this_norm = distributed_norm_s_r_z_vperp_vpa - else - error("dims=$dims is not supported yet. Need to write another " - * "`distributed_norm_*()` function in nonlinear_solvers.jl") - end - - wrapped_norm = (args...; kwargs...) -> this_norm(args...; rtol=rtol, atol=atol, x=x, - coords=coords, kwargs...) - - return wrapped_norm -end - -function distributed_norm_z(residual::AbstractArray{mk_float, 1}; coords, rtol, atol, x) +function distributed_norm(::Val{:z}, residual::AbstractArray{mk_float, 1}, coords, rtol, + atol, x) z = coords.z begin_z_region() @@ -438,20 +473,21 @@ function distributed_norm_z(residual::AbstractArray{mk_float, 1}; coords, rtol, end _block_synchronize() - block_norm = MPI.Reduce(local_norm, +, comm_block[]) + global_norm = Ref(local_norm) + MPI.Reduce!(global_norm, +, comm_block[]) # global_norm is the norm_square for the block if block_rank[] == 0 - global_norm = MPI.Allreduce(block_norm, +, comm_inter_block[]) - global_norm = sqrt(global_norm / z.n_global) - else - global_norm = nothing + MPI.Allreduce!(global_norm, +, comm_inter_block[]) # global_norm is the norm_square for the whole grid + global_norm[] = sqrt(global_norm[] / z.n_global) end - global_norm = MPI.bcast(global_norm, comm_block[]; root=0) + _block_synchronize() + MPI.Bcast!(global_norm, comm_block[]; root=0) - return global_norm + return global_norm[] end -function distributed_norm_vpa(residual::AbstractArray{mk_float, 1}; coords, rtol, atol, x) +function distributed_norm(::Val{:vpa}, residual::AbstractArray{mk_float, 1}, coords, rtol, + atol, 
x) # No parallelism needed when the implicit solve is over vpa - assume that this will be # called inside a parallelised s_r_z_vperp loop. residual_norm = 0.0 @@ -464,77 +500,71 @@ function distributed_norm_vpa(residual::AbstractArray{mk_float, 1}; coords, rtol return residual_norm end -function distributed_norm_r_z_vperp_vpa(residual::Tuple{AbstractArray{mk_float, 2},AbstractArray{mk_float, 4}}; - coords, rtol, atol, x) +function distributed_norm(::Val{:zvperpvpa}, + residual::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, + coords, rtol, atol, x) ppar_residual, pdf_residual = residual x_ppar, x_pdf = x - r = coords.r z = coords.z vperp = coords.vperp vpa = coords.vpa - if r.irank < r.nrank - 1 - rend = r.n - else - rend = r.n + 1 - end if z.irank < z.nrank - 1 zend = z.n else zend = z.n + 1 end - begin_r_z_region() + begin_z_region() ppar_local_norm_square = 0.0 - @loop_r_z ir iz begin - if ir == rend || iz == zend + @loop_z iz begin + if iz == zend continue end - ppar_local_norm_square += (ppar_residual[iz,ir] / (rtol * abs(x_ppar[iz,ir]) + atol))^2 + ppar_local_norm_square += (ppar_residual[iz] / (rtol * abs(x_ppar[iz]) + atol))^2 end _block_synchronize() - ppar_block_norm_square = MPI.Reduce(ppar_local_norm_square, +, comm_block[]) + global_norm_ppar = Ref(ppar_local_norm_square) # global_norm_ppar is the norm_square for ppar in the block + MPI.Reduce!(global_norm_ppar, +, comm_block[]) if block_rank[] == 0 - ppar_global_norm_square = MPI.Allreduce(ppar_block_norm_square, +, comm_inter_block[]) - ppar_global_norm_square = ppar_global_norm_square / (r.n_global * z.n_global) - else - ppar_global_norm_square = nothing + MPI.Allreduce!(global_norm_ppar, +, comm_inter_block[]) # global_norm_ppar is the norm_square for ppar in the whole grid + global_norm_ppar[] = global_norm_ppar[] / z.n_global end - begin_r_z_vperp_vpa_region() + begin_z_vperp_vpa_region() pdf_local_norm_square = 0.0 - @loop_r_z ir iz begin - if ir == rend || iz == zend + @loop_z iz 
begin + if iz == zend continue end @loop_vperp_vpa ivperp ivpa begin - pdf_local_norm_square += (pdf_residual[ivpa,ivperp,iz,ir] / (rtol * abs(x_pdf[ivpa,ivperp,iz,ir]) + atol))^2 + pdf_local_norm_square += (pdf_residual[ivpa,ivperp,iz] / (rtol * abs(x_pdf[ivpa,ivperp,iz]) + atol))^2 end end _block_synchronize() - pdf_block_norm_square = MPI.Reduce(pdf_local_norm_square, +, comm_block[]) + global_norm = Ref(pdf_local_norm_square) + MPI.Reduce!(global_norm, +, comm_block[]) # global_norm is the norm_square for the block if block_rank[] == 0 - pdf_global_norm_square = MPI.Allreduce(pdf_block_norm_square, +, comm_inter_block[]) - pdf_global_norm_square = pdf_global_norm_square / (r.n_global * z.n_global * vperp.n_global * vpa.n_global) + MPI.Allreduce!(global_norm, +, comm_inter_block[]) # global_norm is the norm_square for the whole grid + global_norm[] = global_norm[] / (z.n_global * vperp.n_global * vpa.n_global) - global_norm = sqrt(mean((ppar_global_norm_square, pdf_global_norm_square))) - else - global_norm = nothing + global_norm[] = sqrt(mean((global_norm_ppar[], global_norm[]))) end + _block_synchronize() - global_norm = MPI.bcast(global_norm, comm_block[]; root=0) + MPI.Bcast!(global_norm, comm_block[]; root=0) - return global_norm + return global_norm[] end -function distributed_norm_s_r_z_vperp_vpa(residual::AbstractArray{mk_float, 5}; - coords, rtol, atol, x) +function distributed_norm(::Val{:srzvperpvpa}, residual::AbstractArray{mk_float, 5}, + coords, rtol, atol, x) n_ion_species = coords.s r = coords.r z = coords.z @@ -564,49 +594,21 @@ function distributed_norm_s_r_z_vperp_vpa(residual::AbstractArray{mk_float, 5}; end _block_synchronize() - block_norm = MPI.Reduce(local_norm, +, comm_block[]) + global_norm = Ref(local_norm) + MPI.Reduce!(global_norm, +, comm_block[]) # global_norm is the norm_square for the block if block_rank[] == 0 - global_norm = MPI.Allreduce(block_norm, +, comm_inter_block[]) - global_norm = sqrt(global_norm / (n_ion_species * 
r.n_global * z.n_global * vperp.n_global * vpa.n_global)) - else - global_norm = nothing + MPI.Allreduce!(global_norm, +, comm_inter_block[]) # global_norm is the norm_square for the whole grid + global_norm[] = sqrt(global_norm[] / (n_ion_species * r.n_global * z.n_global * vperp.n_global * vpa.n_global)) end - global_norm = MPI.bcast(global_norm, comm_block[]; root=0) - - return global_norm -end - -""" - get_distributed_dot(coords, rtol, atol, x) - -Get a 'distributed_dot' function that acts on arrays with dimensions given by the entries -in `coords`. -""" -function get_distributed_dot(coords, rtol, atol, x) - dims = keys(coords) - if dims == (:z,) - this_dot = distributed_dot_z - elseif dims == (:vpa,) - this_dot = distributed_dot_vpa - elseif dims == (:r, :z, :vperp, :vpa) - # Intended for implicit solve combining electron_ppar and pdf_electron, so will - # not work for a single variable. - this_dot = distributed_dot_r_z_vperp_vpa - elseif dims == (:s, :r, :z, :vperp, :vpa) - this_dot = distributed_dot_s_r_z_vperp_vpa - else - error("dims=$dims is not supported yet. Need to write another " - * "`distributed_dot_*()` function in nonlinear_solvers.jl") - end - - wrapped_dot = (args...; kwargs...) -> this_dot(args...; rtol=rtol, atol=atol, x=x, - coords=coords, kwargs...) 
+ _block_synchronize() + MPI.Bcast!(global_norm, comm_block[]; root=0) + return global_norm[] end -function distributed_dot_z(v::AbstractArray{mk_float, 1}, w::AbstractArray{mk_float, 1}; - coords, atol, rtol, x) +function distributed_dot(::Val{:z}, v::AbstractArray{mk_float, 1}, + w::AbstractArray{mk_float, 1}, coords, rtol, atol, x) z = coords.z @@ -630,20 +632,19 @@ function distributed_dot_z(v::AbstractArray{mk_float, 1}, w::AbstractArray{mk_fl end _block_synchronize() - block_dot = MPI.Reduce(local_dot, +, comm_block[]) + global_dot = Ref(local_dot) + MPI.Reduce!(global_dot, +, comm_block[]) # global_dot is the dot for the block if block_rank[] == 0 - global_dot = MPI.Allreduce(block_dot, +, comm_inter_block[]) - global_dot = global_dot / z.n_global - else - global_dot = nothing + MPI.Allreduce!(global_dot, +, comm_inter_block[]) # global_dot is the dot for the whole grid + global_dot[] = global_dot[] / z.n_global end - return global_dot + return global_dot[] end -function distributed_dot_vpa(v::AbstractArray{mk_float, 1}, w::AbstractArray{mk_float, 1}; - coords, atol, rtol, x) +function distributed_dot(::Val{:vpa}, v::AbstractArray{mk_float, 1}, + w::AbstractArray{mk_float, 1}, coords, rtol, atol, x) # No parallelism needed when the implicit solve is over vpa - assume that this will be # called inside a parallelised s_r_z_vperp loop. 
local_dot = 0.0 @@ -654,77 +655,69 @@ function distributed_dot_vpa(v::AbstractArray{mk_float, 1}, w::AbstractArray{mk_ return local_dot end -function distributed_dot_r_z_vperp_vpa(v::Tuple{AbstractArray{mk_float, 2},AbstractArray{mk_float, 4}}, - w::Tuple{AbstractArray{mk_float, 2},AbstractArray{mk_float, 4}}; - coords, atol, rtol, x) +function distributed_dot(::Val{:zvperpvpa}, + v::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, + w::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, + coords, rtol, atol, x) v_ppar, v_pdf = v w_ppar, w_pdf = w x_ppar, x_pdf = x - r = coords.r z = coords.z vperp = coords.vperp vpa = coords.vpa - if r.irank < r.nrank - 1 - rend = r.n - else - rend = r.n + 1 - end if z.irank < z.nrank - 1 zend = z.n else zend = z.n + 1 end - begin_r_z_region() + begin_z_region() ppar_local_dot = 0.0 - @loop_r_z ir iz begin - if ir == rend || iz == zend + @loop_z iz begin + if iz == zend continue end - ppar_local_dot += v_ppar[iz,ir] * w_ppar[iz,ir] / (rtol * abs(x_ppar[iz,ir]) + atol)^2 + ppar_local_dot += v_ppar[iz] * w_ppar[iz] / (rtol * abs(x_ppar[iz]) + atol)^2 end _block_synchronize() - ppar_block_dot = MPI.Reduce(ppar_local_dot, +, comm_block[]) + ppar_global_dot = Ref(ppar_local_dot) + MPI.Reduce!(ppar_global_dot, +, comm_block[]) # ppar_global_dot is the ppar_dot for the block if block_rank[] == 0 - ppar_global_dot = MPI.Allreduce(ppar_block_dot, +, comm_inter_block[]) - ppar_global_dot = ppar_global_dot / (r.n_global * z.n_global * vperp.n_global * vpa.n_global) - else - ppar_global_dot = nothing + MPI.Allreduce!(ppar_global_dot, +, comm_inter_block[]) # ppar_global_dot is the ppar_dot for the whole grid + ppar_global_dot[] = ppar_global_dot[] / z.n_global end - begin_r_z_vperp_vpa_region() + begin_z_vperp_vpa_region() pdf_local_dot = 0.0 - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - if ir == rend || iz == zend + @loop_z_vperp_vpa iz ivperp ivpa begin + if iz == zend continue end - pdf_local_dot += 
v_pdf[ivpa,ivperp,iz,ir] * w_pdf[ivpa,ivperp,iz,ir] / (rtol * abs(x_pdf[ivpa,ivperp,iz,ir]) + atol)^2 + pdf_local_dot += v_pdf[ivpa,ivperp,iz] * w_pdf[ivpa,ivperp,iz] / (rtol * abs(x_pdf[ivpa,ivperp,iz]) + atol)^2 end _block_synchronize() - pdf_block_dot = MPI.Reduce(pdf_local_dot, +, comm_block[]) + global_dot = Ref(pdf_local_dot) + MPI.Reduce!(global_dot, +, comm_block[]) # global_dot is the dot for the block if block_rank[] == 0 - pdf_global_dot = MPI.Allreduce(pdf_block_dot, +, comm_inter_block[]) - pdf_global_dot = pdf_global_dot / (r.n_global * z.n_global * vperp.n_global * vpa.n_global) + MPI.Allreduce!(global_dot, +, comm_inter_block[]) # global_dot is the dot for the whole grid + global_dot[] = global_dot[] / (z.n_global * vperp.n_global * vpa.n_global) - global_dot = mean((ppar_global_dot, pdf_global_dot)) - else - global_dot = nothing + global_dot[] = mean((ppar_global_dot[], global_dot[])) end - return global_dot + return global_dot[] end -function distributed_dot_s_r_z_vperp_vpa(v::AbstractArray{mk_float, 5}, - w::AbstractArray{mk_float, 5}; - coords, atol, rtol, x) +function distributed_dot(::Val{:srzvperpvpa}, v::AbstractArray{mk_float, 5}, + w::AbstractArray{mk_float, 5}, coords, rtol, atol, x) n_ion_species = coords.s r = coords.r z = coords.z @@ -753,46 +746,21 @@ function distributed_dot_s_r_z_vperp_vpa(v::AbstractArray{mk_float, 5}, end _block_synchronize() - block_dot = MPI.Reduce(local_dot, +, comm_block[]) + global_dot = Ref(local_dot) + MPI.Reduce!(global_dot, +, comm_block[]) # global_dot is the dot for the block if block_rank[] == 0 - global_dot = MPI.Allreduce(block_dot, +, comm_inter_block[]) - global_dot = global_dot / (n_ion_species * r.n_global * z.n_global * vperp.n_global * vpa.n_global) - else - global_dot = nothing + MPI.Allreduce!(global_dot, +, comm_inter_block[]) # global_dot is the dot for the whole grid + global_dot[] = global_dot[] / (n_ion_species * r.n_global * z.n_global * vperp.n_global * vpa.n_global) end - return 
global_dot -end - -""" - get_parallel_map(coords) - -Get a 'parallel_map' function that acts on arrays with dimensions given by the entries in -`coords`. -""" -function get_parallel_map(coords) - dims = keys(coords) - if dims == (:z,) - return parallel_map_z - elseif dims == (:vpa,) - return parallel_map_vpa - elseif dims == (:r, :z, :vperp, :vpa) - # Intended for implicit solve combining electron_ppar and pdf_electron, so will - # not work for a single variable. - return parallel_map_r_z_vperp_vpa - elseif dims == (:s, :r, :z, :vperp, :vpa) - return parallel_map_s_r_z_vperp_vpa - else - error("dims=$dims is not supported yet. Need to write another " - * "`parallel_map_*()` function in nonlinear_solvers.jl") - end + return global_dot[] end # Separate versions for different numbers of arguments as generator expressions result in # slow code -function parallel_map_z(func, result::AbstractArray{mk_float, 1}) +function parallel_map(::Val{:z}, func, result::AbstractArray{mk_float, 1}) begin_z_region() @@ -802,7 +770,7 @@ function parallel_map_z(func, result::AbstractArray{mk_float, 1}) return nothing end -function parallel_map_z(func, result::AbstractArray{mk_float, 1}, x1) +function parallel_map(::Val{:z}, func, result::AbstractArray{mk_float, 1}, x1) begin_z_region() @@ -812,18 +780,40 @@ function parallel_map_z(func, result::AbstractArray{mk_float, 1}, x1) return nothing end -function parallel_map_z(func, result::AbstractArray{mk_float, 1}, x1, x2) +function parallel_map(::Val{:z}, func, result::AbstractArray{mk_float, 1}, x1, x2) begin_z_region() - @loop_z iz begin - result[iz] = func(x1[iz], x2[iz]) + if isa(x2, AbstractArray) + @loop_z iz begin + result[iz] = func(x1[iz], x2[iz]) + end + else + @loop_z iz begin + result[iz] = func(x1[iz], x2) + end end return nothing end +function parallel_map(::Val{:z}, func, result::AbstractArray{mk_float, 1}, x1, x2, x3) + + begin_z_region() -function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}) + if isa(x3, 
AbstractArray) + @loop_z iz begin + result[iz] = func(x1[iz], x2[iz], x3[iz]) + end + else + @loop_z iz begin + result[iz] = func(x1[iz], x2[iz], x3) + end + end + + return nothing +end + +function parallel_map(::Val{:vpa}, func, result::AbstractArray{mk_float, 1}) # No parallelism needed when the implicit solve is over vpa - assume that this will be # called inside a parallelised s_r_z_vperp loop. for i ∈ eachindex(result) @@ -831,7 +821,7 @@ function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}) end return nothing end -function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, x1) +function parallel_map(::Val{:vpa}, func, result::AbstractArray{mk_float, 1}, x1) # No parallelism needed when the implicit solve is over vpa - assume that this will be # called inside a parallelised s_r_z_vperp loop. for i ∈ eachindex(result) @@ -839,74 +829,143 @@ function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, x1) end return nothing end -function parallel_map_vpa(func, result::AbstractArray{mk_float, 1}, x1, x2) +function parallel_map(::Val{:vpa}, func, result::AbstractArray{mk_float, 1}, x1, x2) # No parallelism needed when the implicit solve is over vpa - assume that this will be # called inside a parallelised s_r_z_vperp loop. - for i ∈ eachindex(result) - result[i] = func(x1[i], x2[i]) + if isa(x2, AbstractArray) + for i ∈ eachindex(result) + result[i] = func(x1[i], x2[i]) + end + else + for i ∈ eachindex(result) + result[i] = func(x1[i], x2) + end + end + return nothing +end +function parallel_map(::Val{:vpa}, func, result::AbstractArray{mk_float, 1}, x1, x2, x3) + # No parallelism needed when the implicit solve is over vpa - assume that this will be + # called inside a parallelised s_r_z_vperp loop. 
+ if isa(x3, AbstractArray) + for i ∈ eachindex(result) + result[i] = func(x1[i], x2[i], x3[i]) + end + else + for i ∈ eachindex(result) + result[i] = func(x1[i], x2[i], x3) + end end return nothing end -function parallel_map_r_z_vperp_vpa(func, result::Tuple{AbstractArray{mk_float, 2},AbstractArray{mk_float, 4}}) +function parallel_map(::Val{:zvperpvpa}, func, result::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}) result_ppar, result_pdf = result - begin_r_z_region() + begin_z_region() - @loop_r_z ir iz begin - result_ppar[iz,ir] = func() + @loop_z iz begin + result_ppar[iz] = func() end - begin_r_z_vperp_vpa_region() + begin_z_vperp_vpa_region() - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - result_pdf[ivpa,ivperp,iz,ir] = func() + @loop_z_vperp_vpa iz ivperp ivpa begin + result_pdf[ivpa,ivperp,iz] = func() end return nothing end -function parallel_map_r_z_vperp_vpa(func, result::Tuple{AbstractArray{mk_float, 2},AbstractArray{mk_float, 4}}, x1) +function parallel_map(::Val{:zvperpvpa}, func, result::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, x1) result_ppar, result_pdf = result x1_ppar, x1_pdf = x1 - begin_r_z_region() + begin_z_region() - @loop_r_z ir iz begin - result_ppar[iz,ir] = func(x1_ppar[iz,ir]) + @loop_z iz begin + result_ppar[iz] = func(x1_ppar[iz]) end - begin_r_z_vperp_vpa_region() + begin_z_vperp_vpa_region() - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - result_pdf[ivpa,ivperp,iz,ir] = func(x1_pdf[ivpa,ivperp,iz,ir]) + @loop_z_vperp_vpa iz ivperp ivpa begin + result_pdf[ivpa,ivperp,iz] = func(x1_pdf[ivpa,ivperp,iz]) end return nothing end -function parallel_map_r_z_vperp_vpa(func, result::Tuple{AbstractArray{mk_float, 2},AbstractArray{mk_float, 4}}, x1, x2) +function parallel_map(::Val{:zvperpvpa}, func, result::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, x1, x2) result_ppar, result_pdf = result x1_ppar, x1_pdf = x1 - x2_ppar, x2_pdf = x2 - begin_r_z_region() + if isa(x2, Tuple) + x2_ppar, 
x2_pdf = x2 + begin_z_region() + + @loop_z iz begin + result_ppar[iz] = func(x1_ppar[iz], x2_ppar[iz]) + end + + begin_z_vperp_vpa_region() + + @loop_z_vperp_vpa iz ivperp ivpa begin + result_pdf[ivpa,ivperp,iz] = func(x1_pdf[ivpa,ivperp,iz], x2_pdf[ivpa,ivperp,iz]) + end + else + begin_z_region() + + @loop_z iz begin + result_ppar[iz] = func(x1_ppar[iz], x2) + end - @loop_r_z ir iz begin - result_ppar[iz,ir] = func(x1_ppar[iz,ir], x2_ppar[iz,ir]) + begin_z_vperp_vpa_region() + + @loop_z_vperp_vpa iz ivperp ivpa begin + result_pdf[ivpa,ivperp,iz] = func(x1_pdf[ivpa,ivperp,iz], x2) + end end - begin_r_z_vperp_vpa_region() + return nothing +end +function parallel_map(::Val{:zvperpvpa}, func, result::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, x1, x2, x3) + + result_ppar, result_pdf = result + x1_ppar, x1_pdf = x1 + x2_ppar, x2_pdf = x2 + + if isa(x3, Tuple) + x3_ppar, x3_pdf = x3 + begin_z_region() + + @loop_z iz begin + result_ppar[iz] = func(x1_ppar[iz], x2_ppar[iz], x3_ppar[iz]) + end + + begin_z_vperp_vpa_region() + + @loop_z_vperp_vpa iz ivperp ivpa begin + result_pdf[ivpa,ivperp,iz] = func(x1_pdf[ivpa,ivperp,iz], x2_pdf[ivpa,ivperp,iz], x3_pdf[ivpa,ivperp,iz]) + end + else + begin_z_region() - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin - result_pdf[ivpa,ivperp,iz,ir] = func(x1_pdf[ivpa,ivperp,iz,ir], x2_pdf[ivpa,ivperp,iz,ir]) + @loop_z iz begin + result_ppar[iz] = func(x1_ppar[iz], x2_ppar[iz], x3) + end + + begin_z_vperp_vpa_region() + + @loop_z_vperp_vpa iz ivperp ivpa begin + result_pdf[ivpa,ivperp,iz] = func(x1_pdf[ivpa,ivperp,iz], x2_pdf[ivpa,ivperp,iz], x3) + end end return nothing end -function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}) +function parallel_map(::Val{:srzvperpvpa}, func, result::AbstractArray{mk_float, 5}) begin_s_r_z_vperp_vpa_region() @@ -916,7 +975,7 @@ function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}) return nothing end -function 
parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}, x1) +function parallel_map(::Val{:srzvperpvpa}, func, result::AbstractArray{mk_float, 5}, x1) begin_s_r_z_vperp_vpa_region() @@ -926,42 +985,40 @@ function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}, return nothing end -function parallel_map_s_r_z_vperp_vpa(func, result::AbstractArray{mk_float, 5}, x1, x2) +function parallel_map(::Val{:srzvperpvpa}, func, result::AbstractArray{mk_float, 5}, x1, x2) begin_s_r_z_vperp_vpa_region() - @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin - result[ivpa,ivperp,iz,ir,is] = func(x1[ivpa,ivperp,iz,ir,is], x2[ivpa,ivperp,iz,ir,is]) + if isa(x2, AbstractArray) + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + result[ivpa,ivperp,iz,ir,is] = func(x1[ivpa,ivperp,iz,ir,is], x2[ivpa,ivperp,iz,ir,is]) + end + else + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + result[ivpa,ivperp,iz,ir,is] = func(x1[ivpa,ivperp,iz,ir,is], x2) + end end return nothing end +function parallel_map(::Val{:srzvperpvpa}, func, result::AbstractArray{mk_float, 5}, x1, x2, x3) -""" - get_parallel_delta_x_calc(coords) + begin_s_r_z_vperp_vpa_region() -Get a parallelised function that calculates the update `delta_x` from the `V` matrix and -the minimum residual coefficients `y`. -""" -function get_parallel_delta_x_calc(coords) - dims = keys(coords) - if dims == (:z,) - return parallel_delta_x_calc_z - elseif dims == (:vpa,) - return parallel_delta_x_calc_vpa - elseif dims == (:r, :z, :vperp, :vpa) - # Intended for implicit solve combining electron_ppar and pdf_electron, so will - # not work for a single variable. 
- return parallel_delta_x_calc_r_z_vperp_vpa - elseif dims == (:s, :r, :z, :vperp, :vpa) - return parallel_delta_x_calc_s_r_z_vperp_vpa + if isa(x3, AbstractArray) + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + result[ivpa,ivperp,iz,ir,is] = func(x1[ivpa,ivperp,iz,ir,is], x2[ivpa,ivperp,iz,ir,is], x3[ivpa,ivperp,iz,ir,is]) + end else - error("dims=$dims is not supported yet. Need to write another " - * "`parallel_delta_x_calc_*()` function in nonlinear_solvers.jl") + @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin + result[ivpa,ivperp,iz,ir,is] = func(x1[ivpa,ivperp,iz,ir,is], x2[ivpa,ivperp,iz,ir,is], x3) + end end + + return nothing end -function parallel_delta_x_calc_z(delta_x::AbstractArray{mk_float, 1}, V, y) +function parallel_delta_x_calc(::Val{:z}, delta_x::AbstractArray{mk_float, 1}, V, y) begin_z_region() @@ -975,7 +1032,7 @@ function parallel_delta_x_calc_z(delta_x::AbstractArray{mk_float, 1}, V, y) return nothing end -function parallel_delta_x_calc_vpa(delta_x::AbstractArray{mk_float, 1}, V, y) +function parallel_delta_x_calc(::Val{:vpa}, delta_x::AbstractArray{mk_float, 1}, V, y) # No parallelism needed when the implicit solve is over vpa - assume that this will be # called inside a parallelised s_r_z_vperp loop. 
ny = length(y) @@ -987,33 +1044,33 @@ function parallel_delta_x_calc_vpa(delta_x::AbstractArray{mk_float, 1}, V, y) return nothing end -function parallel_delta_x_calc_r_z_vperp_vpa(delta_x::Tuple{AbstractArray{mk_float, 2},AbstractArray{mk_float, 4}}, V, y) +function parallel_delta_x_calc(::Val{:zvperpvpa}, delta_x::Tuple{AbstractArray{mk_float, 1},AbstractArray{mk_float, 3}}, V, y) delta_x_ppar, delta_x_pdf = delta_x V_ppar, V_pdf = V ny = length(y) - begin_r_z_region() + begin_z_region() - @loop_r_z ir iz begin + @loop_z iz begin for iy ∈ 1:ny - delta_x_ppar[iz,ir] += y[iy] * V_ppar[iz,ir,iy] + delta_x_ppar[iz] += y[iy] * V_ppar[iz,iy] end end - begin_r_z_vperp_vpa_region() + begin_z_vperp_vpa_region() - @loop_r_z_vperp_vpa ir iz ivperp ivpa begin + @loop_z_vperp_vpa iz ivperp ivpa begin for iy ∈ 1:ny - delta_x_pdf[ivpa,ivperp,iz,ir] += y[iy] * V_pdf[ivpa,ivperp,iz,ir,iy] + delta_x_pdf[ivpa,ivperp,iz] += y[iy] * V_pdf[ivpa,ivperp,iz,iy] end end return nothing end -function parallel_delta_x_calc_s_r_z_vperp_vpa(delta_x::AbstractArray{mk_float, 5}, V, y) +function parallel_delta_x_calc(::Val{:srzvperpvpa}, delta_x::AbstractArray{mk_float, 5}, V, y) begin_s_r_z_vperp_vpa_region() @@ -1038,11 +1095,16 @@ end """ Apply the GMRES algorithm to solve the 'linear problem' J.δx^n = R(x^n), which is needed at each step of the outer Newton iteration (in `newton_solve!()`). + +Uses Givens rotations to reduce the upper Hessenberg matrix to an upper triangular form, +which allows conveniently finding the residual at each step, and computing the final +solution, without calculating a least-squares minimisation at each step. See 'algorithm 2 +MGS-GMRES' in Zou (2023) [https://doi.org/10.1016/j.amc.2023.127869]. 
""" -function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol, atol, - restart, max_restarts, left_preconditioner, right_preconditioner, - H, V, rhs_delta, initial_guess, distributed_norm, distributed_dot, - parallel_map, parallel_delta_x_calc, serial_solve) +function linear_solve!(x, residual_func!, residual0, delta_x, v, w, solver_type::Val, + norm_params; coords, rtol, atol, restart, max_restarts, + left_preconditioner, right_preconditioner, H, c, s, g, V, + rhs_delta, initial_guess, serial_solve) # Solve (approximately?): # J δx = residual0 @@ -1062,9 +1124,9 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol function approximate_Jacobian_vector_product!(v) right_preconditioner(v) - parallel_map((x,v) -> x + Jv_scale_factor * v, v, x, v) + parallel_map(solver_type, (x,v) -> x + Jv_scale_factor * v, v, x, v) residual_func!(rhs_delta, v) - parallel_map((rhs_delta, residual0) -> (rhs_delta - residual0) * inv_Jv_scale_factor, + parallel_map(solver_type, (rhs_delta, residual0) -> (rhs_delta - residual0) * inv_Jv_scale_factor, v, rhs_delta, residual0) left_preconditioner(v) return v @@ -1072,14 +1134,22 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol # To start with we use 'w' as a buffer to make a copy of residual0 to which we can apply # the left-preconditioner. - parallel_map((delta_x) -> delta_x, v, delta_x) + parallel_map(solver_type, (delta_x) -> delta_x, v, delta_x) left_preconditioner(residual0) # This function transforms the data stored in 'v' from δx to ≈J.δx approximate_Jacobian_vector_product!(v) # Now we actually set 'w' as the first Krylov vector, and normalise it. - parallel_map((residual0, v) -> -residual0 - v, w, residual0, v) - beta = distributed_norm(w) - parallel_map((w) -> w/beta, select_from_V(V, 1), w) + parallel_map(solver_type, (residual0, v) -> -residual0 - v, w, residual0, v) + beta = distributed_norm(solver_type, w, norm_params...) 
+ parallel_map(solver_type, (w,beta) -> w/beta, select_from_V(V, 1), w, beta) + if serial_solve + g[1] = beta + else + begin_serial_region() + @serial_region begin + g[1] = beta + end + end # Set tolerance for GMRES iteration to rtol times the initial residual, unless this is # so small that it is smaller than atol, in which case use atol instead. @@ -1090,18 +1160,20 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol counter = 0 restart_counter = 1 while true + inner_counter = 0 for i ∈ 1:restart + inner_counter = i counter += 1 #println("Linear ", counter) # Compute next Krylov vector - parallel_map((V) -> V, w, select_from_V(V, i)) + parallel_map(solver_type, (V) -> V, w, select_from_V(V, i)) approximate_Jacobian_vector_product!(w) # Gram-Schmidt orthogonalization for j ∈ 1:i - parallel_map((V) -> V, v, select_from_V(V, j)) - w_dot_Vj = distributed_dot(w, v) + parallel_map(solver_type, (V) -> V, v, select_from_V(V, j)) + w_dot_Vj = distributed_dot(solver_type, w, v, norm_params...) if serial_solve H[j,i] = w_dot_Vj else @@ -1110,9 +1182,9 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol H[j,i] = w_dot_Vj end end - parallel_map((w, V) -> w - H[j,i] * V, w, w, select_from_V(V, j)) + parallel_map(solver_type, (w, V) -> w - H[j,i] * V, w, w, select_from_V(V, j)) end - norm_w = distributed_norm(w) + norm_w = distributed_norm(solver_type, w, norm_params...) 
if serial_solve H[i+1,i] = norm_w else @@ -1121,55 +1193,55 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol H[i+1,i] = norm_w end end - parallel_map((w) -> w / H[i+1,i], select_from_V(V, i+1), w) + parallel_map(solver_type, (w) -> w / H[i+1,i], select_from_V(V, i+1), w) - function temporary_residual!(result, guess) - #println("temporary residual ", size(result), " ", size(@view(H[1:i+1,1:i])), " ", size(guess)) - result .= @view(H[1:i+1,1:i]) * guess - result[1] -= beta - end - - # Second argument to fsolve needs to be a Vector{Float64} if serial_solve - resize!(initial_guess, i) - initial_guess[1] = beta - initial_guess[2:i] .= 0.0 - lsq_result = fsolve(temporary_residual!, initial_guess, i+1; method=:lm) - residual = norm(lsq_result.f) + for j ∈ 1:i-1 + gamma = c[j] * H[j,i] + s[j] * H[j+1,i] + H[j+1,i] = -s[j] * H[j,i] + c[j] * H[j+1,i] + H[j,i] = gamma + end + delta = sqrt(H[i,i]^2 + H[i+1,i]^2) + s[i] = H[i+1,i] / delta + c[i] = H[i,i] / delta + H[i,i] = c[i] * H[i,i] + s[i] * H[i+1,i] + H[i+1,i] = 0 + g[i+1] = -s[i] * g[i] + g[i] = c[i] * g[i] else begin_serial_region() - if global_rank[] == 0 - resize!(initial_guess, i) - initial_guess[1] = beta - initial_guess[2:i] .= 0.0 - lsq_result = fsolve(temporary_residual!, initial_guess, i+1; method=:lm) - residual = norm(lsq_result.f) - else - residual = nothing + @serial_region begin + for j ∈ 1:i-1 + gamma = c[j] * H[j,i] + s[j] * H[j+1,i] + H[j+1,i] = -s[j] * H[j,i] + c[j] * H[j+1,i] + H[j,i] = gamma + end + delta = sqrt(H[i,i]^2 + H[i+1,i]^2) + s[i] = H[i+1,i] / delta + c[i] = H[i,i] / delta + H[i,i] = c[i] * H[i,i] + s[i] * H[i+1,i] + H[i+1,i] = 0 + g[i+1] = -s[i] * g[i] + g[i] = c[i] * g[i] end - residual = MPI.bcast(residual, comm_world; root=0) + _block_synchronize() end + residual = abs(g[i+1]) + if residual < tol break end end + i = inner_counter - # Update initial guess fo restart - if serial_solve - y = lsq_result.x - else - if global_rank[] == 0 - y = lsq_result.x - 
else - y = nothing - end - y = MPI.bcast(y, comm_world; root=0) - end + # Update initial guess to restart + ################################# + + @views y = H[1:i,1:i] \ g[1:i] - # The following is the `parallel_map()` version of + # The following calculates # delta_x .= delta_x .+ sum(y[i] .* V[:,i] for i ∈ 1:length(y)) - # slightly abusing splatting to get the sum into a lambda-function. - parallel_delta_x_calc(delta_x, V, y) + parallel_delta_x_calc(solver_type, delta_x, V, y) right_preconditioner(delta_x) if residual < tol || restart_counter > max_restarts @@ -1180,16 +1252,16 @@ function linear_solve!(x, residual_func!, residual0, delta_x, v, w; coords, rtol # Store J.delta_x in the variable delta_x, to use it to calculate the new first # Krylov vector v/beta. - parallel_map((delta_x) -> delta_x, v, delta_x) + parallel_map(solver_type, (delta_x) -> delta_x, v, delta_x) approximate_Jacobian_vector_product!(v) # Note residual0 has already had the left_preconditioner!() applied to it. - parallel_map((residual0, v) -> -residual0 - v, v, residual0, v) - beta = distributed_norm(v) + parallel_map(solver_type, (residual0, v) -> -residual0 - v, v, residual0, v) + beta = distributed_norm(solver_type, v, norm_params...) 
for i ∈ 2:length(y) - parallel_map(() -> 0.0, select_from_V(V, i)) + parallel_map(solver_type, () -> 0.0, select_from_V(V, i)) end - parallel_map((v) -> v/beta, select_from_V(V, 1), v) + parallel_map(solver_type, (v,beta) -> v/beta, select_from_V(V, 1), v, beta) end return counter diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index 16d0ef600..0c87369b7 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -200,6 +200,70 @@ function setup_runge_kutta_coefficients!(type, input_CFL_prefactor, split_operat else CFL_prefactor = input_CFL_prefactor end + elseif type == "PareschiRusso2(2,2,2)" + # 2nd-order, 2-stage IMEX method 'IMEX-SSP2(2,2,2)' from Pareschi & Russo 2005, Table II + # (https://doi.org/10.1007/s10915-004-4636-4) + rk_coefs = mk_float[-0.4142135623730950488016887242096980785696718753769480731766797379907324784621711 -0.5 ; + 0.9999999999999999999999999999999999999999999999999999999999999999999999999999827 -1.207106781186547524400844362104849039284835937688474036588339868995366239231094; + -0.0 0.5 ] + rk_coefs_implicit = mk_float[ 0.2928932188134524755991556378951509607151640623115259634116601310046337607689404 0.4142135623730950488016887242096980785696718753769480731766797379907324784621883 1.0; + -0.0 0.2928932188134524755991556378951509607151640623115259634116601310046337607689404 1.207106781186547524400844362104849039284835937688474036588339868995366239231094] + implicit_coefficient_is_zero = Bool[false, false] + n_rk_stages = 2 + rk_order = 2 + adaptive = false + low_storage = false + CFL_prefactor = NaN + elseif type == "PareschiRusso2(3,2,2)" +# 2nd-order, 3-stage IMEX method 'IMEX-SSP2(3,2,2)' from Pareschi & Russo 2005, Table III +# (https://doi.org/10.1007/s10915-004-4636-4) + rk_coefs = mk_float[2 -1 -1//2; + 0 0 0 ; + 0 1 -1//2; + 0 0 1//2] + rk_coefs_implicit = mk_float[1//2 -1 1 1//2; + 0 1//2 0 1//2; + 0 0 1//2 1//2] + implicit_coefficient_is_zero = Bool[false, 
false, false] + n_rk_stages = 3 + rk_order = 2 + adaptive = false + low_storage = false + CFL_prefactor = NaN + elseif type == "PareschiRusso2(3,3,2)" + # 2nd-order, 3-stage IMEX method 'IMEX-SSP2(3,3,2)' from Pareschi & Russo 2005, Table IV + # (https://doi.org/10.1007/s10915-004-4636-4) + rk_coefs = mk_float[1 -4//3 -1//9; + 1//2 -1//3 -4//9; + 0 1//2 -1//3; + 0 0 1//3] + rk_coefs_implicit = mk_float[1//4 -1//2 4//3 4//9; + 0 1//4 5//6 4//9; + 0 0 1//3 2//3] + implicit_coefficient_is_zero = Bool[false, false, false] + n_rk_stages = 3 + rk_order = 2 + adaptive = false + low_storage = false + CFL_prefactor = NaN + elseif type == "PareschiRusso3(4,3,3)" + # 3rd-order, 4-stage IMEX method 'IMEX-SSP3(4,3,3)' from Pareschi & Russo 2005, Table VI + # (https://doi.org/10.1007/s10915-004-4636-4) + rk_coefs = mk_float[ 2.0 -5.27491721763532 0.9999999999999688 -0.1666666666666453; + -0.0 0.0 1.4589197899688663e-17 0.0 ; + -0.0 1.0 -0.0343646522044047 -0.500000000000007 ; + -0.0 -0.0 0.25 -2.091639072545107 ; + -0.0 -0.0 -0.0 0.6666666666666664] + rk_coefs_implicit = mk_float[ 0.24169426078821 -1.0 3.13745860881766 1.0436096431476471e-14 0.16666666666665975; + -0.0 0.24169426078821 2.13745860881766 -0.24999999999997924 0.3333333333333193 ; + -0.0 -0.0 0.24169426078821 0.034364652204404655 0.500000000000007 ; + -0.0 -0.0 -0.0 0.24169426078821 2.0916390725451066 ] + implicit_coefficient_is_zero = Bool[false, false, false, false] + n_rk_stages = 4 + rk_order = 3 + adaptive = false + low_storage = false + CFL_prefactor = NaN elseif type == "SSPRK4" n_rk_stages = 4 rk_coefs = allocate_float(3, n_rk_stages) @@ -993,51 +1057,53 @@ end """ adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, - total_points, current_dt, error_norm_method, - success, nl_max_its_fraction) + total_points, error_norm_method, success, + nl_max_its_fraction, composition; + electron=false, local_max_dt::mk_float=Inf) Use the calculated `CFL_limits` and `error_norms` to update the timestep 
in `t_params`. """ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, - total_points, current_dt, error_norm_method, - success, nl_max_its_fraction, composition; + total_points, error_norm_method, success, + nl_max_its_fraction, composition; electron=false, local_max_dt::mk_float=Inf) # Get global minimum of CFL limits - CFL_limit = nothing + CFL_limit = Ref(0.0) this_limit_caused_by = nothing @serial_region begin # Get maximum error over all blocks CFL_limits = MPI.Allreduce(CFL_limits, min, comm_inter_block[]) CFL_limit_caused_by = argmin(CFL_limits) - CFL_limit = CFL_limits[CFL_limit_caused_by] + CFL_limit[] = CFL_limits[CFL_limit_caused_by] # Reserve first four entries of t_params.limit_caused_by for max_increase_factor, # max_increase_factor_near_fail, minimum_dt, maximum_dt limits and # high_nl_iterations, then the next `n_variables` for RK accuracy limits. this_limit_caused_by = CFL_limit_caused_by + 5 + t_params.n_variables end + MPI.Bcast!(CFL_limit, comm_block[]) if error_norm_method == "Linf" # Get overall maximum error on the shared-memory block - error_norms = MPI.Reduce(error_norms, max, comm_block[]; root=0) + MPI.Reduce!(error_norms, max, comm_block[]; root=0) - error_norm = nothing + error_norm = Ref{mk_float}(0.0) max_error_variable_index = -1 @serial_region begin # Get maximum error over all blocks - error_norms = MPI.Allreduce(error_norms, max, comm_inter_block[]) + MPI.Allreduce!(error_norms, max, comm_inter_block[]) max_error_variable_index = argmax(error_norms) - error_norm = error_norms[max_error_variable_index] + error_norm[] = error_norms[max_error_variable_index] end - error_norm = MPI.bcast(error_norm, 0, comm_block[]) + MPI.Bcast!(error_norm, 0, comm_block[]) elseif error_norm_method == "L2" # Get overall maximum error on the shared-memory block - error_norms = MPI.Reduce(error_norms, +, comm_block[]; root=0) + MPI.Reduce!(error_norms, +, comm_block[]; root=0) - error_norm = nothing + error_norm = 
Ref{mk_float}(0.0) max_error_variable_index = -1 @serial_region begin # Get maximum error over all blocks - error_norms = MPI.Allreduce(error_norms, +, comm_inter_block[]) + MPI.Allreduce!(error_norms, +, comm_inter_block[]) # So far `error_norms` is the sum of squares of the errors. Now that summation # is finished, need to divide by total number of points and take square-root. @@ -1046,13 +1112,13 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, # Weight the error from each variable equally by taking the mean, so the # larger number of points in the distribution functions does not mean that # error on the moments is ignored. - error_norm = mean(error_norms) + error_norm[] = mean(error_norms) # Record which variable had the maximum error max_error_variable_index = argmax(error_norms) end - error_norm = MPI.bcast(error_norm, 0, comm_block[]) + MPI.Bcast!(error_norm, 0, comm_block[]) else error("Unrecognized error_norm_method '$method'") end @@ -1060,240 +1126,240 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, if success != "" # Iteration failed in implicit part of timestep try decreasing timestep - @serial_region begin - t_params.failure_counter[] += 1 - - if t_params.previous_dt[] > 0.0 - # If previous_dt=0, the previous step was also a failure so only update - # dt_before_last_fail when previous_dt>0 - t_params.dt_before_last_fail[] = t_params.previous_dt[] - end + t_params.failure_counter[] += 1 - # Decrease timestep by 1/2 - this factor should probably be settable! - # Note when nonlinear solve iteration fails, we do not enforce - # minimum_dt, as the timesolver must error if we do not decrease dt. - if t_params.dt[] > t_params.minimum_dt - # ...but try decreasing just to minimum_dt first, if the dt is still - # bigger than this. 
- t_params.dt[] = max(t_params.dt[] / 2.0, t_params.minimum_dt) - else - t_params.dt[] = t_params.dt[] / 2.0 - end + if t_params.previous_dt[] > 0.0 + # If previous_dt=0, the previous step was also a failure so only update + # dt_before_last_fail when previous_dt>0 + t_params.dt_before_last_fail[] = t_params.previous_dt[] + end - # Don't update the simulation time, as this step failed - t_params.previous_dt[] = 0.0 - - # Call the 'cause' of the timestep failure the variable that has the biggest - # error norm here. - # Could do with a better way to sort the different possible types of - # convergence failure... - if t_params.rk_coefs_implicit !== nothing && - composition.electron_physics ∈ (kinetic_electrons, - kinetic_electrons_with_temperature_equation) - if success == "nonlinear-solver" - t_params.failure_caused_by[end-1] += 1 - elseif success == "kinetic-electrons" - t_params.failure_caused_by[end] += 1 - else - error("Unrecognised cause of convergence failure: \"$success\"") - end - else + # Decrease timestep by 1/2 - this factor should probably be settable! + # Note when nonlinear solve iteration fails, we do not enforce + # minimum_dt, as the timesolver must error if we do not decrease dt. + if t_params.dt[] > t_params.minimum_dt + # ...but try decreasing just to minimum_dt first, if the dt is still + # bigger than this. + t_params.dt[] = max(t_params.dt[] / 2.0, t_params.minimum_dt) + else + t_params.dt[] = t_params.dt[] / 2.0 + end + + # Don't update the simulation time, as this step failed + t_params.previous_dt[] = 0.0 + + # Call the 'cause' of the timestep failure the variable that has the biggest + # error norm here. + # Could do with a better way to sort the different possible types of + # convergence failure... 
+ if t_params.rk_coefs_implicit !== nothing && + composition.electron_physics ∈ (kinetic_electrons, + kinetic_electrons_with_temperature_equation) + if success == "nonlinear-solver" + t_params.failure_caused_by[end-1] += 1 + elseif success == "kinetic-electrons" t_params.failure_caused_by[end] += 1 + else + error("Unrecognised cause of convergence failure: \"$success\"") end - - # If we were trying to take a step to the output timestep, dt will be smaller on - # the re-try, so will not reach the output time. - t_params.step_to_moments_output[] = false - t_params.step_to_dfns_output[] = false + else + t_params.failure_caused_by[end] += 1 end - elseif (error_norm > 1.0 || isnan(error_norm)) && current_dt > t_params.minimum_dt * (1.0 + 1.0e-13) + + # If we were trying to take a step to the output timestep, dt will be smaller on + # the re-try, so will not reach the output time. + t_params.step_to_moments_output[] = false + t_params.step_to_dfns_output[] = false + elseif (error_norm[] > 1.0 || isnan(error_norm[])) && t_params.dt[] > t_params.minimum_dt * (1.0 + 1.0e-13) # (1.0 + 1.0e-13) fudge factor accounts for possible rounding errors when # t+dt=next_output_time. - # Use current_dt instead of t_params.dt[] here because we are about to write to - # the shared-memory variable t_params.dt[] below, and we do not want to add an - # extra _block_synchronize() call after reading it here. 
# # Timestep failed, reduce timestep and re-try - @serial_region begin - t_params.failure_counter[] += 1 + t_params.failure_counter[] += 1 - if t_params.previous_dt[] > 0.0 - # If previous_dt=0, the previous step was also a failure so only update - # dt_before_last_fail when previous_dt>0 - t_params.dt_before_last_fail[] = t_params.previous_dt[] - end + if t_params.previous_dt[] > 0.0 + # If previous_dt=0, the previous step was also a failure so only update + # dt_before_last_fail when previous_dt>0 + t_params.dt_before_last_fail[] = t_params.previous_dt[] + end - # Get new timestep estimate using same formula as for a successful step, but - # limit decrease to factor 1/2 - this factor should probably be settable! - t_params.dt[] = max(t_params.dt[] / 2.0, - t_params.dt[] * t_params.step_update_prefactor * error_norm^(-1.0/t_params.rk_order)) - t_params.dt[] = max(t_params.dt[], t_params.minimum_dt) + # Get new timestep estimate using same formula as for a successful step, but + # limit decrease to factor 1/2 - this factor should probably be settable! + t_params.dt[] = max(t_params.dt[] / 2.0, + t_params.dt[] * t_params.step_update_prefactor * error_norm[]^(-1.0/t_params.rk_order)) + t_params.dt[] = max(t_params.dt[], t_params.minimum_dt) - # Don't update the simulation time, as this step failed - t_params.previous_dt[] = 0.0 + # Don't update the simulation time, as this step failed + t_params.previous_dt[] = 0.0 - # Call the 'cause' of the timestep failure the variable that has the biggest - # error norm here + # Call the 'cause' of the timestep failure the variable that has the biggest + # error norm here + @serial_region begin t_params.failure_caused_by[max_error_variable_index] += 1 + end - # If we were trying to take a step to the output timestep, dt will be smaller on - # the re-try, so will not reach the output time. 
- t_params.step_to_moments_output[] = false - t_params.step_to_dfns_output[] = false + # If we were trying to take a step to the output timestep, dt will be smaller on + # the re-try, so will not reach the output time. + t_params.step_to_moments_output[] = false + t_params.step_to_dfns_output[] = false - #println("t=$t, timestep failed, error_norm=$error_norm, error_norms=$error_norms, decreasing timestep to ", t_params.dt[]) - end + #println("t=$t, timestep failed, error_norm=$(error_norm[]), error_norms=$error_norms, decreasing timestep to ", t_params.dt[]) else - @serial_region begin - # Save the timestep used to complete this step, this is used to update the - # simulation time. - t_params.previous_dt[] = t_params.dt[] - - if t_params.step_to_moments_output[] || t_params.step_to_dfns_output[] - # Completed an output step, reset dt to what it was before it was reduced to reach - # the output time - t_params.dt[] = t_params.dt_before_output[] - - if t_params.step_to_moments_output[] - t_params.step_to_moments_output[] = false - t_params.write_moments_output[] = true - end - if t_params.step_to_dfns_output[] - t_params.step_to_dfns_output[] = false - t_params.write_dfns_output[] = true - end + # Save the timestep used to complete this step, this is used to update the + # simulation time. 
+ t_params.previous_dt[] = t_params.dt[] + + if t_params.step_to_moments_output[] || t_params.step_to_dfns_output[] + # Completed an output step, reset dt to what it was before it was reduced to reach + # the output time + t_params.dt[] = t_params.dt_before_output[] + + if t_params.step_to_moments_output[] + t_params.step_to_moments_output[] = false + t_params.write_moments_output[] = true + end + if t_params.step_to_dfns_output[] + t_params.step_to_dfns_output[] = false + t_params.write_dfns_output[] = true + end - if t_params.dt[] > CFL_limit - t_params.dt[] = CFL_limit - end + if t_params.dt[] > CFL_limit[] + t_params.dt[] = CFL_limit[] + end + else + # Adjust timestep according to Fehlberg's suggestion + # (https://en.wikipedia.org/wiki/Runge%E2%80%93Kutta%E2%80%93Fehlberg_method). + # `step_update_prefactor` is a constant numerical factor to make the estimate + # of a good value for the next timestep slightly conservative. It defaults to + # 0.9. + t_params.dt[] *= t_params.step_update_prefactor * error_norm[]^(-1.0/t_params.rk_order) + + if t_params.dt[] > CFL_limit[] + t_params.dt[] = CFL_limit[] else - # Adjust timestep according to Fehlberg's suggestion - # (https://en.wikipedia.org/wiki/Runge%E2%80%93Kutta%E2%80%93Fehlberg_method). - # `step_update_prefactor` is a constant numerical factor to make the estimate - # of a good value for the next timestep slightly conservative. It defaults to - # 0.9. - t_params.dt[] *= t_params.step_update_prefactor * error_norm^(-1.0/t_params.rk_order) - - if t_params.dt[] > CFL_limit - t_params.dt[] = CFL_limit - else - # Reserve first four entries of t_params.limit_caused_by for - # max_increase_factor, max_increase_factor_near_fail, minimum_dt and - # maximum_dt limits, high_nl_iterations. + # Reserve first five entries of t_params.limit_caused_by for the + # max_increase_factor, max_increase_factor_near_fail, minimum_dt, + # maximum_dt and high_nl_iterations limits. 
+ @serial_region begin this_limit_caused_by = 5 + max_error_variable_index end + end - # Limit so timestep cannot increase by a large factor, which might lead to - # numerical instability in some cases. - max_cap_limit_caused_by = 1 - if isinf(t_params.max_increase_factor_near_last_fail) - # Not using special timestep limiting near last failed dt value - max_cap = t_params.max_increase_factor * t_params.previous_dt[] - else - max_cap = t_params.max_increase_factor * t_params.previous_dt[] - slow_increase_threshold = t_params.dt_before_last_fail[] / t_params.last_fail_proximity_factor - if t_params.previous_dt[] > t_params.dt_before_last_fail[] * t_params.last_fail_proximity_factor - # dt has successfully exceeded the last failed value, so allow it - # to increase more quickly again - t_params.dt_before_last_fail[] = Inf - elseif max_cap > slow_increase_threshold - # dt is getting close to last failed value, so increase more - # slowly - max_cap = max(slow_increase_threshold, - t_params.max_increase_factor_near_last_fail * - t_params.previous_dt[]) - max_cap_limit_caused_by = 2 - end + # Limit so timestep cannot increase by a large factor, which might lead to + # numerical instability in some cases. 
+ max_cap_limit_caused_by = 1 + if isinf(t_params.max_increase_factor_near_last_fail) + # Not using special timestep limiting near last failed dt value + max_cap = t_params.max_increase_factor * t_params.previous_dt[] + else + max_cap = t_params.max_increase_factor * t_params.previous_dt[] + slow_increase_threshold = t_params.dt_before_last_fail[] / t_params.last_fail_proximity_factor + if t_params.previous_dt[] > t_params.dt_before_last_fail[] * t_params.last_fail_proximity_factor + # dt has successfully exceeded the last failed value, so allow it + # to increase more quickly again + t_params.dt_before_last_fail[] = Inf + elseif max_cap > slow_increase_threshold + # dt is getting close to last failed value, so increase more + # slowly + max_cap = max(slow_increase_threshold, + t_params.max_increase_factor_near_last_fail * + t_params.previous_dt[]) + max_cap_limit_caused_by = 2 end - if t_params.dt[] > max_cap - t_params.dt[] = max_cap + end + if t_params.dt[] > max_cap + t_params.dt[] = max_cap + @serial_region begin this_limit_caused_by = max_cap_limit_caused_by end + end - # Prevent timestep from going below minimum_dt - if t_params.dt[] < t_params.minimum_dt - t_params.dt[] = t_params.minimum_dt + # Prevent timestep from going below minimum_dt + if t_params.dt[] < t_params.minimum_dt + t_params.dt[] = t_params.minimum_dt + @serial_region begin this_limit_caused_by = 3 end + end - # Prevent timestep from going above maximum_dt - max_dt = min(t_params.maximum_dt, local_max_dt) - if t_params.dt[] > max_dt - t_params.dt[] = max_dt + # Prevent timestep from going above maximum_dt + max_dt = min(t_params.maximum_dt, local_max_dt) + if t_params.dt[] > max_dt + t_params.dt[] = max_dt + @serial_region begin this_limit_caused_by = 4 end + end - if nl_max_its_fraction > 0.5 && t_params.previous_dt[] > 0.0 - # The last step took many nonlinear iterations, so do not allow the - # timestep to increase. 
- # If t_params.previous_dt[]==0.0, then the previous step failed so - # timestep will not be increasing, so do not need this check. - if t_params.dt[] > t_params.previous_dt[] - t_params.dt[] = t_params.previous_dt[] + if nl_max_its_fraction > 0.5 && t_params.previous_dt[] > 0.0 + # The last step took many nonlinear iterations, so do not allow the + # timestep to increase. + # If t_params.previous_dt[]==0.0, then the previous step failed so + # timestep will not be increasing, so do not need this check. + if t_params.dt[] > t_params.previous_dt[] + t_params.dt[] = t_params.previous_dt[] + @serial_region begin this_limit_caused_by = 5 end end + end + @serial_region begin t_params.limit_caused_by[this_limit_caused_by] += 1 + end - if (t_params.step_counter[] % 1000 == 0) && global_rank[] == 0 - prefix = electron ? "electron" : "ion" - println("$prefix step ", t_params.step_counter[], ": t=", - round(t_params.t[], sigdigits=6), ", nfail=", - t_params.failure_counter[], ", dt=", t_params.dt[]) - end + if (t_params.step_counter[] % 1000 == 0) && global_rank[] == 0 + prefix = electron ? "electron" : "ion" + println("$prefix step ", t_params.step_counter[], ": t=", + round(t_params.t[], sigdigits=6), ", nfail=", + t_params.failure_counter[], ", dt=", t_params.dt[]) end end end - @serial_region begin - minimum_dt = 1.e-14 - if t_params.dt[] < minimum_dt - println("Time advance failed: trying to set dt=$(t_params.dt[]) less than " - * "$minimum_dt at t=$(t_params.t[]). Ending run.") - # Set dt negative to signal an error - t_params.dt[] = -1.0 - end - - current_time = t_params.t[] + t_params.previous_dt[] - # Store here to ensure dt_before_output is set correctly when both moments and - # dfns are written at the same time. 
- current_dt = t_params.dt[] - if (!t_params.write_after_fixed_step_count - && !t_params.write_moments_output[] - && length(t_params.moments_output_times) > 0 - && (t_params.moments_output_counter[] ≤ length(t_params.moments_output_times)) - && (current_time + t_params.dt[] >= t_params.moments_output_times[t_params.moments_output_counter[]])) - - t_params.dt_before_output[] = current_dt - t_params.dt[] = t_params.moments_output_times[t_params.moments_output_counter[]] - current_time - t_params.step_to_moments_output[] = true - - if t_params.dt[] < 0.0 - error("When trying to step to next output time, made negative timestep " - * "dt=$(t_params.dt[])") - end - end - if (!t_params.write_after_fixed_step_count - && !t_params.write_dfns_output[] - && length(t_params.dfns_output_times) > 0 - && (t_params.dfns_output_counter[] ≤ length(t_params.dfns_output_times)) - && (current_time + t_params.dt[] >= t_params.dfns_output_times[t_params.dfns_output_counter[]])) + minimum_dt = 1.e-14 + if t_params.dt[] < minimum_dt + println("Time advance failed: trying to set dt=$(t_params.dt[]) less than " + * "$minimum_dt at t=$(t_params.t[]). Ending run.") + # Set dt negative to signal an error + t_params.dt[] = -1.0 + end - t_params.dt_before_output[] = current_dt - t_params.dt[] = t_params.dfns_output_times[t_params.dfns_output_counter[]] - current_time - t_params.step_to_dfns_output[] = true + current_time = t_params.t[] + t_params.previous_dt[] + # Store here to ensure dt_before_output is set correctly when both moments and + # dfns are written at the same time. 
+ current_dt = t_params.dt[] + if (!t_params.write_after_fixed_step_count + && !t_params.write_moments_output[] + && length(t_params.moments_output_times) > 0 + && (t_params.moments_output_counter[] ≤ length(t_params.moments_output_times)) + && (current_time + t_params.dt[] >= t_params.moments_output_times[t_params.moments_output_counter[]])) - if t_params.dt[] < 0.0 - error("When trying to step to next output time, made negative timestep " - * "dt=$(t_params.dt[])") - end + t_params.dt_before_output[] = current_dt + t_params.dt[] = t_params.moments_output_times[t_params.moments_output_counter[]] - current_time + t_params.step_to_moments_output[] = true + + if t_params.dt[] < 0.0 + error("When trying to step to next output time, made negative timestep " + * "dt=$(t_params.dt[])") end end + if (!t_params.write_after_fixed_step_count + && !t_params.write_dfns_output[] + && length(t_params.dfns_output_times) > 0 + && (t_params.dfns_output_counter[] ≤ length(t_params.dfns_output_times)) + && (current_time + t_params.dt[] >= t_params.dfns_output_times[t_params.dfns_output_counter[]])) + + t_params.dt_before_output[] = current_dt + t_params.dt[] = t_params.dfns_output_times[t_params.dfns_output_counter[]] - current_time + t_params.step_to_dfns_output[] = true - # Shared-memory variables have been updated, so synchronize - _block_synchronize() + if t_params.dt[] < 0.0 + error("When trying to step to next output time, made negative timestep " + * "dt=$(t_params.dt[])") + end + end return nothing end diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 634699653..06b5219c5 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -154,19 +154,19 @@ struct scratch_dummy_arrays buffer_vpavperpzrs_1::MPISharedArray{mk_float,5} buffer_vpavperpzrs_2::MPISharedArray{mk_float,5} # buffers to hold moment quantities for implicit solves - implicit_buffer_zr_1::MPISharedArray{mk_float,2} - 
implicit_buffer_zr_2::MPISharedArray{mk_float,2} - implicit_buffer_zr_3::MPISharedArray{mk_float,2} - implicit_buffer_zr_4::MPISharedArray{mk_float,2} - implicit_buffer_zr_5::MPISharedArray{mk_float,2} - implicit_buffer_zr_6::MPISharedArray{mk_float,2} + implicit_buffer_z_1::MPISharedArray{mk_float,1} + implicit_buffer_z_2::MPISharedArray{mk_float,1} + implicit_buffer_z_3::MPISharedArray{mk_float,1} + implicit_buffer_z_4::MPISharedArray{mk_float,1} + implicit_buffer_z_5::MPISharedArray{mk_float,1} + implicit_buffer_z_6::MPISharedArray{mk_float,1} # buffers to hold electron for implicit solves - implicit_buffer_vpavperpzr_1::MPISharedArray{mk_float,4} - implicit_buffer_vpavperpzr_2::MPISharedArray{mk_float,4} - implicit_buffer_vpavperpzr_3::MPISharedArray{mk_float,4} - implicit_buffer_vpavperpzr_4::MPISharedArray{mk_float,4} - implicit_buffer_vpavperpzr_5::MPISharedArray{mk_float,4} - implicit_buffer_vpavperpzr_6::MPISharedArray{mk_float,4} + implicit_buffer_vpavperpz_1::MPISharedArray{mk_float,3} + implicit_buffer_vpavperpz_2::MPISharedArray{mk_float,3} + implicit_buffer_vpavperpz_3::MPISharedArray{mk_float,3} + implicit_buffer_vpavperpz_4::MPISharedArray{mk_float,3} + implicit_buffer_vpavperpz_5::MPISharedArray{mk_float,3} + implicit_buffer_vpavperpz_6::MPISharedArray{mk_float,3} # buffers to hold ion pdf for implicit solves implicit_buffer_vpavperpzrs_1::MPISharedArray{mk_float,5} implicit_buffer_vpavperpzrs_2::MPISharedArray{mk_float,5} @@ -319,9 +319,14 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, t_input["split_operators"]) if !adaptive - # No adaptive timestep, want to use the value from the input file even when we are - # restarting - dt_reload = nothing + if electron !== nothing + # No adaptive timestep, want to use the value from the input file even when we are + # restarting. 
+ # Do not want to do this for electrons, because electron_backward_euler!() + # uses an adaptive timestep (based on nonlinear solver iteration counts) even + # though it does not use an adaptive RK scheme. + dt_reload = nothing + end # Makes no sense to use write_error_diagnostics because non-adaptive schemes have # no error estimate @@ -336,29 +341,15 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, * "`write_after_fixed_step_count=true`.") end - t_shared = allocate_shared_float(1) - dt_shared = allocate_shared_float(1) - previous_dt_shared = allocate_shared_float(1) - next_output_time = allocate_shared_float(1) - dt_before_output = allocate_shared_float(1) - dt_before_last_fail = allocate_shared_float(1) - step_to_moments_output = allocate_shared_bool(1) - step_to_dfns_output = allocate_shared_bool(1) - write_moments_output = allocate_shared_bool(1) - write_dfns_output = allocate_shared_bool(1) - if block_rank[] == 0 - t_shared[] = code_time - dt_shared[] = dt_reload === nothing ? t_input["dt"] : dt_reload - previous_dt_shared[] = dt_reload === nothing ? t_input["dt"] : dt_reload - next_output_time[] = 0.0 - dt_before_output[] = dt_reload === nothing ? t_input["dt"] : dt_reload - dt_before_last_fail[] = dt_before_last_fail_reload === nothing ? Inf : dt_before_last_fail_reload - step_to_moments_output[] = false - step_to_dfns_output[] = false - write_moments_output[] = false - write_dfns_output[] = false - end - _block_synchronize() + t = Ref(code_time) + dt = Ref(dt_reload === nothing ? t_input["dt"] : dt_reload) + previous_dt = Ref(dt[]) + dt_before_output = Ref(dt[]) + dt_before_last_fail = Ref(dt_before_last_fail_reload === nothing ? 
Inf : dt_before_last_fail_reload) + step_to_moments_output = Ref(false) + step_to_dfns_output = Ref(false) + write_moments_output = Ref(false) + write_dfns_output = Ref(false) end_time = code_time + t_input["dt"] * t_input["nstep"] epsilon = 1.e-11 @@ -405,7 +396,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, end end - if t_input["implicit_vpa_advection"] + if electron !== nothing && t_input["implicit_vpa_advection"] error("implicit_vpa_advection does not work at the moment. Need to figure out " * "what to do with constraints, as explicit and implicit parts would not " * "preserve constaints separately.") @@ -429,19 +420,28 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, else debug_io = nothing end + decrease_dt_iteration_threshold = t_input["decrease_dt_iteration_threshold"] + increase_dt_iteration_threshold = t_input["increase_dt_iteration_threshold"] + cap_factor_ion_dt = t_input["cap_factor_ion_dt"] electron_t_params = nothing elseif electron === false debug_io = nothing + decrease_dt_iteration_threshold = -1 + increase_dt_iteration_threshold = typemax(mk_int) + cap_factor_ion_dt = Inf electron_t_params = nothing else debug_io = nothing + decrease_dt_iteration_threshold = -1 + increase_dt_iteration_threshold = typemax(mk_int) + cap_factor_ion_dt = Inf electron_t_params = electron end - return time_info(n_variables, t_input["nstep"], end_time, t_shared, dt_shared, - previous_dt_shared, next_output_time, dt_before_output, - dt_before_last_fail, CFL_prefactor, step_to_moments_output, - step_to_dfns_output, write_moments_output, write_dfns_output, Ref(0), - Ref(0), Ref(0), Ref(0), mk_int[], mk_int[], t_input["nwrite"], + return time_info(n_variables, t_input["nstep"], end_time, t, dt, previous_dt, + dt_before_output, dt_before_last_fail, CFL_prefactor, + step_to_moments_output, step_to_dfns_output, write_moments_output, + write_dfns_output, Ref(0), Ref(0), Ref{mk_float}(0.0), Ref(0), + Ref(0), Ref(0), mk_int[], 
mk_int[], t_input["nwrite"], t_input["nwrite_dfns"], moments_output_times, dfns_output_times, t_input["type"], rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages, rk_order, adaptive, @@ -455,6 +455,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, electron !== nothing && t_input["implicit_ion_advance"], electron !== nothing && t_input["implicit_vpa_advection"], electron !== nothing && t_input["implicit_electron_ppar"], + t_input["constraint_forcing_rate"], decrease_dt_iteration_threshold, + increase_dt_iteration_threshold, cap_factor_ion_dt, t_input["write_after_fixed_step_count"], error_sum_zero, t_input["split_operators"], t_input["steady_state_residual"], t_input["converged_residual_value"], @@ -478,7 +480,8 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop geometry, boundary_distributions, external_source_settings, num_diss_params, manufactured_solns_input, advection_structs, io_input, restarting, - restart_electron_physics, input_dict) + restart_electron_physics, input_dict; + skip_electron_solve=false) # define some local variables for convenience/tidiness n_ion_species = composition.n_ion_species n_neutral_species = composition.n_neutral_species @@ -660,13 +663,14 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop default_rtol=t_params.rtol / 10.0, default_atol=t_params.atol / 10.0) nl_solver_electron_advance_params = - setup_nonlinear_solve(t_params.implicit_electron_advance, input_dict, - (r=r, z=z, vperp=vperp, vpa=vpa), - (); + setup_nonlinear_solve(t_params.implicit_electron_advance || composition.electron_physics ∈ (kinetic_electrons, kinetic_electrons_with_temperature_equation), + input_dict, + (z=z, vperp=vperp, vpa=vpa), + (r,); default_rtol=t_params.rtol / 10.0, default_atol=t_params.atol / 10.0, electron_ppar_pdf_solve=true, - preconditioner_type="lu") + preconditioner_type="electron_lu") nl_solver_ion_advance_params = 
setup_nonlinear_solve(t_params.implicit_ion_advance, input_dict, (s=composition.n_ion_species, r=r, z=z, vperp=vperp, @@ -688,7 +692,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop error("Cannot use implicit_ion_advance and implicit_vpa_advection at the same " * "time") end - if nl_solver_electron_advance_params !== nothing && t_params.implicit_electron_ppar + if t_params.implicit_electron_advance && t_params.implicit_electron_ppar error("Cannot use implicit_electron_advance and implicit_electron_ppar at the " * "same time.") end @@ -775,7 +779,8 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop external_source_settings, scratch_dummy, scratch, scratch_electron, nl_solver_params, t_params, t_input, num_diss_params, advection_structs, io_input, input_dict; - restart_electron_physics=restart_electron_physics) + restart_electron_physics=restart_electron_physics, + skip_electron_solve=skip_electron_solve) end # update the derivatives of the electron moments as these may be needed when @@ -1490,34 +1495,34 @@ function setup_dummy_and_buffer_arrays(nr, nz, nvpa, nvperp, nvz, nvr, nvzeta, buffer_vpavperpr_5 = allocate_shared_float(nvpa,nvperp,nr) buffer_vpavperpr_6 = allocate_shared_float(nvpa,nvperp,nr) - if t_params.implicit_electron_advance - implicit_buffer_zr_1 = allocate_shared_float(nz,nr) - implicit_buffer_zr_2 = allocate_shared_float(nz,nr) - implicit_buffer_zr_3 = allocate_shared_float(nz,nr) - implicit_buffer_zr_4 = allocate_shared_float(nz,nr) - implicit_buffer_zr_5 = allocate_shared_float(nz,nr) - implicit_buffer_zr_6 = allocate_shared_float(nz,nr) - - implicit_buffer_vpavperpzr_1 = allocate_shared_float(nvpa,nvperp,nz,nr) - implicit_buffer_vpavperpzr_2 = allocate_shared_float(nvpa,nvperp,nz,nr) - implicit_buffer_vpavperpzr_3 = allocate_shared_float(nvpa,nvperp,nz,nr) - implicit_buffer_vpavperpzr_4 = allocate_shared_float(nvpa,nvperp,nz,nr) - implicit_buffer_vpavperpzr_5 = 
allocate_shared_float(nvpa,nvperp,nz,nr) - implicit_buffer_vpavperpzr_6 = allocate_shared_float(nvpa,nvperp,nz,nr) + if t_params.implicit_electron_advance || true + implicit_buffer_z_1 = allocate_shared_float(nz) + implicit_buffer_z_2 = allocate_shared_float(nz) + implicit_buffer_z_3 = allocate_shared_float(nz) + implicit_buffer_z_4 = allocate_shared_float(nz) + implicit_buffer_z_5 = allocate_shared_float(nz) + implicit_buffer_z_6 = allocate_shared_float(nz) + + implicit_buffer_vpavperpz_1 = allocate_shared_float(nvpa,nvperp,nz) + implicit_buffer_vpavperpz_2 = allocate_shared_float(nvpa,nvperp,nz) + implicit_buffer_vpavperpz_3 = allocate_shared_float(nvpa,nvperp,nz) + implicit_buffer_vpavperpz_4 = allocate_shared_float(nvpa,nvperp,nz) + implicit_buffer_vpavperpz_5 = allocate_shared_float(nvpa,nvperp,nz) + implicit_buffer_vpavperpz_6 = allocate_shared_float(nvpa,nvperp,nz) else - implicit_buffer_zr_1 = allocate_shared_float(0,0) - implicit_buffer_zr_2 = allocate_shared_float(0,0) - implicit_buffer_zr_3 = allocate_shared_float(0,0) - implicit_buffer_zr_4 = allocate_shared_float(0,0) - implicit_buffer_zr_5 = allocate_shared_float(0,0) - implicit_buffer_zr_6 = allocate_shared_float(0,0) + implicit_buffer_z_1 = allocate_shared_float(0) + implicit_buffer_z_2 = allocate_shared_float(0) + implicit_buffer_z_3 = allocate_shared_float(0) + implicit_buffer_z_4 = allocate_shared_float(0) + implicit_buffer_z_5 = allocate_shared_float(0) + implicit_buffer_z_6 = allocate_shared_float(0) - implicit_buffer_vpavperpzr_1 = allocate_shared_float(0,0,0,0) - implicit_buffer_vpavperpzr_2 = allocate_shared_float(0,0,0,0) - implicit_buffer_vpavperpzr_3 = allocate_shared_float(0,0,0,0) - implicit_buffer_vpavperpzr_4 = allocate_shared_float(0,0,0,0) - implicit_buffer_vpavperpzr_5 = allocate_shared_float(0,0,0,0) - implicit_buffer_vpavperpzr_6 = allocate_shared_float(0,0,0,0) + implicit_buffer_vpavperpz_1 = allocate_shared_float(0,0,0) + implicit_buffer_vpavperpz_2 = 
allocate_shared_float(0,0,0) + implicit_buffer_vpavperpz_3 = allocate_shared_float(0,0,0) + implicit_buffer_vpavperpz_4 = allocate_shared_float(0,0,0) + implicit_buffer_vpavperpz_5 = allocate_shared_float(0,0,0) + implicit_buffer_vpavperpz_6 = allocate_shared_float(0,0,0) end if t_params.implicit_ion_advance @@ -1571,8 +1576,8 @@ function setup_dummy_and_buffer_arrays(nr, nz, nvpa, nvperp, nvz, nvr, nvzeta, buffer_vpavperpzs_1,buffer_vpavperpzs_2,buffer_vpavperpzs_3,buffer_vpavperpzs_4,buffer_vpavperpzs_5,buffer_vpavperpzs_6, buffer_vpavperprs_1,buffer_vpavperprs_2,buffer_vpavperprs_3,buffer_vpavperprs_4,buffer_vpavperprs_5,buffer_vpavperprs_6, buffer_vpavperpzrs_1,buffer_vpavperpzrs_2, - implicit_buffer_zr_1,implicit_buffer_zr_2,implicit_buffer_zr_3,implicit_buffer_zr_4,implicit_buffer_zr_5,implicit_buffer_zr_6, - implicit_buffer_vpavperpzr_1,implicit_buffer_vpavperpzr_2,implicit_buffer_vpavperpzr_3,implicit_buffer_vpavperpzr_4,implicit_buffer_vpavperpzr_5,implicit_buffer_vpavperpzr_6, + implicit_buffer_z_1,implicit_buffer_z_2,implicit_buffer_z_3,implicit_buffer_z_4,implicit_buffer_z_5,implicit_buffer_z_6, + implicit_buffer_vpavperpz_1,implicit_buffer_vpavperpz_2,implicit_buffer_vpavperpz_3,implicit_buffer_vpavperpz_4,implicit_buffer_vpavperpz_5,implicit_buffer_vpavperpz_6, implicit_buffer_vpavperpzrs_1,implicit_buffer_vpavperpzrs_2,implicit_buffer_vpavperpzrs_3,implicit_buffer_vpavperpzrs_4,implicit_buffer_vpavperpzrs_5,implicit_buffer_vpavperpzrs_6, buffer_vzvrvzetazsn_1,buffer_vzvrvzetazsn_2,buffer_vzvrvzetazsn_3,buffer_vzvrvzetazsn_4,buffer_vzvrvzetazsn_5,buffer_vzvrvzetazsn_6, buffer_vzvrvzetarsn_1,buffer_vzvrvzetarsn_2,buffer_vzvrvzetarsn_3,buffer_vzvrvzetarsn_4,buffer_vzvrvzetarsn_5,buffer_vzvrvzetarsn_6, @@ -1789,10 +1794,7 @@ function time_advance!(pdf, scratch, scratch_implicit, scratch_electron, t_param diagnostic_checks, t_params.step_counter[]) end # update the time - @serial_region begin - t_params.t[] += t_params.previous_dt[] - end - 
_block_synchronize() + t_params.t[] += t_params.previous_dt[] if t_params.t[] ≥ t_params.end_time - epsilon || (t_params.write_after_fixed_step_count && @@ -1818,11 +1820,8 @@ function time_advance!(pdf, scratch, scratch_implicit, scratch_electron, t_param write_moments = t_params.write_moments_output[] || finish_now write_dfns = t_params.write_dfns_output[] || finish_now - _block_synchronize() - @serial_region begin - t_params.write_moments_output[] = false - t_params.write_dfns_output[] = false - end + t_params.write_moments_output[] = false + t_params.write_dfns_output[] = false else write_moments = (t_params.step_counter[] % t_params.nwrite_moments == 0 || t_params.step_counter[] >= t_params.nstep @@ -2009,8 +2008,8 @@ function time_advance!(pdf, scratch, scratch_implicit, scratch_electron, t_param scratch[t_params.n_rk_stages+1], pdf, moments, fields, nothing, nothing, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, external_source_settings, num_diss_params, - t_params, advance, scratch_dummy, false; pdf_bc_constraints=false, - update_electrons=false) + t_params, nl_solver_params, advance, scratch_dummy, false, 0, 0.0; + pdf_bc_constraints=false, update_electrons=false) end if finish_now @@ -2301,7 +2300,8 @@ function apply_all_bcs_constraints_update_moments!( this_scratch, pdf, moments, fields, boundary_distributions, scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, external_source_settings, num_diss_params, - t_params, advance, scratch_dummy, diagnostic_moments; pdf_bc_constraints=true, + t_params, nl_solver_params, advance, scratch_dummy, diagnostic_moments, + max_electron_pdf_iterations, max_electron_sim_time; pdf_bc_constraints=true, update_electrons=true) begin_s_r_z_region() @@ -2364,10 +2364,6 @@ function apply_all_bcs_constraints_update_moments!( composition.electron_physics) if composition.electron_physics ∈ 
(kinetic_electrons, kinetic_electrons_with_temperature_equation) - #max_electron_pdf_iterations = 1000 - #max_electron_sim_time = nothing - max_electron_pdf_iterations = nothing - max_electron_sim_time = 1.0e-3 # Copy ion and electron moments from `scratch` into `moments` to be used in # electron kinetic equation update @@ -2396,15 +2392,14 @@ function apply_all_bcs_constraints_update_moments!( # to the beginning of the ion/neutral timestep, so the electron solution # calculated here would be discarded - we might as well skip calculating it in # that case. - if update_electrons && - !(t_params.implicit_electron_advance || t_params.implicit_electron_ppar) && - success == "" + if update_electrons && !(t_params.implicit_electron_advance) && success == "" kinetic_electron_success = update_electron_pdf!( scratch_electron, pdf.electron.norm, moments, fields.phi, r, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, electron_z_advect, electron_vpa_advect, scratch_dummy, t_params.electron, collisions, composition, external_source_settings, num_diss_params, - max_electron_pdf_iterations, max_electron_sim_time) + nl_solver_params.electron_advance, max_electron_pdf_iterations, + max_electron_sim_time) success = kinetic_electron_success end end @@ -2482,12 +2477,12 @@ end """ adaptive_timestep_update!(scratch, scratch_implicit, scratch_electron, - t_params, moments, fields, + t_params, pdf, moments, fields, boundary_distributions, composition, collisions, geometry, external_source_settings, spectral_objects, - advect_objects, gyroavs, num_diss_params, advance, - scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, - success, nl_max_its_fraction) + advect_objects, gyroavs, num_diss_params, + nl_solver_params, advance, scratch_dummy, r, z, vperp, + vpa, vzeta, vr, vz, success, nl_max_its_fraction) Check the error estimate for the embedded RK method and adjust the timestep if appropriate. 
@@ -2496,9 +2491,9 @@ function adaptive_timestep_update!(scratch, scratch_implicit, scratch_electron, t_params, pdf, moments, fields, boundary_distributions, composition, collisions, geometry, external_source_settings, spectral_objects, - advect_objects, gyroavs, num_diss_params, advance, - scratch_dummy, r, z, vperp, vpa, vzeta, vr, vz, - success, nl_max_its_fraction) + advect_objects, gyroavs, num_diss_params, + nl_solver_params, advance, scratch_dummy, r, z, vperp, + vpa, vzeta, vr, vz, success, nl_max_its_fraction) #error_norm_method = "Linf" error_norm_method = "L2" @@ -2520,11 +2515,6 @@ function adaptive_timestep_update!(scratch, scratch_implicit, scratch_electron, error_norms = error_norm_type[] total_points = mk_int[] - # Read the current dt here, so we only need one _block_synchronize() call for this and - # the begin_s_r_z_vperp_vpa_region() - current_dt = t_params.dt[] - _block_synchronize() - # Test CFL conditions for advection in kinetic equation to give stability limit for # timestep # @@ -2629,8 +2619,8 @@ function adaptive_timestep_update!(scratch, scratch_implicit, scratch_electron, loworder_constraints_scratch, pdf, moments, fields, boundary_distributions, scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, - external_source_settings, num_diss_params, t_params, advance, scratch_dummy, - false; update_electrons=false) + external_source_settings, num_diss_params, t_params, nl_solver_params, advance, + scratch_dummy, false, 0, 0.0; update_electrons=false) # Re-calculate moment derivatives in the `moments` struct, in case they were changed # by the previous call @@ -2638,8 +2628,8 @@ function adaptive_timestep_update!(scratch, scratch_implicit, scratch_electron, scratch[t_params.n_rk_stages+1], pdf, moments, fields, boundary_distributions, scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, - 
external_source_settings, num_diss_params, t_params, advance, scratch_dummy, - false; pdf_bc_constraints=false, update_electrons=false) + external_source_settings, num_diss_params, t_params, nl_solver_params, advance, + scratch_dummy, false, 0, 0.0; pdf_bc_constraints=false, update_electrons=false) # Calculate the timstep error estimates if z.bc == "wall" && (moments.evolve_upar || moments.evolve_ppar) @@ -2811,8 +2801,8 @@ function adaptive_timestep_update!(scratch, scratch_implicit, scratch_electron, end adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, total_points, - current_dt, error_norm_method, success, - nl_max_its_fraction, composition) + error_norm_method, success, nl_max_its_fraction, + composition) if composition.electron_physics ∈ (kinetic_electrons, kinetic_electrons_with_temperature_equation) @@ -2929,6 +2919,9 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, n_rk_stages = t_params.n_rk_stages + max_electron_pdf_iterations = 1000 + max_electron_sim_time = 1.0e-3 + first_scratch = scratch[1] @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin first_scratch.pdf[ivpa,ivperp,iz,ir,is] = pdf.ion.norm[ivpa,ivperp,iz,ir,is] @@ -3022,12 +3015,15 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, # The result of the implicit solve gives the state vector at 'istage' # which is used as input to the explicit part of the IMEX time step. 
old_scratch = scratch_implicit[istage] + update_electrons = !(t_params.implicit_electron_advance || t_params.implicit_electron_ppar) success = apply_all_bcs_constraints_update_moments!( scratch_implicit[istage], pdf, moments, fields, boundary_distributions, scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, external_source_settings, num_diss_params, - t_params, advance, scratch_dummy, false) + t_params, nl_solver_params, advance, scratch_dummy, false, + max_electron_pdf_iterations, max_electron_sim_time; + update_electrons=update_electrons) if success != "" # Break out of the istage loop, as passing `success != ""` to the # adaptive timestep update function will signal a failed timestep, so @@ -3066,15 +3062,16 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, || t_params.implicit_coefficient_is_zero[istage+1]) update_electrons = (t_params.rk_coefs_implicit === nothing || !(t_params.implicit_electron_advance || t_params.implicit_electron_ppar) - || t_params.implicit_coefficient_is_zero[istage+1] + || (istage < n_rk_stages && t_params.implicit_coefficient_is_zero[istage+1]) || (istage == n_rk_stages && t_params.implicit_coefficient_is_zero[1])) diagnostic_moments = diagnostic_checks && istage == n_rk_stages success = apply_all_bcs_constraints_update_moments!( scratch[istage+1], pdf, moments, fields, boundary_distributions, scratch_electron, vz, vr, vzeta, vpa, vperp, z, r, spectral_objects, advect_objects, composition, collisions, geometry, gyroavs, - external_source_settings, num_diss_params, t_params, advance, scratch_dummy, - diagnostic_moments; pdf_bc_constraints=apply_bc_constraints, + external_source_settings, num_diss_params, t_params, nl_solver_params, + advance, scratch_dummy, diagnostic_moments, max_electron_pdf_iterations, + max_electron_sim_time; pdf_bc_constraints=apply_bc_constraints, update_electrons=update_electrons) if success != "" # 
Break out of the istage loop, as passing `success != ""` to the @@ -3086,7 +3083,27 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, if t_params.adaptive nl_max_its_fraction = 0.0 - for p ∈ nl_solver_params + if t_params.implicit_electron_advance + params_to_check = (nl_solver_params.ion_advance, + nl_solver_params.vpa_advection, + nl_solver_params.electron_conduction, + nl_solver_params.electron_advance) + else + # nl_solver_params.electron_advance is used for the backward-Euler timestep in + # electron timestepping, so its iteration count is not relevant here. Instead, + # check the number of electron pseudo-timesteps or pseudo-time increment + # compared to their maximum values + params_to_check = (nl_solver_params.ion_advance, + nl_solver_params.vpa_advection, + nl_solver_params.electron_conduction) + if t_params.electron !== nothing + electron_time_advance_fraction = + min(t_params.electron.max_step_count_this_ion_step[] / max_electron_pdf_iterations, + t_params.electron.max_t_increment_this_ion_step[] / max_electron_sim_time) + nl_max_its_fraction = max(electron_time_advance_fraction, nl_max_its_fraction) + end + end + for p ∈ params_to_check if p !== nothing nl_max_its_fraction = max(p.max_nonlinear_iterations_this_step[] / p.nonlinear_max_iterations, @@ -3098,8 +3115,8 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, boundary_distributions, composition, collisions, geometry, external_source_settings, spectral_objects, advect_objects, gyroavs, num_diss_params, - advance, scratch_dummy, r, z, vperp, vpa, - vzeta, vr, vz, success, nl_max_its_fraction) + nl_solver_params, advance, scratch_dummy, r, z, vperp, + vpa, vzeta, vr, vz, success, nl_max_its_fraction) elseif success != "" error("Implicit part of timestep failed") end @@ -3108,8 +3125,14 @@ function ssp_rk!(pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, # println() #end - 
reset_nonlinear_per_stage_counters(nl_solver_params.ion_advance) - reset_nonlinear_per_stage_counters(nl_solver_params.vpa_advection) + reset_nonlinear_per_stage_counters!(nl_solver_params.ion_advance) + reset_nonlinear_per_stage_counters!(nl_solver_params.vpa_advection) + reset_nonlinear_per_stage_counters!(nl_solver_params.electron_conduction) + if !t_params.implicit_electron_advance && t_params.electron !== nothing + t_params.electron.max_step_count_this_ion_step[] = 0 + t_params.electron.max_t_increment_this_ion_step[] = 0.0 + end + if t_params.previous_dt[] > 0.0 istage = n_rk_stages+1 @@ -3444,8 +3467,8 @@ function euler_time_advance!(fvec_out, fvec_in, pdf, fields, moments, fvec_in.upar, fvec_in.ppar, fvec_in.density_neutral, fvec_in.uz_neutral, fvec_in.pz_neutral, moments.electron, collisions, dt, composition, - external_source_settings.electron, num_diss_params, z; - conduction=advance.electron_conduction) + external_source_settings.electron, num_diss_params, r, + z; conduction=advance.electron_conduction) elseif advance.electron_conduction # Explicit version of the implicit part of the IMEX timestep, need to evaluate # only the conduction term. 
@@ -3476,7 +3499,7 @@ function backward_euler!(fvec_out, fvec_in, scratch_electron, pdf, fields, momen electron_z_advect, electron_vpa_advect = advect_objects.electron_z_advect, advect_objects.electron_vpa_advect neutral_z_advect, neutral_r_advect, neutral_vz_advect = advect_objects.neutral_z_advect, advect_objects.neutral_r_advect, advect_objects.neutral_vz_advect - if nl_solver_params.electron_advance !== nothing + if t_params.implicit_electron_advance success = implicit_electron_advance!(fvec_out, fvec_in, pdf, scratch_electron, moments, fields, collisions, composition, geometry, external_source_settings, @@ -3484,11 +3507,10 @@ function backward_euler!(fvec_out, fvec_in, scratch_electron, pdf, fields, momen r_spectral, z_spectral, vperp_spectral, vpa_spectral, electron_z_advect, electron_vpa_advect, gyroavs, scratch_dummy, - dt, nl_solver_params.electron_advance) + t_params.electron, t_params.dt[], + nl_solver_params.electron_advance) elseif t_params.implicit_electron_ppar - #max_electron_pdf_iterations = 1000 - #max_electron_sim_time = nothing - max_electron_pdf_iterations = nothing + max_electron_pdf_iterations = 1000 max_electron_sim_time = 1.0e-3 electron_success = update_electron_pdf!(scratch_electron, pdf.electron.norm, moments, fields.phi, r, z, vperp, vpa, @@ -3497,6 +3519,7 @@ function backward_euler!(fvec_out, fvec_in, scratch_electron, pdf, fields, momen scratch_dummy, t_params.electron, collisions, composition, external_source_settings, num_diss_params, + nl_solver_params.electron_advance, max_electron_pdf_iterations, max_electron_sim_time; ion_dt=dt) success = (electron_success == "") @@ -3785,8 +3808,6 @@ function implicit_ion_advance!(fvec_out, fvec_in, pdf, fields, moments, advect_o left_preconditioner=left_preconditioner, right_preconditioner=right_preconditioner) - nl_solver_params.stage_counter[] += 1 - return success end diff --git a/moment_kinetics/src/utils.jl b/moment_kinetics/src/utils.jl index b5b0863bb..a7f20ce1e 100644 --- 
a/moment_kinetics/src/utils.jl +++ b/moment_kinetics/src/utils.jl @@ -226,7 +226,14 @@ function get_default_restart_filename(io_input, prefix; error_if_no_file_found=t error("Unrecognized binary_format '$binary_format'") end restart_filename_pattern = joinpath(io_input.output_dir, io_input.run_name * ".$prefix*." * ext) - restart_filename_glob = glob(restart_filename_pattern) + if isabspath(restart_filename_pattern) + # Special handling for absolute paths, as these give an error when `glob()` is + # called normally + restart_filename_glob = glob(basename(restart_filename_pattern), + dirname(restart_filename_pattern)) + else + restart_filename_glob = glob(restart_filename_pattern) + end if length(restart_filename_glob) == 0 if error_if_no_file_found error("No '$prefix' output file to restart from found matching the pattern " diff --git a/moment_kinetics/src/velocity_moments.jl b/moment_kinetics/src/velocity_moments.jl index 096dae7b2..ebb1aeaa2 100644 --- a/moment_kinetics/src/velocity_moments.jl +++ b/moment_kinetics/src/velocity_moments.jl @@ -1010,6 +1010,60 @@ function calculate_electron_moment_derivatives!(moments, scratch, scratch_dummy, buffer_r_2, buffer_r_3, buffer_r_4, z_spectral, z) end +""" +Calculate spatial derivatives of the electron moments. + +This version, for use in implicit solvers for electrons, works with a single point in `r`, +given by `ir`. 
+""" +function calculate_electron_moment_derivatives_no_r!(moments, scratch, scratch_dummy, z, + z_spectral, electron_mom_diss_coeff, + ir) + begin_serial_region() + + dens = @view scratch.electron_density[:,ir] + upar = @view scratch.electron_upar[:,ir] + ppar = @view scratch.electron_ppar[:,ir] + qpar = @view moments.electron.qpar[:,ir] + vth = @view moments.electron.vth[:,ir] + dummy_z = @view scratch_dummy.dummy_zrs[:,ir,1] + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + + @views derivative_z!(moments.electron.dupar_dz[:,ir], upar, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + # centred second derivative for dissipation + if electron_mom_diss_coeff > 0.0 + derivative_z!(dummy_z, ppar, buffer_1, buffer_2, buffer_3, buffer_4, + z_spectral, z) + @views derivative_z!(moments.electron.d2ppar_dz2[:,ir], dummy_z, buffer_1, + buffer_2, buffer_3, buffer_4, z_spectral, z) + end + + @views derivative_z!(moments.electron.ddens_dz[:,ir], dens, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + @views derivative_z!(moments.electron.dppar_dz[:,ir], ppar, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + @views derivative_z!(moments.electron.dqpar_dz[:,ir], qpar, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + @views derivative_z!(moments.electron.dvth_dz[:,ir], vth, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + # calculate the zed derivative of the electron temperature + begin_z_region() + @loop_z iz begin + # store the temperature in dummy_z (ppar and dens are 1D views at fixed ir, so index with iz only) + dummy_z[iz] = 2*ppar[iz]/dens[iz] + end + begin_serial_region() + @views derivative_z!(moments.electron.dT_dz[:,ir], dummy_z, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + @views derivative_z!(moments.electron.dvth_dz[:,ir], moments.electron.vth[:,ir], + buffer_1, buffer_2, buffer_3, buffer_4, z_spectral, 
z) +end + """ update velocity moments of the evolved neutral pdf """ diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index 74033245d..bdb0f6f38 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -140,7 +140,9 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, z_advect, vpa_ f_old = vpa.scratch7 .= f_old_no_bc apply_bc!(f_old) - #if nl_solver_params.stage_counter[] % nl_solver_params.preconditioner_update_interval == 0 + #if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval + # nl_solver_params.solves_since_precon_update[] = 0 + # advection_matrix = allocate_float(vpa.n, vpa.n) # advection_matrix .= 0.0 # for i ∈ 1:vpa.nelement_local @@ -309,8 +311,6 @@ function implicit_vpa_advection!(f_out, fvec_in, fields, moments, z_advect, vpa_ end end - nl_solver_params.stage_counter[] += 1 - return true end diff --git a/moment_kinetics/test/braginskii_electrons_imex_tests.jl b/moment_kinetics/test/braginskii_electrons_imex_tests.jl index 5a2ba1daa..1104271f3 100644 --- a/moment_kinetics/test/braginskii_electrons_imex_tests.jl +++ b/moment_kinetics/test/braginskii_electrons_imex_tests.jl @@ -277,24 +277,19 @@ function runtests() @testset "Braginskii electron IMEX timestepping" verbose=use_verbose begin println("Braginskii electron IMEX timestepping tests") - if Sys.isapple() - @testset_skip "MINPACK is broken on macOS (https://github.com/sglyon/MINPACK.jl/issues/18)" "non-linear solvers" begin - end - else - @testset "Split 3" begin - test_input["output"]["base_directory"] = test_output_directory - run_test(test_input, expected_p, expected_q, expected_vt) - end - @long @testset "Check other timestep - $type" for - type ∈ ("KennedyCarpenterARK437",) + @testset "Split 3" begin + test_input["output"]["base_directory"] = test_output_directory + run_test(test_input, expected_p, expected_q, expected_vt) + end + @long @testset "Check 
other timestep - $type" for + type ∈ ("KennedyCarpenterARK437",) - timestep_check_input = deepcopy(test_input) - timestep_check_input["output"]["base_directory"] = test_output_directory - timestep_check_input["output"]["run_name"] = type - timestep_check_input["timestepping"]["type"] = type - run_test(timestep_check_input, expected_p, expected_q, expected_vt, - rtol=2.e-4, atol=1.e-10) - end + timestep_check_input = deepcopy(test_input) + timestep_check_input["output"]["base_directory"] = test_output_directory + timestep_check_input["output"]["run_name"] = type + timestep_check_input["timestepping"]["type"] = type + run_test(timestep_check_input, expected_p, expected_q, expected_vt, + rtol=2.e-4, atol=1.e-10) end end diff --git a/moment_kinetics/test/fokker_planck_tests.jl b/moment_kinetics/test/fokker_planck_tests.jl index 8f2dafbd7..cd29e94ca 100644 --- a/moment_kinetics/test/fokker_planck_tests.jl +++ b/moment_kinetics/test/fokker_planck_tests.jl @@ -66,8 +66,8 @@ function runtests() @testset "Fokker Planck tests" verbose=use_verbose begin println("Fokker Planck tests") - @testset " - test Lagrange-polynomial 2D interpolation" begin - println(" - test Lagrange-polynomial 2D interpolation") + @testset "Lagrange-polynomial 2D interpolation" begin + println(" - test Lagrange-polynomial 2D interpolation") ngrid = 9 nelement_vpa = 16 nelement_vperp = 8 @@ -140,9 +140,9 @@ function runtests() end - @testset " - test weak-form 2D differentiation" begin + @testset "weak-form 2D differentiation" begin # tests the correct definition of mass and stiffness matrices in 2D - println(" - test weak-form 2D differentiation") + println(" - test weak-form 2D differentiation") ngrid = 9 nelement_vpa = 8 @@ -205,8 +205,8 @@ function runtests() finalize_comms!() end - @testset " - test weak-form Rosenbluth potential calculation: elliptic solve" begin - println(" - test weak-form Rosenbluth potential calculation: elliptic solve") + @testset "weak-form Rosenbluth potential calculation: 
elliptic solve" begin + println(" - test weak-form Rosenbluth potential calculation: elliptic solve") ngrid = 9 nelement_vpa = 8 nelement_vperp = 4 @@ -353,8 +353,8 @@ function runtests() finalize_comms!() end - @testset " - test weak-form collision operator calculation" begin - println(" - test weak-form collision operator calculation") + @testset "weak-form collision operator calculation" begin + println(" - test weak-form collision operator calculation") ngrid = 9 nelement_vpa = 8 nelement_vperp = 4 @@ -510,8 +510,8 @@ function runtests() finalize_comms!() end - @testset " - test weak-form (slowing-down) collision operator calculation" begin - println(" - test weak-form (slowing-down) collision operator calculation") + @testset "weak-form (slowing-down) collision operator calculation" begin + println(" - test weak-form (slowing-down) collision operator calculation") ngrid = 9 nelement_vpa = 16 nelement_vperp = 8 @@ -607,8 +607,8 @@ function runtests() finalize_comms!() end - @testset " - test weak-form Rosenbluth potential calculation: direct integration" begin - println(" - test weak-form Rosenbluth potential calculation: direct integration") + @testset "weak-form Rosenbluth potential calculation: direct integration" begin + println(" - test weak-form Rosenbluth potential calculation: direct integration") ngrid = 5 # chosen for a quick test -- direct integration is slow! nelement_vpa = 8 nelement_vperp = 4 diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl new file mode 100644 index 000000000..7e48cc80f --- /dev/null +++ b/moment_kinetics/test/jacobian_matrix_tests.jl @@ -0,0 +1,2637 @@ +module JacobianMatrixTests + +# Tests for construction of Jacobian matrices used for preconditioning + +include("setup.jl") + +using moment_kinetics: setup_moment_kinetics, cleanup_moment_kinetics! 
+using moment_kinetics.analysis: vpagrid_to_dzdt +using moment_kinetics.array_allocation: allocate_shared_float +using moment_kinetics.boundary_conditions: enforce_v_boundary_condition_local!, + enforce_vperp_boundary_condition! +using moment_kinetics.derivatives: derivative_z! +using moment_kinetics.electron_fluid_equations: calculate_electron_qpar_from_pdf_no_r!, + electron_energy_equation_no_r!, + add_electron_energy_equation_to_Jacobian! +using moment_kinetics.electron_kinetic_equation: add_contribution_from_pdf_term!, + add_contribution_from_electron_pdf_term_to_Jacobian!, + add_dissipation_term!, + add_electron_dissipation_term_to_Jacobian!, + add_ion_dt_forcing_of_electron_ppar_to_Jacobian!, + electron_kinetic_equation_euler_update!, + fill_electron_kinetic_equation_Jacobian! +using moment_kinetics.electron_vpa_advection: electron_vpa_advection!, + add_electron_vpa_advection_to_Jacobian! +using moment_kinetics.electron_z_advection: electron_z_advection!, + update_electron_speed_z!, + add_electron_z_advection_to_Jacobian! +using moment_kinetics.external_sources: total_external_electron_sources!, + add_total_external_electron_source_to_Jacobian! +using moment_kinetics.krook_collisions: electron_krook_collisions!, + add_electron_krook_collisions_to_Jacobian! +using moment_kinetics.looping +using moment_kinetics.moment_constraints: electron_implicit_constraint_forcing!, + add_electron_implicit_constraint_forcing_to_Jacobian!, + hard_force_moment_constraints! +using moment_kinetics.type_definitions: mk_float +using moment_kinetics.velocity_moments: calculate_electron_moment_derivatives_no_r! 
+
+using StatsBase
+
+# Small parameter used to create perturbations to test Jacobian against
+epsilon = 1.0e-6
+test_wavenumber = 2.0
+dt = 0.42
+ion_dt = 1.0e-6
+ir = 1
+zero = 1.0e-14
+
+# Test input uses `z_bc = "constant"`, which is not a very physically useful option, but
+# is useful for testing because:
+# * `z_bc = "wall"` would introduce discontinuities in the distribution function which
+#   might reduce accuracy and so make it harder to see whether errors are due to a mistake
+#   in the matrix construction or just due to discretisation error
+# * For `z_bc = "periodic"`, the Jacobian matrices (by design) do not account for the
+#   periodicity. This should be fine when they are used as preconditioners, but does
+#   introduce errors at the periodic boundaries which would complicate testing.
+test_input = OptionsDict("output" => OptionsDict("run_name" => "jacobian_matrix",
+                                                  ),
+                         "composition" => OptionsDict("n_ion_species" => 1,
+                                                       "n_neutral_species" => 1,
+                                                       "electron_physics" => "kinetic_electrons",
+                                                       "recycling_fraction" => 0.5,
+                                                       "T_e" => 1.0,
+                                                       "T_wall" => 0.1,
+                                                      ),
+                         "evolve_moments" => OptionsDict("density" => true,
+                                                          "parallel_flow" => true,
+                                                          "parallel_pressure" => true,
+                                                          "moments_conservation" => true,
+                                                         ),
+                         "ion_species_1" => OptionsDict("initial_density" => 1.0,
+                                                         "initial_temperature" => 1.0,
+                                                        ),
+                         "z_IC_ion_species_1" => OptionsDict("initialization_option" => "sinusoid",
+                                                              "density_amplitude" => 0.1,
+                                                              "density_phase" => mk_float(π),
+                                                              "upar_amplitude" => 0.1,
+                                                              "upar_phase" => mk_float(π),
+                                                              "temperature_amplitude" => 0.1,
+                                                              "temperature_phase" => mk_float(π),
+                                                             ),
+                         "vpa_IC_ion_species_1" => OptionsDict("initialization_option" => "gaussian",
+                                                                "density_amplitude" => 1.0,
+                                                                "density_phase" => 0.0,
+                                                                "upar_amplitude" => 0.0,
+                                                                "upar_phase" => 0.0,
+                                                                "temperature_amplitude" => 0.0,
+                                                                "temperature_phase" => 0.0,
+                                                               ),
+                         "neutral_species_1" => OptionsDict("initial_density" => 1.0,
+                                                             "initial_temperature" => 1.0,
+                                                            ),
+                         "z_IC_neutral_species_1" => OptionsDict("initialization_option" => "sinusoid",
+                                                                  "density_amplitude" => 0.001,
+                                                                  "density_phase" => mk_float(π),
+                                                                  "upar_amplitude" => 0.0,
+                                                                  "upar_phase" => mk_float(π),
+                                                                  "temperature_amplitude" => 0.0,
+                                                                  "temperature_phase" => mk_float(π),
+                                                                 ),
+                         "vz_IC_neutral_species_1" => OptionsDict("initialization_option" => "gaussian",
+                                                                   "density_amplitude" => 1.0,
+                                                                   "density_phase" => 0.0,
+                                                                   "upar_amplitude" => 0.0,
+                                                                   "upar_phase" => 0.0,
+                                                                   "temperature_amplitude" => 0.0,
+                                                                   "temperature_phase" => 0.0,
+                                                                  ),
+                         "reactions" => OptionsDict("charge_exchange_frequency" => 0.75,
+                                                     "ionization_frequency" => 0.0,
+                                                    ),
+                         "r" => OptionsDict("ngrid" => 1,
+                                             "nelement" => 1,
+                                            ),
+                         "z" => OptionsDict("ngrid" => 9,
+                                             "nelement" => 16,
+                                             "bc" => "constant",
+                                             "discretization" => "gausslegendre_pseudospectral",
+                                            ),
+                         "vpa" => OptionsDict("ngrid" => 6,
+                                               "nelement" => 31,
+                                               "L" => 12.0,
+                                               "bc" => "zero",
+                                               "discretization" => "gausslegendre_pseudospectral",
+                                               "element_spacing_option" => "coarse_tails",
+                                              ),
+                         "vz" => OptionsDict("ngrid" => 6,
+                                              "nelement" => 31,
+                                              "L" => 12.0,
+                                              "bc" => "zero",
+                                              "discretization" => "gausslegendre_pseudospectral",
+                                              "element_spacing_option" => "coarse_tails",
+                                             ),
+                         "timestepping" => Dict{String,Any}("type" => "KennedyCarpenterARK324",
+                                                             "implicit_electron_advance" => false,
+                                                             "implicit_electron_ppar" => true,
+                                                             "implicit_ion_advance" => false,
+                                                             "implicit_vpa_advection" => false,
+                                                             "nstep" => 1,
+                                                             "dt" => ion_dt,
+                                                             "minimum_dt" => 1.0e-7,
+                                                             "rtol" => 1.0e-4,
+                                                             "max_increase_factor_near_last_fail" => 1.001,
+                                                             "last_fail_proximity_factor" => 1.1,
+                                                             "max_increase_factor" => 1.05,
+                                                             "nwrite" => 10000,
+                                                             "nwrite_dfns" => 10000,
+                                                             "steady_state_residual" => true,
+                                                             "converged_residual_value" => 1.0e-3,
+                                                            ),
+                         "electron_timestepping" => Dict{String,Any}("nstep" => 1,
+                                                                      "dt" => dt,
+                                                                      "maximum_dt" => 1.0,
+                                                                      "nwrite" => 10000,
+                                                                      "nwrite_dfns" => 100000,
+                                                                      "type" => "Fekete4(3)",
+                                                                      "rtol" => 1.0e-6,
+                                                                      "atol" => 1.0e-14,
+                                                                      "minimum_dt" => 1.0e-10,
+                                                                      "initialization_residual_value" => 2.5,
+                                                                      "converged_residual_value" => 1.0e-2,
+                                                                      "constraint_forcing_rate" => 2.321,
+                                                                     ),
+                         "nonlinear_solver" => Dict{String,Any}("nonlinear_max_iterations" => 100,
+                                                                 "rtol" => 1.0e-5,
+                                                                 "atol" => 1.0e-15,
+                                                                 "preconditioner_update_interval" => 1,
+                                                                ),
+                         "ion_numerical_dissipation" => Dict{String,Any}("vpa_dissipation_coefficient" => 1.0e0,
+                                                                          "force_minimum_pdf_value" => 0.0,
+                                                                         ),
+                         "electron_numerical_dissipation" => Dict{String,Any}("vpa_dissipation_coefficient" => 2.0,
+                                                                               "force_minimum_pdf_value" => 0.0,
+                                                                              ),
+                         "neutral_numerical_dissipation" => Dict{String,Any}("vz_dissipation_coefficient" => 1.0e-1,
+                                                                              "force_minimum_pdf_value" => 0.0,
+                                                                             ),
+                         "ion_source_1" => Dict{String,Any}("active" => true,
+                                                             "z_profile" => "gaussian",
+                                                             "z_width" => 0.125,
+                                                             "source_strength" => 0.1,
+                                                             "source_T" => 2.0,
+                                                            ),
+                         "krook_collisions" => Dict{String,Any}("use_krook" => true),
+                        )
+
+# Run setup_moment_kinetics() on `test_input` (skipping the electron solve) inside
+# quietoutput(), and return whatever it returns.
+function get_mk_state(test_input)
+    mk_state = nothing
+    quietoutput() do
+        mk_state = setup_moment_kinetics(test_input; skip_electron_solve=true)
+    end
+    return mk_state
+end
+# Forward `args...` to cleanup_moment_kinetics!() inside quietoutput().
+function cleanup_mk_state!(args...)
+    quietoutput() do
+        cleanup_moment_kinetics!(args...)
+    end
+    return nothing
+end
+
+# Build the normalisation used when comparing residuals.
+#
+# For a 3D `perturbed_residual`, take the mean of `abs.(perturbed_residual)` over the
+# third dimension, then smooth the result along the first dimension with a moving
+# average of half-width `w`. The window is clipped at the ends of the array, so near the
+# edges fewer points are averaged. The returned array has the size of the
+# dimension-3 mean, i.e. `(size(perturbed_residual, 1), size(perturbed_residual, 2), 1)`.
+#
+# Every index of the second dimension is smoothed independently, so no entry of the
+# returned array is left uninitialised, and first dimensions shorter than `2*w+1` are
+# handled by the clipped window instead of indexing out of bounds.
+function generate_norm_factor(perturbed_residual::AbstractArray{mk_float,3})
+    # half-width of the window for moving average
+    w = 3
+    norm_factor_unsmoothed = mean(abs.(perturbed_residual); dims=3)
+    # Smooth the 'norm_factor' with a moving average to avoid problems due to places where
+    # norm_factor happens to be (almost) zero
+    norm_factor = similar(norm_factor_unsmoothed)
+    n1 = size(norm_factor_unsmoothed, 1)
+    for i2 ∈ axes(norm_factor_unsmoothed, 2), i ∈ 1:n1
+        # Clipped window: 1:i+w near the start, i-w:n1 near the end, i-w:i+w elsewhere
+        window = max(1, i-w):min(n1, i+w)
+        norm_factor[i,i2,1] = mean(norm_factor_unsmoothed[window,i2,1])
+    end
+    return norm_factor
+end
+# For a 1D `perturbed_residual`, return the mean of its absolute values as a 1-element
+# array (no smoothing is applied).
+function generate_norm_factor(perturbed_residual::AbstractArray{mk_float,1})
+    norm_factor_unsmoothed = mean(abs.(perturbed_residual); dims=1)
+    return norm_factor_unsmoothed
+end
+
+# Quite a large multiplier in rtol for this test, but it is plausible that a nonlinear
+# error (∼epsilon^2) could be multiplied by ∼vth*vpa.L/2∼sqrt(2)*60*6≈500.
+# Check the matrix built by add_electron_z_advection_to_Jacobian!() against the change
+# in the residual of electron_z_advection!() caused by small perturbations `delta_f`
+# and `delta_p` (amplitude ∼epsilon). `rtol` is passed as `atol` to the comparisons,
+# which are made on residuals divided by generate_norm_factor().
+function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2)
+    test_input = deepcopy(test_input)
+    test_input["output"]["run_name"] *= "_electron_z_advection"
+    println(" - electron_z_advection")
+
+    @testset "electron_z_advection" begin
+        # Suppress console output while running
+        pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa,
+            vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs,
+            composition, collisions, geometry, gyroavs, boundary_distributions,
+            external_source_settings, num_diss_params, nl_solver_params, advance,
+            advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list,
+            ascii_io, io_moments, io_dfns = get_mk_state(test_input)
+
+        dens = @view moments.electron.dens[:,ir]
+        upar = @view moments.electron.upar[:,ir]
+        ppar = @view moments.electron.ppar[:,ir]
+        vth = @view moments.electron.vth[:,ir]
+        qpar = @view moments.electron.qpar[:,ir]
+        z_spectral = spectral_objects.z_spectral
+        vpa_spectral = spectral_objects.vpa_spectral
+        z_advect = advection_structs.z_advect
+        vpa_advect = advection_structs.vpa_advect
+        me = composition.me_over_mi
+
+        delta_p = allocate_shared_float(size(ppar)...)
+        p_amplitude = epsilon * maximum(ppar)
+        f = @view pdf.electron.norm[:,:,:,ir]
+        begin_serial_region()
+        @serial_region begin
+            @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L)
+
+            # Make sure initial condition has some z-variation. As f is 'moment kinetic' this
+            # means f must have a non-Maxwellian part that varies in z.
+            f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n)
+        end
+        # Ensure initial electron distribution function obeys constraints
+        hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa)
+        delta_f = allocate_shared_float(size(f)...)
+        f_amplitude = epsilon * maximum(f)
+        # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes
+        # low-order moments vanish exactly.
+        begin_serial_region()
+        @serial_region begin
+            delta_f .= f_amplitude .*
+                       reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .*
+                       reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .*
+                       f
+        end
+
+        # State vector layout: the first pdf_size entries are vec(f), followed by
+        # p_size entries for ppar.
+        pdf_size = length(f)
+        p_size = length(ppar)
+        total_size = pdf_size + p_size
+
+        # NOTE(review): only the diagonal is written below; this relies on
+        # allocate_shared_float() returning zero-initialised memory -- TODO confirm.
+        jacobian_matrix = allocate_shared_float(total_size, total_size)
+        @serial_region begin
+            for row ∈ 1:total_size
+                # Initialise identity matrix
+                jacobian_matrix[row,row] = 1.0
+            end
+        end
+
+        add_electron_z_advection_to_Jacobian!(
+            jacobian_matrix, f, dens, upar, ppar, vth, me, z, vperp, vpa, z_spectral,
+            z_advect, scratch_dummy, dt, ir; ppar_offset=pdf_size)
+
+        # Evaluate into `residual` the quantity
+        #   this_f - (f + [change applied by electron_z_advection!() with timestep dt])
+        # for the trial state (this_f, this_p), with entries fixed by boundary
+        # conditions set to zero. Overwrites `vth` and `qpar` as a side effect.
+        function residual_func!(residual, this_f, this_p)
+            begin_z_region()
+            @loop_z iz begin
+                # update the electron thermal speed using the updated electron
+                # parallel pressure
+                vth[iz] = sqrt(abs(2.0 * this_p[iz] /
+                                   (dens[iz] * composition.me_over_mi)))
+            end
+            # Calculate heat flux and derivatives using new_variables
+            calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir)
+
+            calculate_electron_moment_derivatives_no_r!(
+                moments,
+                (electron_density=dens,
+                 electron_upar=upar,
+                 electron_ppar=this_p),
+                scratch_dummy, z, z_spectral,
+                num_diss_params.electron.moment_dissipation_coefficient, ir)
+
+            # `residual` is first set to the unperturbed `f`, then electron_z_advection!()
+            # is applied with timestep `dt` and `this_f` as the input distribution
+            # function (presumably adding dt times the z-advection contribution to
+            # d(g_e)/dt -- TODO confirm against electron_z_advection!()).
+            # NOTE(review): this comment previously described
+            # electron_kinetic_equation_euler_update!() being called with dt=1, which
+            # does not match the call below.
+            begin_z_vperp_vpa_region()
+            @loop_z_vperp_vpa iz ivperp ivpa begin
+                residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz]
+            end
+            electron_z_advection!(residual, this_f, upar, vth, z_advect, z, vpa.grid,
+                                  z_spectral, scratch_dummy, dt, ir)
+            # Now
+            #   residual = f_electron_old + dt*RHS(f_electron_newvar)
+            # so update to desired residual
+            begin_z_vperp_vpa_region()
+            @loop_z_vperp_vpa iz ivperp ivpa begin
+                residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz]
+            end
+
+            # Set residual to zero where pdf_electron is determined by boundary conditions.
+            if vpa.n > 1
+                begin_z_vperp_region()
+                @loop_z_vperp iz ivperp begin
+                    @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc,
+                                                               vpa_advect[1].speed[:,ivperp,iz,ir],
+                                                               num_diss_params.electron.vpa_dissipation_coefficient > 0.0,
+                                                               vpa, vpa_spectral)
+                end
+            end
+            if vperp.n > 1
+                # NOTE(review): `vperp_spectral`, `vperp_adv` and `vperp_diffusion` are
+                # not defined anywhere in the visible part of this file, so this branch
+                # would presumably raise UndefVarError if a test input with vperp.n > 1
+                # were used -- verify.
+                begin_z_vpa_region()
+                enforce_vperp_boundary_condition!(residual, vperp.bc,
+                                                  vperp, vperp_spectral, vperp_adv,
+                                                  vperp_diffusion, ir)
+            end
+            if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
+                # Boundary conditions on incoming part of distribution function. Note
+                # that as density, upar, ppar do not change in this implicit step,
+                # f_electron_newvar, f_old, and residual should all be zero at exactly
+                # the same set of grid points, so it is reasonable to zero-out
+                # `residual` to impose the boundary condition. We impose this after
+                # subtracting f_old in case rounding errors, etc. mean that at some
+                # point f_old had a different boundary condition cut-off index.
+                begin_vperp_vpa_region()
+                v_unnorm = vpa.scratch
+                # Local tolerance; same value as the module-level `zero` defined above
+                zero = 1.0e-14
+                if z.irank == 0
+                    iz = 1
+                    v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true)
+                    @loop_vperp_vpa ivperp ivpa begin
+                        if v_unnorm[ivpa] > -zero
+                            residual[ivpa,ivperp,iz] = 0.0
+                        end
+                    end
+                end
+                if z.irank == z.nrank - 1
+                    iz = z.n
+                    v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true)
+                    @loop_vperp_vpa ivperp ivpa begin
+                        if v_unnorm[ivpa] < zero
+                            residual[ivpa,ivperp,iz] = 0.0
+                        end
+                    end
+                end
+            end
+            return nothing
+        end
+
+        original_residual = allocate_shared_float(size(f)...)
+        perturbed_residual = allocate_shared_float(size(f)...)
+
+        @testset "δf only" begin
+            residual_func!(original_residual, f, ppar)
+            residual_func!(perturbed_residual, f.+delta_f, ppar)
+
+            begin_serial_region()
+            @serial_region begin
+                delta_state = zeros(mk_float, total_size)
+                delta_state[1:pdf_size] .= vec(delta_f)
+                residual_update_with_Jacobian = jacobian_matrix * delta_state
+                perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size]
+
+                # Check ppar did not get perturbed by the Jacobian
+                @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end],
+                                           zeros(p_size); atol=1.0e-15)
+
+                # Compare residuals after dividing by the smoothed norm factor; since the
+                # values are already normalised, `rtol` is passed as `atol`.
+                norm_factor = generate_norm_factor(perturbed_residual)
+                @test elementwise_isapprox(perturbed_residual ./ norm_factor,
+                                           reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor;
+                                           rtol=0.0, atol=rtol)
+            end
+        end
+
+        @testset "δp only" begin
+            residual_func!(original_residual, f, ppar)
+            residual_func!(perturbed_residual, f, ppar .+ delta_p)
+
+            begin_serial_region()
+            @serial_region begin
+                delta_state = zeros(mk_float, total_size)
+                delta_state[pdf_size+1:end] .= vec(delta_p)
+                residual_update_with_Jacobian = jacobian_matrix * delta_state
+                perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size]
+
+                # Check that the ppar rows of the Jacobian-vector product just return
+                # delta_p, i.e. the ppar rows were left as the identity
+                @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end],
+                                           delta_state[pdf_size+1:end]; atol=1.0e-15)
+
+                norm_factor = generate_norm_factor(perturbed_residual)
+                @test elementwise_isapprox(perturbed_residual ./ norm_factor,
+                                           reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor;
+                                           rtol=0.0, atol=rtol)
+            end
+        end
+
+        @testset "δf and δp" begin
+            residual_func!(original_residual, f, ppar)
+            residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p)
+
+            begin_serial_region()
+            @serial_region begin
+                delta_state = zeros(mk_float, total_size)
+                delta_state[1:pdf_size] .= vec(delta_f)
+                delta_state[pdf_size+1:end] .= vec(delta_p)
+                residual_update_with_Jacobian = jacobian_matrix * delta_state
+                perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size]
+
+                # Check that the ppar rows of the Jacobian-vector product just return
+                # delta_p, i.e. the ppar rows were left as the identity
+                @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end],
+                                           delta_state[pdf_size+1:end]; atol=1.0e-15)
+
+                norm_factor = generate_norm_factor(perturbed_residual)
+                @test elementwise_isapprox(perturbed_residual ./ norm_factor,
+                                           reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor;
+                                           rtol=0.0, atol=rtol)
+            end
+        end
+
+        cleanup_mk_state!(ascii_io, io_moments, io_dfns)
+    end
+
+    return nothing
+end
+
+# Check the matrix built by add_electron_vpa_advection_to_Jacobian!() against the change
+# in the residual of electron_vpa_advection!() caused by small perturbations `delta_f`
+# and `delta_p` (amplitude ∼epsilon). `rtol` is passed as `atol` to the comparisons,
+# which are made on residuals divided by generate_norm_factor().
+function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2)
+    test_input = deepcopy(test_input)
+    test_input["output"]["run_name"] *= "_electron_vpa_advection"
+    println(" - electron_vpa_advection")
+
+    @testset "electron_vpa_advection" begin
+        # Suppress console output while running
+        pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa,
+            vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs,
+            composition, collisions, geometry, gyroavs, boundary_distributions,
+            external_source_settings, num_diss_params, nl_solver_params, advance,
+            advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list,
+            ascii_io, io_moments, io_dfns = get_mk_state(test_input)
+
+        dens = @view moments.electron.dens[:,ir]
+        upar = @view moments.electron.upar[:,ir]
+        ppar = @view moments.electron.ppar[:,ir]
+        vth = @view moments.electron.vth[:,ir]
+        qpar = @view moments.electron.qpar[:,ir]
+        ddens_dz = @view moments.electron.ddens_dz[:,ir]
+        dppar_dz = @view moments.electron.dppar_dz[:,ir]
+        z_spectral = spectral_objects.z_spectral
+        vpa_spectral = spectral_objects.vpa_spectral
+        z_advect = advection_structs.z_advect
+        vpa_advect = advection_structs.vpa_advect
+        me = composition.me_over_mi
+
+        buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1]
+        buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1]
+        buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1]
+        buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1]
+
+        # Reconstruct w_∥^3 moment of g_e from already-calculated qpar
+        # NOTE(review): this is evaluated before `f` is modified and re-constrained
+        # below, so `third_moment` may not correspond exactly to the `f` that is passed
+        # to the Jacobian construction -- verify whether that matters at this tolerance.
+        third_moment = scratch_dummy.buffer_z_1
+        dthird_moment_dz = scratch_dummy.buffer_z_2
+        begin_z_region()
+        @loop_z iz begin
+            third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz]
+        end
+        derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2,
+                      buffer_3, buffer_4, z_spectral, z)
+
+        begin_vperp_vpa_region()
+        update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir)
+        z_speed = @view z_advect[1].speed[:,:,:,ir]
+
+        delta_p = allocate_shared_float(size(ppar)...)
+        p_amplitude = epsilon * maximum(ppar)
+        f = @view pdf.electron.norm[:,:,:,ir]
+        begin_serial_region()
+        @serial_region begin
+            @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L)
+
+            # Make sure initial condition has some z-variation. As f is 'moment kinetic' this
+            # means f must have a non-Maxwellian part that varies in z.
+            f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n)
+        end
+        # Ensure initial electron distribution function obeys constraints
+        hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa)
+        delta_f = allocate_shared_float(size(f)...)
+        f_amplitude = epsilon * maximum(f)
+        # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes
+        # low-order moments vanish exactly.
+        begin_serial_region()
+        @serial_region begin
+            delta_f .= f_amplitude .*
+                       reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .*
+                       reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .*
+                       f
+        end
+
+        # State vector layout: the first pdf_size entries are vec(f), followed by
+        # p_size entries for ppar.
+        pdf_size = length(f)
+        p_size = length(ppar)
+        total_size = pdf_size + p_size
+
+        # NOTE(review): only the diagonal is written below; this relies on
+        # allocate_shared_float() returning zero-initialised memory -- TODO confirm.
+        jacobian_matrix = allocate_shared_float(total_size, total_size)
+        begin_serial_region()
+        @serial_region begin
+            for row ∈ 1:total_size
+                # Initialise identity matrix
+                jacobian_matrix[row,row] = 1.0
+            end
+        end
+
+        add_electron_vpa_advection_to_Jacobian!(
+            jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz,
+            dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral,
+            vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir;
+            ppar_offset=pdf_size)
+
+        # Evaluate into `residual` the quantity
+        #   this_f - (f + [change applied by electron_vpa_advection!() with timestep dt])
+        # for the trial state (this_f, this_p), with entries fixed by boundary
+        # conditions set to zero. Overwrites `vth` and `qpar` as a side effect.
+        function residual_func!(residual, this_f, this_p)
+            begin_z_region()
+            @loop_z iz begin
+                # update the electron thermal speed using the updated electron
+                # parallel pressure
+                vth[iz] = sqrt(abs(2.0 * this_p[iz] /
+                                   (dens[iz] * composition.me_over_mi)))
+            end
+            # Calculate heat flux and derivatives using new_variables
+            calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir)
+
+            calculate_electron_moment_derivatives_no_r!(
+                moments,
+                (electron_density=dens,
+                 electron_upar=upar,
+                 electron_ppar=this_p),
+                scratch_dummy, z, z_spectral,
+                num_diss_params.electron.moment_dissipation_coefficient, ir)
+
+            # `residual` is first set to the unperturbed `f`, then
+            # electron_vpa_advection!() is applied with timestep `dt` and `this_f` as the
+            # input distribution function (presumably adding dt times the vpa-advection
+            # contribution to d(g_e)/dt -- TODO confirm against
+            # electron_vpa_advection!()).
+            # NOTE(review): this comment previously described
+            # electron_kinetic_equation_euler_update!() being called with dt=1, which
+            # does not match the call below.
+            begin_z_vperp_vpa_region()
+            @loop_z_vperp_vpa iz ivperp ivpa begin
+                residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz]
+            end
+            electron_vpa_advection!(residual, this_f, dens, upar, this_p, moments,
+                                    vpa_advect, vpa, vpa_spectral, scratch_dummy, dt,
+                                    external_source_settings.electron, ir)
+            # Now
+            #   residual = f_electron_old + dt*RHS(f_electron_newvar)
+            # so update to desired residual
+            begin_z_vperp_vpa_region()
+            @loop_z_vperp_vpa iz ivperp ivpa begin
+                residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz]
+            end
+
+            # Set residual to zero where pdf_electron is determined by boundary conditions.
+            if vpa.n > 1
+                begin_z_vperp_region()
+                @loop_z_vperp iz ivperp begin
+                    @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc,
+                                                               vpa_advect[1].speed[:,ivperp,iz,ir],
+                                                               num_diss_params.electron.vpa_dissipation_coefficient > 0.0,
+                                                               vpa, vpa_spectral)
+                end
+            end
+            if vperp.n > 1
+                # NOTE(review): `vperp_spectral`, `vperp_adv` and `vperp_diffusion` are
+                # not defined anywhere in the visible part of this file, so this branch
+                # would presumably raise UndefVarError if a test input with vperp.n > 1
+                # were used -- verify.
+                begin_z_vpa_region()
+                enforce_vperp_boundary_condition!(residual, vperp.bc,
+                                                  vperp, vperp_spectral, vperp_adv,
+                                                  vperp_diffusion, ir)
+            end
+            if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
+                # Boundary conditions on incoming part of distribution function. Note
+                # that as density, upar, ppar do not change in this implicit step,
+                # f_electron_newvar, f_old, and residual should all be zero at exactly
+                # the same set of grid points, so it is reasonable to zero-out
+                # `residual` to impose the boundary condition. We impose this after
+                # subtracting f_old in case rounding errors, etc. mean that at some
+                # point f_old had a different boundary condition cut-off index.
+                begin_vperp_vpa_region()
+                v_unnorm = vpa.scratch
+                # Local tolerance; same value as the module-level `zero` defined above
+                zero = 1.0e-14
+                if z.irank == 0
+                    iz = 1
+                    v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true)
+                    @loop_vperp_vpa ivperp ivpa begin
+                        if v_unnorm[ivpa] > -zero
+                            residual[ivpa,ivperp,iz] = 0.0
+                        end
+                    end
+                end
+                if z.irank == z.nrank - 1
+                    iz = z.n
+                    v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true)
+                    @loop_vperp_vpa ivperp ivpa begin
+                        if v_unnorm[ivpa] < zero
+                            residual[ivpa,ivperp,iz] = 0.0
+                        end
+                    end
+                end
+            end
+            return nothing
+        end
+
+        original_residual = allocate_shared_float(size(f)...)
+        perturbed_residual = allocate_shared_float(size(f)...)
+
+        @testset "δf only" begin
+            residual_func!(original_residual, f, ppar)
+            residual_func!(perturbed_residual, f.+delta_f, ppar)
+
+            begin_serial_region()
+            @serial_region begin
+                delta_state = zeros(mk_float, total_size)
+                delta_state[1:pdf_size] .= vec(delta_f)
+                residual_update_with_Jacobian = jacobian_matrix * delta_state
+                perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size]
+
+                # Check ppar did not get perturbed by the Jacobian
+                @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end],
+                                           zeros(p_size); atol=1.0e-15)
+
+                # Divide out the z-average of the magnitude of perturbed_residual from the
+                # difference, so that different orders of magnitude at different w_∥ are all
+                # tested sensibly, but occasional small values of the residual do not make the
+                # test fail.
+                # Since we have already normalised, pass `rtol` to `atol` for the comparison.
+                norm_factor = generate_norm_factor(perturbed_residual)
+                @test elementwise_isapprox(perturbed_residual ./ norm_factor,
+                                           reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor;
+                                           rtol=0.0, atol=rtol)
+            end
+        end
+
+        @testset "δp only" begin
+            residual_func!(original_residual, f, ppar)
+            residual_func!(perturbed_residual, f, ppar .+ delta_p)
+
+            begin_serial_region()
+            @serial_region begin
+                delta_state = zeros(mk_float, total_size)
+                delta_state[pdf_size+1:end] .= vec(delta_p)
+                residual_update_with_Jacobian = jacobian_matrix * delta_state
+                perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size]
+
+                # Check that the ppar rows of the Jacobian-vector product just return
+                # delta_p, i.e. the ppar rows were left as the identity
+                @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end],
+                                           delta_state[pdf_size+1:end]; atol=1.0e-15)
+
+                # Divide out the z-average of the magnitude of perturbed_residual from the
+                # difference, so that different orders of magnitude at different w_∥ are all
+                # tested sensibly, but occasional small values of the residual do not make the
+                # test fail.
+                # Since we have already normalised, pass `rtol` to `atol` for the comparison.
+                norm_factor = generate_norm_factor(perturbed_residual)
+                @test elementwise_isapprox(perturbed_residual ./ norm_factor,
+                                           reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor;
+                                           rtol=0.0, atol=rtol)
+            end
+        end
+
+        @testset "δf and δp" begin
+            residual_func!(original_residual, f, ppar)
+            residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p)
+
+            begin_serial_region()
+            @serial_region begin
+                delta_state = zeros(mk_float, total_size)
+                delta_state[1:pdf_size] .= vec(delta_f)
+                delta_state[pdf_size+1:end] .= vec(delta_p)
+                residual_update_with_Jacobian = jacobian_matrix * delta_state
+                perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size]
+
+                # Check that the ppar rows of the Jacobian-vector product just return
+                # delta_p, i.e. the ppar rows were left as the identity
+                @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end],
+                                           delta_state[pdf_size+1:end]; atol=1.0e-15)
+
+                # Divide out the z-average of the magnitude of perturbed_residual from the
+                # difference, so that different orders of magnitude at different w_∥ are all
+                # tested sensibly, but occasional small values of the residual do not make the
+                # test fail.
+                # Since we have already normalised, pass `rtol` to `atol` for the comparison.
+                norm_factor = generate_norm_factor(perturbed_residual)
+                @test elementwise_isapprox(perturbed_residual ./ norm_factor,
+                                           reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor;
+                                           rtol=0.0, atol=rtol)
+            end
+        end
+
+        cleanup_mk_state!(ascii_io, io_moments, io_dfns)
+    end
+
+    return nothing
+end
+
+function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilon)^2) + test_input = deepcopy(test_input) + test_input["output"]["run_name"] *= "_contribution_from_electron_pdf_term" + println(" - contribution_from_electron_pdf_term") + + @testset "contribution_from_electron_pdf_term" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + dqpar_dz = @view moments.electron.dqpar_dz[:,ir] + dvth_dz = @view moments.electron.dvth_dz[:,ir] + z_spectral = spectral_objects.z_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + me = composition.me_over_mi + + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar +
third_moment = scratch_dummy.buffer_z_1 + dthird_moment_dz = scratch_dummy.buffer_z_2 + begin_z_region() + @loop_z iz begin + third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz] + end + derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = allocate_shared_float(size(ppar)...) + p_amplitude = epsilon * maximum(ppar) + f = @view pdf.electron.norm[:,:,:,ir] + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) + delta_f = allocate_shared_float(size(f)...) + f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. 
+ begin_serial_region() + @serial_region begin + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + end + + add_contribution_from_electron_pdf_term_to_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, + dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z, + vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir; ppar_offset=pdf_size) + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. 
+ begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + add_contribution_from_pdf_term!(residual, this_f, this_p, dens, upar, moments, + vpa.grid, z, dt, + external_source_settings.electron, ir) + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index. 
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] > -zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + end + return nothing + end + + original_residual = allocate_shared_float(size(f)...) + perturbed_residual = allocate_shared_float(size(f)...) + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f, ppar .+ delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test 
elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + cleanup_mk_state!(ascii_io, io_moments, io_dfns) + end + + return nothing +end + +function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2) + test_input = deepcopy(test_input) + test_input["output"]["run_name"] *= "_electron_dissipation_term" + println(" - electron_dissipation_term") + + @testset "electron_dissipation_term" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, 
manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] + z_spectral = spectral_objects.z_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = allocate_shared_float(size(ppar)...) + p_amplitude = epsilon * maximum(ppar) + f = @view pdf.electron.norm[:,:,:,ir] + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) + delta_f = allocate_shared_float(size(f)...) + f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. 
+ begin_serial_region() + @serial_region begin + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + end + + add_electron_dissipation_term_to_Jacobian!( + jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, + ir) + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. 
+ begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + add_dissipation_term!(residual, this_f, scratch_dummy, z_spectral, z, vpa, + vpa_spectral, num_diss_params, dt) + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index. 
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] > -zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + end + return nothing + end + + original_residual = allocate_shared_float(size(f)...) + perturbed_residual = allocate_shared_float(size(f)...) + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f, ppar .+ delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test 
elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + cleanup_mk_state!(ascii_io, io_moments, io_dfns) + end + + return nothing +end + +function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2) + test_input = deepcopy(test_input) + test_input["output"]["run_name"] *= "_electron_krook_collisions" + println(" - electron_krook_collisions") + + @testset "electron_krook_collisions" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, 
manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] + z_spectral = spectral_objects.z_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + + # Modify upar_ion to make sure it is different from upar_electron so that the term + # proportional to (u_i-u_e) gets tested in case it is ever needed. + upar_ion = @view moments.ion.upar[:,ir,1] + @. upar_ion += sin(4.0*π*test_wavenumber*z.grid/z.L) + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = allocate_shared_float(size(ppar)...) + p_amplitude = epsilon * maximum(ppar) + f = @view pdf.electron.norm[:,:,:,ir] + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) + delta_f = allocate_shared_float(size(f)...) + f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. 
+ begin_serial_region() + @serial_region begin + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + end + + add_electron_krook_collisions_to_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, @view(moments.ion.upar[:,ir]), + collisions, z, vperp, vpa, z_speed, dt, ir; ppar_offset=pdf_size) + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. 
+ begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + @views electron_krook_collisions!(residual, this_f, dens, upar, + moments.ion.upar[:,ir], vth, collisions, + vperp, vpa, dt) + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index. 
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] > -zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + end + return nothing + end + + original_residual = allocate_shared_float(size(f)...) + perturbed_residual = allocate_shared_float(size(f)...) + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f, ppar .+ delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test 
elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + cleanup_mk_state!(ascii_io, io_moments, io_dfns) + end + + return nothing +end + +function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) + test_input = deepcopy(test_input) + test_input["output"]["run_name"] *= "_external_electron_source" + println(" - external_electron_source") + + @testset "external_electron_source" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + 
ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + z_spectral = spectral_objects.z_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + me = composition.me_over_mi + + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar + third_moment = scratch_dummy.buffer_z_1 + dthird_moment_dz = scratch_dummy.buffer_z_2 + begin_z_region() + @loop_z iz begin + third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz] + end + derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = allocate_shared_float(size(ppar)...) + p_amplitude = epsilon * maximum(ppar) + f = @view pdf.electron.norm[:,:,:,ir] + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) + delta_f = allocate_shared_float(size(f)...) 
+ f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. + begin_serial_region() + @serial_region begin + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + end + + add_total_external_electron_source_to_Jacobian!( + jacobian_matrix, f, moments, me, z_speed, external_source_settings.electron, + z, vperp, vpa, dt, ir; ppar_offset=pdf_size) + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. 
+ begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + total_external_electron_sources!(residual, this_f, dens, upar, moments, + composition, + external_source_settings.electron, vperp, + vpa, dt, ir) + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index. 
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] > -zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + end + return nothing + end + + original_residual = allocate_shared_float(size(f)...) + perturbed_residual = allocate_shared_float(size(f)...) + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f, ppar .+ delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test 
elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + cleanup_mk_state!(ascii_io, io_moments, io_dfns) + end + + return nothing +end + +# For this test where only the 'constraint forcing' term is added to the residual, the +# residual is exactly zero for the initial condition (because that is constructed to obey +# the constraints). Therefore the 'perturbed_residual' is non-zero only because of +# delta_f, which is small, O(epsilon), so 'norm_factor' is also O(epsilon). We therefore +# use a tolerance of O(epsilon) in this test, unlike the other tests which use a tolerance +# of O(epsilon^2). 
Note that in the final test of the full electron kinetic equations, +# with all terms including this one, we do not have a similar issue, as there the other +# terms create an O(1) residual for the initial condition, which will then set the +# 'norm_factor'. +# +# We test the Jacobian for these constraint forcing terms using +# constraint_forcing_rate=O(1), because in these tests we set dt=O(1), so a large +# coefficient would make the non-linearity large, and it would then be hard to distinguish +# implementation errors from non-linear effects (or rounding errors) in +# `test_electron_kinetic_equation()`, which tests the combined effect of all terms in the +# electron kinetic equation. This test on its own would actually be OK with a large +# coefficient, because the ratio of linear to non-linear contributions of this single term +# does not depend on the size of the coefficient. In the combined test, we are +# effectively comparing the non-linear error from this term to the residual from other +# terms, so the coefficient of this term matters there. Even though these settings are not +# what we would use in a real simulation, they should tell us if the implementation is +# correct. 
+function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsilon)) + test_input = deepcopy(test_input) + test_input["output"]["run_name"] *= "_electron_implicit_constraint_forcing" + println(" - electron_implicit_constraint_forcing") + + @testset "electron_implicit_constraint_forcing" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] + z_spectral = spectral_objects.z_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = allocate_shared_float(size(ppar)...) + p_amplitude = epsilon * maximum(ppar) + f = @view pdf.electron.norm[:,:,:,ir] + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. 
+ f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) + delta_f = allocate_shared_float(size(f)...) + f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. + begin_serial_region() + @serial_region begin + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + end + + add_electron_implicit_constraint_forcing_to_Jacobian!( + jacobian_matrix, f, z_speed, z, vperp, vpa, + t_params.electron.constraint_forcing_rate, dt, ir) + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and 
pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + electron_implicit_constraint_forcing!(residual, this_f, + t_params.electron.constraint_forcing_rate, + vpa, dt, ir) + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index. 
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] > -zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + residual[ivpa,ivperp,iz] = 0.0 + end + end + end + end + return nothing + end + + original_residual = allocate_shared_float(size(f)...) + perturbed_residual = allocate_shared_float(size(f)...) + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f, ppar .+ delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test 
elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + # No norm factor, because both perturbed residuals should be zero here, as + # delta_p does not affect this term, and `f` (with no `delta_f`) obeys the + # constraints exactly, so this term vanishes. + @test elementwise_isapprox(perturbed_residual, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n); + rtol=0.0, atol=1.0e-15) + end + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + delta_state[pdf_size+1:end]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n) ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + cleanup_mk_state!(ascii_io, io_moments, io_dfns) + end + + return nothing +end + +function test_electron_energy_equation(test_input; rtol=(6.0e2*epsilon)^2) + test_input = deepcopy(test_input) + test_input["output"]["run_name"] *= "_electron_energy_equation" + println(" - electron_energy_equation") + + @testset "electron_energy_equation" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, 
num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dupar_dz = @view moments.electron.dupar_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + z_spectral = spectral_objects.z_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + me = composition.me_over_mi + + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar + third_moment = scratch_dummy.buffer_z_1 + dthird_moment_dz = scratch_dummy.buffer_z_2 + begin_z_region() + @loop_z iz begin + third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz] + end + derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = allocate_shared_float(size(ppar)...) + p_amplitude = epsilon * maximum(ppar) + f = @view pdf.electron.norm[:,:,:,ir] + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. 
+ f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) + delta_f = allocate_shared_float(size(f)...) + f_amplitude = epsilon * maximum(f) + begin_serial_region() + @serial_region begin + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + end + + add_electron_energy_equation_to_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz, + dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, + z_spectral, num_diss_params, dt, ir; ppar_offset=pdf_size) + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member 
of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. + begin_z_region() + @loop_z iz begin + residual[iz] = ppar[iz] + end + @views electron_energy_equation_no_r!( + residual, this_p, dens, upar, moments.ion.dens[:,ir], + moments.ion.upar[:,ir], moments.ion.ppar[:,ir], + moments.neutral.dens[:,ir], moments.neutral.uz[:,ir], + moments.neutral.pz[:,ir], moments.electron, collisions, dt, + composition, external_source_settings.electron, num_diss_params, z, + ir) + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_region() + @loop_z iz begin + residual[iz] = this_p[iz] - residual[iz] + end + end + + original_residual = allocate_shared_float(size(ppar)...) + perturbed_residual = allocate_shared_float(size(ppar)...) + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check f did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + delta_state[1:pdf_size]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + perturbed_with_Jacobian ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f, ppar .+ delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + 
residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check f did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + zeros(pdf_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + perturbed_with_Jacobian ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check f did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + delta_state[1:pdf_size]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + perturbed_with_Jacobian ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + cleanup_mk_state!(ascii_io, io_moments, io_dfns) + end + + return nothing +end + +function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2) + test_input = deepcopy(test_input) + test_input["output"]["run_name"] *= "_ion_dt_forcing_of_electron_ppar" + println(" - ion_dt_forcing_of_electron_ppar") + + @testset "ion_dt_forcing_of_electron_ppar" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, 
boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] + z_spectral = spectral_objects.z_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = allocate_shared_float(size(ppar)...) + p_amplitude = epsilon * maximum(ppar) + f = @view pdf.electron.norm[:,:,:,ir] + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) + delta_f = allocate_shared_float(size(f)...) + f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. 
+ begin_serial_region() + @serial_region begin + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + end + + add_ion_dt_forcing_of_electron_ppar_to_Jacobian!( + jacobian_matrix, z, dt, ion_dt, ir; ppar_offset=pdf_size) + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. + begin_z_region() + @loop_z iz begin + residual[iz] = ppar[iz] + end + ppar_previous_ion_step = moments.electron.ppar + begin_z_region() + @loop_z iz begin + # At this point, ppar_out = ppar_in + dt*RHS(ppar_in). 
Here we add a + # source/damping term so that in the steady state of the electron + # pseudo-timestepping iteration, + # RHS(ppar) - (ppar - ppar_previous_ion_step) / ion_dt = 0, + # resulting in a backward-Euler step (as long as the pseudo-timestepping + # loop converges). + residual[iz] += -dt * (this_p[iz] - ppar_previous_ion_step[iz,ir]) / ion_dt + end + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_region() + @loop_z iz begin + residual[iz] = this_p[iz] - residual[iz] + end + end + + original_residual = allocate_shared_float(size(ppar)...) + perturbed_residual = allocate_shared_float(size(ppar)...) + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check f did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + delta_state[1:pdf_size]; atol=1.0e-15) + + # No norm factor, because both perturbed residuals should be zero here, as + # delta_f does not affect this term, and `ppar` is used as + # `ppar_previous_ion_step` in this test, so the residuals are exactly zero if + # there is no delta_p. 
+ @test elementwise_isapprox(perturbed_residual, + perturbed_with_Jacobian; + rtol=0.0, atol=1.0e-15) + end + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f, ppar .+ delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check f did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + zeros(pdf_size); atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + perturbed_with_Jacobian ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f.+delta_f, ppar.+delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[pdf_size+1:end] + + # Check f did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[1:pdf_size], + delta_state[1:pdf_size]; atol=1.0e-15) + + norm_factor = generate_norm_factor(perturbed_residual) + @test elementwise_isapprox(perturbed_residual ./ norm_factor, + perturbed_with_Jacobian ./ norm_factor; + rtol=0.0, atol=rtol) + end + end + + cleanup_mk_state!(ascii_io, io_moments, io_dfns) + end + + return nothing +end + +function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) + test_input = deepcopy(test_input) + test_input["output"]["run_name"] *= 
"_electron_kinetic_equation" + println(" - electron_kinetic_equation") + + @testset "electron_kinetic_equation" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] + z_spectral = spectral_objects.z_spectral + vperp_spectral = spectral_objects.vperp_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + + delta_p = allocate_shared_float(size(ppar)...) + p_amplitude = epsilon * maximum(ppar) + f = @view pdf.electron.norm[:,:,:,ir] + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude * sin(2.0*π*test_wavenumber*z.grid/z.L) + + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) + delta_f = allocate_shared_float(size(f)...) + f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. 
+ begin_serial_region() + @serial_region begin + delta_f .= f_amplitude .* + reshape(sin.(2.0.*π.*test_wavenumber.*z.grid./z.L), 1, 1, z.n) .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix[row,row] = 1.0 + end + end + + fill_electron_kinetic_equation_Jacobian!( + jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, + z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params.electron, ion_dt, ir, + true) + + function residual_func!(residual_f, residual_p, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + # Calculate heat flux and derivatives using new_variables + calculate_electron_qpar_from_pdf_no_r!(qpar, this_p, vth, this_f, vpa, ir) + + calculate_electron_moment_derivatives_no_r!( + moments, + (electron_density=dens, + electron_upar=upar, + electron_ppar=this_p), + scratch_dummy, z, z_spectral, + num_diss_params.electron.moment_dissipation_coefficient, ir) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. 
+ begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual_f[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + begin_z_region() + @loop_z iz begin + residual_p[iz] = ppar[iz] + end + electron_kinetic_equation_euler_update!( + residual_f, residual_p, this_f, this_p, moments, z, vperp, vpa, + z_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, collisions, + composition, external_source_settings, num_diss_params, t_params.electron, + ir; evolve_ppar=true, ion_dt=ion_dt) + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual_f[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual_f[ivpa,ivperp,iz] + end + begin_z_region() + @loop_z iz begin + residual_p[iz] = this_p[iz] - residual_p[iz] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(residual_f[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(residual_f, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + # Boundary conditions on incoming part of distribution function. Note + # that as density, upar, ppar do not change in this implicit step, + # f_electron_newvar, f_old, and residual should all be zero at exactly + # the same set of grid points, so it is reasonable to zero-out + # `residual` to impose the boundary condition. We impose this after + # subtracting f_old in case rounding errors, etc. mean that at some + # point f_old had a different boundary condition cut-off index. 
+ begin_vperp_vpa_region() + v_unnorm = vpa.scratch + zero = 1.0e-14 + if z.irank == 0 + iz = 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] > -zero + residual_f[ivpa,ivperp,iz] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + iz = z.n + v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + residual_f[ivpa,ivperp,iz] = 0.0 + end + end + end + end + return nothing + end + + original_residual_f = allocate_shared_float(size(f)...) + original_residual_p = allocate_shared_float(size(ppar)...) + perturbed_residual_f = allocate_shared_float(size(f)...) + perturbed_residual_p = allocate_shared_float(size(ppar)...) + + @testset "δf only" begin + residual_func!(original_residual_f, original_residual_p, f, ppar) + residual_func!(perturbed_residual_f, perturbed_residual_p, f.+delta_f, ppar) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] + perturbed_with_Jacobian_p = vec(original_residual_p) .+ residual_update_with_Jacobian[pdf_size+1:end] + + norm_factor_f = generate_norm_factor(perturbed_residual_f) + @test elementwise_isapprox(perturbed_residual_f ./ norm_factor_f, + reshape(perturbed_with_Jacobian_f, vpa.n, vperp.n, z.n) ./ norm_factor_f; + rtol=0.0, atol=rtol) + norm_factor_p = generate_norm_factor(perturbed_residual_p) + @test elementwise_isapprox(perturbed_residual_p ./ norm_factor_p, + perturbed_with_Jacobian_p ./ norm_factor_p; + rtol=0.0, atol=rtol) + end + end + + @testset "δp only" begin + residual_func!(original_residual_f, original_residual_p, f, ppar) + residual_func!(perturbed_residual_f, perturbed_residual_p, f, ppar.+delta_p) + + 
begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] + perturbed_with_Jacobian_p = vec(original_residual_p) .+ residual_update_with_Jacobian[pdf_size+1:end] + + norm_factor_f = generate_norm_factor(perturbed_residual_f) + @test elementwise_isapprox(perturbed_residual_f ./ norm_factor_f, + reshape(perturbed_with_Jacobian_f, vpa.n, vperp.n, z.n) ./ norm_factor_f; + rtol=0.0, atol=rtol) + norm_factor_p = generate_norm_factor(perturbed_residual_p) + @test elementwise_isapprox(perturbed_residual_p ./ norm_factor_p, + perturbed_with_Jacobian_p ./ norm_factor_p; + rtol=0.0, atol=rtol) + end + end + + @testset "δf and δp" begin + residual_func!(original_residual_f, original_residual_p, f, ppar) + residual_func!(perturbed_residual_f, perturbed_residual_p, f.+delta_f, ppar.+delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + delta_state[1:pdf_size] .= vec(delta_f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] + perturbed_with_Jacobian_p = vec(original_residual_p) .+ residual_update_with_Jacobian[pdf_size+1:end] + + norm_factor_f = generate_norm_factor(perturbed_residual_f) + @test elementwise_isapprox(perturbed_residual_f ./ norm_factor_f, + reshape(perturbed_with_Jacobian_f, vpa.n, vperp.n, z.n) ./ norm_factor_f; + rtol=0.0, atol=rtol) + norm_factor_p = generate_norm_factor(perturbed_residual_p) + @test elementwise_isapprox(perturbed_residual_p ./ norm_factor_p, + perturbed_with_Jacobian_p ./ norm_factor_p; + rtol=0.0, atol=rtol) + end + end + + cleanup_mk_state!(ascii_io, io_moments, io_dfns) + end + + return nothing 
+end + +function runtests() + # Create a temporary directory for test output + test_output_directory = get_MPI_tempdir() + test_input["output"]["base_directory"] = test_output_directory + + @testset "Jacobian matrix" verbose=use_verbose begin + println("Jacobian matrix") + + test_electron_z_advection(test_input) + test_electron_vpa_advection(test_input) + test_contribution_from_electron_pdf_term(test_input) + test_electron_dissipation_term(test_input) + test_electron_krook_collisions(test_input) + test_external_electron_source(test_input) + test_electron_implicit_constraint_forcing(test_input) + test_electron_energy_equation(test_input) + test_ion_dt_forcing_of_electron_ppar(test_input) + test_electron_kinetic_equation(test_input) + end + + if global_rank[] == 0 + # Delete output directory to avoid using too much disk space + rm(realpath(test_output_directory); recursive=true) + end + + return nothing +end + +end # JacobianMatrixTests + + +using .JacobianMatrixTests + +JacobianMatrixTests.runtests() diff --git a/moment_kinetics/test/kinetic_electron_tests.jl b/moment_kinetics/test/kinetic_electron_tests.jl new file mode 100644 index 000000000..723da1be2 --- /dev/null +++ b/moment_kinetics/test/kinetic_electron_tests.jl @@ -0,0 +1,286 @@ +module KineticElectronsTests + +# Regression test with kinetic electrons, using wall boundary conditions, with recycling +# fraction less than 1 and a plasma source. Runs a Boltzmann electron simulation, restarts +# as a kinetic electron simulation, and checks the final Ez profile. 
+ +include("setup.jl") + +using moment_kinetics.load_data: get_run_info_no_setup, close_run_info, + postproc_load_variable +using moment_kinetics.looping + +using moment_kinetics.Glob + +# Input for Boltzmann electron part of run +boltzmann_input = OptionsDict( + "output" => OptionsDict("run_name" => "kinetic_electron_test_boltzmann_initialisation", + ), + "evolve_moments" => OptionsDict("parallel_pressure" => true, + "density" => true, + "moments_conservation" => true, + "parallel_flow" => true, + ), + "r" => OptionsDict("ngrid" => 1, + "nelement" => 1, + ), + "z" => OptionsDict("ngrid" => 5, + "discretization" => "gausslegendre_pseudospectral", + "nelement" => 8, + "bc" => "wall", + ), + "vpa" => OptionsDict("ngrid" => 6, + "discretization" => "gausslegendre_pseudospectral", + "nelement" => 17, + "L" => 24.0, + "element_spacing_option" => "coarse_tails", + "bc" => "zero", + ), + "composition" => OptionsDict("T_e" => 0.2, + "n_ion_species" => 1, + "n_neutral_species" => 0, + ), + "ion_species_1" => OptionsDict("initial_temperature" => 0.2, + "initial_density" => 1.0, + ), + "z_IC_ion_species_1" => OptionsDict("initialization_option" => "gaussian", + "density_amplitude" => 1.0, + "temperature_amplitude" => 0.0, + "density_phase" => 0.0, + "upar_amplitude" => 1.0, + "temperature_phase" => 0.0, + "upar_phase" => 0.0, + ), + "vpa_IC_ion_species_1" => OptionsDict("initialization_option" => "gaussian", + "density_amplitude" => 1.0, + "temperature_amplitude" => 0.0, + "density_phase" => 0.0, + "upar_amplitude" => 0.0, + "temperature_phase" => 0.0, + "upar_phase" => 0.0, + ), + "krook_collisions" => OptionsDict("use_krook" => true, + ), + "reactions" => OptionsDict("electron_ionization_frequency" => 0.0, + "ionization_frequency" => 0.5, + "charge_exchange_frequency" => 0.75, + ), + "ion_source_1" => OptionsDict("active" => true, + "z_profile" => "gaussian", + "z_width" => 0.25, + "source_strength" => 2.0, + "source_T" => 2.0, + ), + "ion_source_2" => OptionsDict("active" 
=> true, + "z_profile" => "wall_exp_decay", + "z_width" => 0.25, + "source_strength" => 0.5, + "source_T" => 0.2, + ), + "timestepping" => OptionsDict("type" => "SSPRK4", + "nstep" => 20000, + "dt" => 1.0e-4, + "nwrite" => 2500, + "nwrite_dfns" => 2500, + "steady_state_residual" => true, + ), + "ion_numerical_dissipation" => OptionsDict("force_minimum_pdf_value" => 0.0, + ), + "electron_numerical_dissipation" => OptionsDict("force_minimum_pdf_value" => 0.0, + ), + ) + +# Test using distributed memory when possible +boltzmann_input["z"]["nelement_local"] = boltzmann_input["z"]["nelement"] ÷ gcd(boltzmann_input["z"]["nelement"], global_size[]) + +kinetic_input = deepcopy(boltzmann_input) +kinetic_input["output"]["run_name"] = "kinetic_electron_test" +kinetic_input["composition"]["electron_physics"] = "kinetic_electrons" +kinetic_input["timestepping"] = OptionsDict("type" => "PareschiRusso2(2,2,2)", + "implicit_electron_advance" => false, + "implicit_electron_ppar" => true, + "implicit_ion_advance" => false, + "implicit_vpa_advection" => false, + "nstep" => 100, + "dt" => 1.0e-5, + "nwrite" => 100, + "nwrite_dfns" => 100, + ) + +kinetic_input["electron_timestepping"] = OptionsDict("nstep" => 5000000, + "dt" => 5.0e-6, + "nwrite" => 10000, + "nwrite_dfns" => 100000, + "decrease_dt_iteration_threshold" => 5000, + "increase_dt_iteration_threshold" => 0, + "cap_factor_ion_dt" => 10.0, + "initialization_residual_value" => 1.0e10, + "converged_residual_value" => 1.0e-1, + ) + +kinetic_input["nonlinear_solver"] = OptionsDict("nonlinear_max_iterations" => 1000, + "rtol" => 1.0e-8, + "atol" => 1.0e-14, + "linear_restart" => 5, + "preconditioner_update_interval" => 100, + ) + + +""" +Run a test for a single set of parameters +""" +function run_test() + test_output_directory = get_MPI_tempdir() + + this_boltzmann_input = deepcopy(boltzmann_input) + this_boltzmann_input["output"]["base_directory"] = test_output_directory + + this_kinetic_input = deepcopy(kinetic_input) +
this_kinetic_input["output"]["base_directory"] = test_output_directory + + # Provide some progress info + println(" - testing kinetic electrons") + + # Suppress console output while running. The test is fairly long, so when debugging it + # may be useful to remove `quietoutput()` to keep intermediate output visible. + quietoutput() do + run_moment_kinetics(this_boltzmann_input) + + restart_from_directory = joinpath(this_boltzmann_input["output"]["base_directory"], this_boltzmann_input["output"]["run_name"]) + restart_from_file_pattern = this_boltzmann_input["output"]["run_name"] * ".dfns*.h5" + restart_from_file = glob(restart_from_file_pattern, restart_from_directory)[1] + + # run kinetic electron simulation + run_moment_kinetics(this_kinetic_input; restart=restart_from_file) + end + + if global_rank[] == 0 + # Load and analyse output + ######################### + + path = joinpath(realpath(this_kinetic_input["output"]["base_directory"]), this_kinetic_input["output"]["run_name"]) + + # open the output file(s) + run_info = get_run_info_no_setup(path, dfns=true) + + # load fields data + Ez = postproc_load_variable(run_info, "Ez")[:,1,:] + vthe = postproc_load_variable(run_info, "electron_thermal_speed")[:,1,:] + electron_advance_linear_iterations = postproc_load_variable(run_info, "electron_advance_linear_iterations")[end] + + close_run_info(run_info) + + # Regression test + # Benchmark data generated in serial on Linux + expected_Ez = [-0.5990683230706185 -0.604849806235434; + -0.4944296396481284 -0.49739671491727844; + -0.30889032954504736 -0.30924318765687464; + -0.2064830747303776 -0.20682475071884582; + -0.21232457328748663 -0.21299072376949116; + -0.18233875912042674 -0.18256905463006085; + -0.16711429522309232 -0.1673112962636778; + -0.16920776495088916 -0.1693227707158167; + -0.1629417555658927 -0.16304933113558318; + -0.16619150334079993 -0.16629539618289285; + -0.15918194883360942 -0.1592799009526323; + -0.14034706409006803 -0.140437217833422; +
-0.12602184032280567 -0.12610387949683538; + -0.10928716440800472 -0.10935785133612701; + -0.07053969674257217 -0.07058573063123225; + -0.0249577746169536 -0.024974174596810936; + -2.8327303308330514e-15 -1.441401377024236e-10; + 0.024957774616960776 0.02497417427570905; + 0.07053969674257636 0.07058572965952663; + 0.10928716440799909 0.10935785264749627; + 0.1260218403227975 0.12610388283669527; + 0.1403470640900294 0.1404372197714126; + 0.1591819488336015 0.15927992284761766; + 0.16619150334082114 0.1662953275454769; + 0.16294175556587748 0.1630489871826757; + 0.16920776495090983 0.1693233489685909; + 0.1671142952230893 0.16731075590341918; + 0.1823387591204167 0.1825740389953209; + 0.21232457328753865 0.21297925141919793; + 0.20648307473037922 0.20682690396901446; + 0.3088903295450278 0.30925854110074175; + 0.4944296396481271 0.49731601862961966; + 0.5990683230705801 0.6046564647413697] + expected_vthe = [27.08102229345079 27.08346736523219; + 27.087730258479823 27.089003820908527; + 27.091898844901323 27.09181784480061; + 27.092455021687254 27.092742387764524; + 27.09350739287911 27.094148133125078; + 27.093817059011126 27.093889601910092; + 27.09443981315218 27.094610141036807; + 27.09484177005478 27.094996783801374; + 27.094985914811055 27.0950626278904; + 27.095122128675094 27.09525702879687; + 27.09536357532887 27.09548558966323; + 27.095582117080163 27.095716810823177; + 27.09568783962135 27.09578276803757; + 27.0957775472326 27.095878610625554; + 27.095909169276535 27.09600041573683; + 27.095978269355648 27.096074922150624; + 27.095988166679223 27.096080134292468; + 27.095978269713978 27.096074922508883; + 27.095909171602027 27.096000418062378; + 27.09577755035281 27.095878613746088; + 27.095687838236376 27.095782766652857; + 27.09558211622511 27.095716809968053; + 27.09536353456768 27.09548554890375; + 27.095122105596843 27.095257005693973; + 27.094986093051983 27.09506280663278; + 27.094841563692096 27.094996577040796; + 27.094439553087433 
27.094609881510113; + 27.093813728418613 27.09388627063591; + 27.093489818175936 27.094130555874184; + 27.09246140309467 27.092748772044477; + 27.09185903467811 27.09177803239964; + 27.08774827015981 27.089021820036553; + 27.081240668889404 27.0836857414255] + + if expected_Ez == nothing + # Error: no expected input provided + println("data tested would be: Ez=", Ez) + @test false + else + @test isapprox(Ez, expected_Ez, rtol=1.0e-7, atol=1.0e-9) + end + if expected_vthe == nothing + # Error: no expected input provided + println("data tested would be: vthe=", vthe) + @test false + else + @test isapprox(vthe, expected_vthe, rtol=2.0e-9, atol=0.0) + end + + # Iteration counts are fairly inconsistent, but it's good to check that they at + # least don't unexpectedly increase by an order of magnitude. + # Expected iteration count is from a serial run on Linux. + expected_electron_advance_linear_iterations = 11394 + @test electron_advance_linear_iterations < 2.0 * expected_electron_advance_linear_iterations + if !(electron_advance_linear_iterations < 2.0 * expected_electron_advance_linear_iterations) + println("electron_advance_linear_iterations=$electron_advance_linear_iterations was greater than twice the expected $expected_electron_advance_linear_iterations.") + end + end + + if global_rank[] == 0 + # Delete output directory to avoid using too much disk space + rm(realpath(test_output_directory); recursive=true) + end +end + +function runtests() + @testset "kinetic electrons" begin + println("Kinetic electron tests") + run_test() + end + return nothing +end + +end # KineticElectronsTests + + +using .KineticElectronsTests + +KineticElectronsTests.runtests() diff --git a/moment_kinetics/test/nonlinear_solver_tests.jl b/moment_kinetics/test/nonlinear_solver_tests.jl index 29fe633c5..ab68389f4 100644 --- a/moment_kinetics/test/nonlinear_solver_tests.jl +++ b/moment_kinetics/test/nonlinear_solver_tests.jl @@ -58,7 +58,7 @@ function linear_test() zeros(mk_float, 0), 
zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), - zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0, 0), zeros(mk_float, 0, 0), advection_input("", 0.0, 0.0, 0.0), zeros(mk_float, 0), zeros(mk_float, 0), MPI.COMM_NULL, 1:n, 1:n, @@ -171,7 +171,7 @@ function nonlinear_test() zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), - zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0, 0), zeros(mk_float, 0, 0), advection_input("", 0.0, 0.0, 0.0), zeros(mk_float, 0), zeros(mk_float, 0), MPI.COMM_NULL, 1:n, 1:n, @@ -272,18 +272,10 @@ function nonlinear_test() end function runtests() - if Sys.isapple() - @testset_skip "MINPACK is broken on macOS (https://github.com/sglyon/MINPACK.jl/issues/18)" "non-linear solvers" begin - println("non-linear solver tests") - linear_test() - nonlinear_test() - end - else - @testset "non-linear solvers" begin - println("non-linear solver tests") - linear_test() - nonlinear_test() - end + @testset "non-linear solvers" begin + println("non-linear solver tests") + linear_test() + nonlinear_test() end end diff --git a/moment_kinetics/test/runtests.jl b/moment_kinetics/test/runtests.jl index 73d688f06..26a1a863e 100644 --- a/moment_kinetics/test/runtests.jl +++ b/moment_kinetics/test/runtests.jl @@ -20,6 +20,8 @@ function runtests() include(joinpath(@__DIR__, "fokker_planck_tests.jl")) include(joinpath(@__DIR__, "fokker_planck_time_evolution_tests.jl")) include(joinpath(@__DIR__, "gyroaverage_tests.jl")) + include(joinpath(@__DIR__, "jacobian_matrix_tests.jl")) + include(joinpath(@__DIR__, "kinetic_electron_tests.jl")) end end diff --git a/moment_kinetics/test/setup.jl b/moment_kinetics/test/setup.jl index 7e9ec9cc8..c269231f6 100644 --- 
a/moment_kinetics/test/setup.jl +++ b/moment_kinetics/test/setup.jl @@ -13,7 +13,8 @@ using moment_kinetics module MKTestUtilities export use_verbose, force_optional_dependencies, @long, quietoutput, get_MPI_tempdir, - global_rank, global_size, maxabs_norm, @testset_skip, recursive_merge, OptionsDict + global_rank, global_size, maxabs_norm, elementwise_isapprox, @testset_skip, + recursive_merge, OptionsDict using moment_kinetics.communication: comm_world, global_rank, global_size using moment_kinetics.command_line_options: get_options @@ -85,6 +86,18 @@ between two arrays. """ maxabs_norm(x) = maximum(abs.(x)) +""" + elementwise_isapprox(args...; kwargs...) + +Calls `isapprox()` but forces the comparison to be done element-by-element, rather than +testing `norm(x-y)` against the tolerance. +""" +function elementwise_isapprox(args...; kwargs...) + # A `norm` that returns NaN (non-finite) makes `isapprox()` fall back to comparing + # the arrays component-wise. + return isapprox(args...; norm=x->NaN, kwargs...) +end + """ Get a single temporary directory that is the same on all MPI ranks """ diff --git a/precompile-with-check-bounds.jl b/precompile-with-check-bounds.jl new file mode 100644 index 000000000..b7be917bb --- /dev/null +++ b/precompile-with-check-bounds.jl @@ -0,0 +1,16 @@ +using Pkg + +# Activate the moment_kinetics package +Pkg.activate(".") + +using PackageCompiler + +# Create the sysimage 'moment_kinetics.so' in the base moment_kinetics source directory +# with both moment_kinetics and the dependencies listed above precompiled. +# Warning: editing the code will not affect what runs when using this .so, you +# need to re-precompile if you change anything.
+create_sysimage(; sysimage_path="moment_kinetics.so", + precompile_execution_file="util/precompile_run.jl", + include_transitive_dependencies=false, # This is needed to make MPI work, see https://github.com/JuliaParallel/MPI.jl/issues/518 + sysimage_build_args=`-O3 --check-bounds=yes`, + ) diff --git a/util/calculate_rk_coeffs.jl b/util/calculate_rk_coeffs.jl index f4f3c366d..5dc9fa686 100644 --- a/util/calculate_rk_coeffs.jl +++ b/util/calculate_rk_coeffs.jl @@ -459,12 +459,11 @@ function convert_butcher_tableau_for_moment_kinetics(a::Matrix{Rational{Int64}}, end function convert_rk_coefs_to_butcher_tableau(rk_coefs::AbstractArray{T,N}, - adaptive, + adaptive, low_storage, rk_coefs_implicit=zeros(T, size(rk_coefs, 1) - 1, size(rk_coefs, 2) + 1), implicit_coefficient_is_zero=nothing ) where {T,N} using_rationals = eltype(rk_coefs) <: Rational || eltype(rk_coefs_implicit) <: Rational - low_storage = size(rk_coefs, 1) == 3 if adaptive n_rk_stages = size(rk_coefs, 2) - 1 else @@ -654,7 +653,7 @@ function convert_and_check_butcher_tableau(name, a, b, # Consistency check: converting back should give the original a, b. 
a_check, b_check, a_check_implicit, b_check_implicit = - convert_rk_coefs_to_butcher_tableau(rk_coefs, adaptive, rk_coefs_implicit, implicit_coefficient_is_zero) + convert_rk_coefs_to_butcher_tableau(rk_coefs, adaptive, low_storage, rk_coefs_implicit, implicit_coefficient_is_zero) if eltype(a) == Rational if a_check != a @@ -704,7 +703,7 @@ function convert_and_check_butcher_tableau(name, a, b, end end -function convert_and_check_rk_coefs(name, rk_coefs, adaptive=false, +function convert_and_check_rk_coefs(name, rk_coefs, adaptive=false, low_storage=true, rk_coefs_implicit=zeros(eltype(rk_coefs), size(rk_coefs, 1), size(rk_coefs, 2) + 1), @@ -717,7 +716,7 @@ function convert_and_check_rk_coefs(name, rk_coefs, adaptive=false, if imex print("rk_coefs_implicit="); display(rk_coefs_implicit) end - a, b, a_implicit, b_implicit = convert_rk_coefs_to_butcher_tableau(rk_coefs, adaptive, rk_coefs_implicit, implicit_coefficient_is_zero) + a, b, a_implicit, b_implicit = convert_rk_coefs_to_butcher_tableau(rk_coefs, adaptive, low_storage, rk_coefs_implicit, implicit_coefficient_is_zero) print("a="); display(a) print("b="); display(b) if imex @@ -1066,3 +1065,71 @@ convert_and_check_butcher_tableau( Rational{BigInt}[1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236; 2756255671327//12835298489170 -10771552573575//22201958757719 9247589265047//10645013368117 2193209047091//5459859503100], ; low_storage=false) + +# 2nd-order, 2-stage IMEX method 'IMEX-SSP2(2,2,2)' from Pareschi & Russo 2005, Table II +# (https://doi.org/10.1007/s10915-004-4636-4) +gamma = 1 - 1 / sqrt(BigFloat(2)) +convert_and_check_butcher_tableau( + "PareschiRusso2(2,2,2)", + BigFloat[0 0; + 1 0; + ], + BigFloat[1//2 1//2], + BigFloat[gamma 0 ; + 1-2*gamma gamma; + ], + BigFloat[1//2 1//2], + ; low_storage=false) + +# 2nd-order, 3-stage IMEX method 'IMEX-SSP2(3,2,2)' from Pareschi & Russo 2005, Table III +# 
(https://doi.org/10.1007/s10915-004-4636-4) +convert_and_check_butcher_tableau( + "PareschiRusso2(3,2,2)", + Rational{Int64}[0 0 0; + 0 0 0; + 0 1 0; + ], + Rational{Int64}[0 1//2 1//2], + Rational{Int64}[ 1//2 0 0 ; + -1//2 1//2 0 ; + 0 1//2 1//2; + ], + Rational{Int64}[0 1//2 1//2], + ; low_storage=false) + +# 2nd-order, 3-stage IMEX method 'IMEX-SSP2(3,3,2)' from Pareschi & Russo 2005, Table IV +# (https://doi.org/10.1007/s10915-004-4636-4) +convert_and_check_butcher_tableau( + "PareschiRusso2(3,3,2)", + Rational{Int64}[0 0 0; + 1//2 0 0; + 1//2 1//2 0; + ], + Rational{Int64}[1//3 1//3 1//3], + Rational{Int64}[1//4 0 0 ; + 0 1//4 0 ; + 1//3 1//3 1//3; + ], + Rational{Int64}[1//3 1//3 1//3], + ; low_storage=false) + +# 3rd-order, 4-stage IMEX method 'IMEX-SSP3(4,3,3)' from Pareschi & Russo 2005, Table VI +# (https://doi.org/10.1007/s10915-004-4636-4) +alpha = 0.24169426078821 +beta = 0.06042356519705 +eta = 0.12915286960590 +convert_and_check_butcher_tableau( + "PareschiRusso3(4,3,3)", + typeof(alpha)[0 0 0 0; + 0 0 0 0; + 0 1 0 0; + 0 1//4 1//4 0; + ], + typeof(alpha)[0 1//6 1//6 2//3], + typeof(alpha)[alpha 0 0 0 ; + -alpha alpha 0 0 ; + 0 1-alpha alpha 0 ; + beta eta 1//2-beta-eta-alpha alpha; + ], + typeof(alpha)[0 1//6 1//6 2//3], + ; low_storage=false) diff --git a/util/precompile_run.jl b/util/precompile_run.jl index 09158e055..40ed0e57b 100644 --- a/util/precompile_run.jl +++ b/util/precompile_run.jl @@ -89,6 +89,9 @@ geo_input1 = recursive_merge(wall_bc_cheb_input, OptionsDict("composition" => Op kinetic_electron_input = recursive_merge(cheb_input, OptionsDict("evolve_moments" => OptionsDict("density" => true, "parallel_flow" => true, "parallel_pressure" => true), + "z" => OptionsDict("discretization" => "gausslegendre_pseudospectral"), + "vpa" => OptionsDict("discretization" => "gausslegendre_pseudospectral"), + "vz" => OptionsDict("discretization" => "gausslegendre_pseudospectral"), "r" => OptionsDict("ngrid" => 1, "nelement" => 1), "vperp" => 
OptionsDict("ngrid" => 1, diff --git a/util/precompile_run_kinetic-electrons.jl b/util/precompile_run_kinetic-electrons.jl index 28e8957e7..fd3e54ca7 100644 --- a/util/precompile_run_kinetic-electrons.jl +++ b/util/precompile_run_kinetic-electrons.jl @@ -18,36 +18,36 @@ input = OptionsDict("output" => OptionsDict("run_name" => "precompilation", "r" => OptionsDict("ngrid" => 1, "nelement" => 1, "bc" => "periodic", - "discretization" => "chebyshev_pseudospectral"), + "discretization" => "gausslegendre_pseudospectral"), "z" => OptionsDict("ngrid" => 5, "nelement" => 4, "bc" => "wall", - "discretization" => "chebyshev_pseudospectral"), + "discretization" => "gausslegendre_pseudospectral"), "vperp" => OptionsDict("ngrid" => 1, "nelement" => 1, "bc" => "zero", "L" => 4.0, - "discretization" => "chebyshev_pseudospectral"), + "discretization" => "gausslegendre_pseudospectral"), "vpa" => OptionsDict("ngrid" => 7, "nelement" => 8, "bc" => "zero", "L" => 8.0, - "discretization" => "chebyshev_pseudospectral"), + "discretization" => "gausslegendre_pseudospectral"), "vzeta" => OptionsDict("ngrid" => 1, "nelement" => 1, "bc" => "zero", "L" => 4.0, - "discretization" => "chebyshev_pseudospectral"), + "discretization" => "gausslegendre_pseudospectral"), "vr" => OptionsDict("ngrid" => 1, "nelement" => 1, "bc" => "zero", "L" => 4.0, - "discretization" => "chebyshev_pseudospectral"), + "discretization" => "gausslegendre_pseudospectral"), "vz" => OptionsDict("ngrid" => 7, "nelement" => 8, "bc" => "zero", "L" => 8.0, - "discretization" => "chebyshev_pseudospectral"), + "discretization" => "gausslegendre_pseudospectral"), "timestepping" => OptionsDict("nstep" => 1, "dt" => 2.0e-11), "electron_timestepping" => OptionsDict("nstep" => 1, diff --git a/util/test-rk-timestep.jl b/util/test-rk-timestep.jl index dd77ab06f..2282170aa 100644 --- a/util/test-rk-timestep.jl +++ b/util/test-rk-timestep.jl @@ -194,8 +194,9 @@ function rk_advance_butcher(a, b, y0, dt, nsteps, a_implicit=nothing, 
b_implicit error = zeros(nsteps+1) for it ∈ 1:nsteps - kscratch[1] = dt*f(y) kscratch_implicit[1] = dt*f_implicit(y, a_implicit[1,1] * dt) + ystage = backward_euler(y, dt * a_implicit[1,1]) + kscratch[1] = dt*f(ystage) for i ∈ 2:n_rk_stages ytilde = y + sum(a[i,j] * kscratch[j] for j ∈ 1:i-1) + @@ -324,9 +325,45 @@ methods = Dict( a_implicit = Float64[0 0 0 0; 1767732205903//4055673282236 1767732205903//4055673282236 0 0; 2746238789719//10658868560708 -640167445237//6845629431997 1767732205903//4055673282236 0; 1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236], b_implicit = Float64[1471266399579//7840856788654 -4482444167858//7529755066697 11266239266428//11593286722821 1767732205903//4055673282236; 2756255671327//12835298489170 -10771552573575//22201958757719 9247589265047//10645013368117 2193209047091//5459859503100] ), - ) -a, b = convert_rk_coefs_to_butcher_tableau(methods["RKF45"].rk_coefs, true) + "PareschiRusso2(2,2,2)" => (a=Float64[0.0 0.0; 1.0 0.0], + b=Float64[0.5 0.5], + a_implicit=Float64[0.2928932188134524755991556378951509607151640623115259634116601310046337607689404 0.0; 0.4142135623730950488016887242096980785696718753769480731766797379907324784621193 0.2928932188134524755991556378951509607151640623115259634116601310046337607689404], + b_implicit=Float64[0.5 0.5], + rk_coefs=Float64[-0.4142135623730950488016887242096980785696718753769480731766797379907324784621711 -0.5; 0.9999999999999999999999999999999999999999999999999999999999999999999999999999827 -1.207106781186547524400844362104849039284835937688474036588339868995366239231094; -0.0 0.5], + rk_coefs_implicit=Float64[0.2928932188134524755991556378951509607151640623115259634116601310046337607689404 0.4142135623730950488016887242096980785696718753769480731766797379907324784621883 1.0; -0.0 0.2928932188134524755991556378951509607151640623115259634116601310046337607689404 
1.207106781186547524400844362104849039284835937688474036588339868995366239231094], + implicit_coefficient_is_zero=Bool[0, 0], + ), + + "PareschiRusso2(3,2,2)" => (a=Float64[0 0 0; 0 0 0; 0 1 0], + b=Float64[0 1//2 1//2], + a_implicit=Float64[1//2 0 0; -1//2 1//2 0; 0 1//2 1//2], + b_implicit=Float64[0 1//2 1//2], + rk_coefs=Float64[2 -1 -1//2; 0 0 0; 0 1 -1//2; 0 0 1//2], + rk_coefs_implicit=Float64[1//2 -1 1 1//2; 0 1//2 0 1//2; 0 0 1//2 1//2], + implicit_coefficient_is_zero=Bool[0, 0, 0], + ), + + "PareschiRusso2(3,3,2)" => (a=Float64[0 0 0; 1//2 0 0; 1//2 1//2 0], + b=Float64[1//3 1//3 1//3], + a_implicit=Float64[1//4 0 0; 0 1//4 0; 1//3 1//3 1//3], + b_implicit=Float64[1//3 1//3 1//3], + rk_coefs=Float64[1 -4//3 -1//9; 1//2 -1//3 -4//9; 0 1//2 -1//3; 0 0 1//3], + rk_coefs_implicit=Float64[1//4 -1//2 4//3 4//9; 0 1//4 5//6 4//9; 0 0 1//3 2//3], + implicit_coefficient_is_zero=Bool[0, 0, 0], + ), + + "PareschiRusso3(4,3,3)" => (a=Float64[0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0; 0.0 0.25 0.25 0.0], + b=Float64[0.0 0.16666666666666666 0.16666666666666666 0.6666666666666666], + a_implicit=Float64[0.24169426078821 0.0 0.0 0.0; -0.24169426078821 0.24169426078821 0.0 0.0; 0.0 0.75830573921179 0.24169426078821 0.0; 0.06042356519705 0.1291528696059 0.06872930440884001 0.24169426078821], + b_implicit=Float64[0.0 0.16666666666666666 0.16666666666666666 0.6666666666666666], + rk_coefs=Float64[2.0 -5.27491721763532 0.9999999999999688 -0.1666666666666453; -0.0 0.0 1.4589197899688663e-17 0.0; -0.0 1.0 -0.0343646522044047 -0.500000000000007; -0.0 -0.0 0.25 -2.091639072545107; -0.0 -0.0 -0.0 0.6666666666666664], + rk_coefs_implicit=Float64[0.24169426078821 -1.0 3.13745860881766 1.0436096431476471e-14 0.16666666666665975; -0.0 0.24169426078821 2.13745860881766 -0.24999999999997924 0.3333333333333193; -0.0 -0.0 0.24169426078821 0.034364652204404655 0.500000000000007; -0.0 -0.0 -0.0 0.24169426078821 2.0916390725451066], + implicit_coefficient_is_zero=Bool[0, 0, 0, 0], + ), 
+ ) + +a, b = convert_rk_coefs_to_butcher_tableau(methods["RKF45"].rk_coefs, true, false) methods["RKF45 attempt 2"] = (rk_coefs = methods["RKF45"].rk_coefs, a = a, b = b)