diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324-wall-Jac.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324-wall-Jac.toml new file mode 100644 index 000000000..9fba1454d --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324-wall-Jac.toml @@ -0,0 +1,162 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +bc = "wall" +element_spacing_option = "sqrt" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +#maximum_dt = 1.0e-8 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 +include_wall_bc_in_preconditioner = true + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324.toml new file mode 100644 index 000000000..f7bba1bfe --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324.toml @@ -0,0 +1,161 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +bc = "wall" +element_spacing_option = "sqrt" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +#maximum_dt = 1.0e-8 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324-wall-Jac.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324-wall-Jac.toml new file mode 100644 index 000000000..b4f1c6a59 --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324-wall-Jac.toml @@ -0,0 +1,163 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +bc = "wall" +element_spacing_option = "compressed_2" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +maximum_dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +#maximum_dt = 1.0e-8 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 +include_wall_bc_in_preconditioner = true + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324.toml new file mode 100644 index 000000000..9fbb70e16 --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324.toml @@ -0,0 +1,161 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +bc = "wall" +element_spacing_option = "compressed_2" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +#maximum_dt = 1.0e-8 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324-wall-Jac.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324-wall-Jac.toml new file mode 100644 index 000000000..1acf5c04d --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324-wall-Jac.toml @@ -0,0 +1,162 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +bc = "wall" +element_spacing_option = "compressed_4" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +#maximum_dt = 1.0e-8 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 +include_wall_bc_in_preconditioner = true + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324.toml new file mode 100644 index 000000000..ba5ae4f24 --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324.toml @@ -0,0 +1,161 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +bc = "wall" +element_spacing_option = "compressed_4" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +#maximum_dt = 1.0e-8 +nwrite = 10000 +nwrite_dfns = 100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324-wall-Jac.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324-wall-Jac.toml new file mode 100644 index 000000000..19bad4f94 --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324-wall-Jac.toml @@ -0,0 +1,163 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +#nelement_local = 8 +#nelement_local = 16 +bc = "wall" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +#maximum_dt = 1.0e-8 +nwrite = 10 #10000 +nwrite_dfns = 10 #100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 +include_wall_bc_in_preconditioner = true + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324.toml new file mode 100644 index 000000000..052649878 --- /dev/null +++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324.toml @@ -0,0 +1,162 @@ +[r] +ngrid = 1 +nelement = 1 + +[evolve_moments] +parallel_pressure = true +density = true +moments_conservation = true +parallel_flow = true + +[reactions] +electron_ionization_frequency = 0.0 +ionization_frequency = 0.5 +charge_exchange_frequency = 0.75 + +[vz] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 36.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[ion_species_1] +initial_temperature = 0.1 +initial_density = 1.0 + +[krook_collisions] +use_krook = true + +[vpa] +ngrid = 6 +discretization = "gausslegendre_pseudospectral" +nelement = 31 +L = 30.0 +element_spacing_option = "coarse_tails" +bc = "zero" + +[z] +ngrid = 5 +discretization = "gausslegendre_pseudospectral" +nelement = 32 +nelement_local = 4 +#nelement_local = 8 +#nelement_local = 16 +bc = "wall" + +[vpa_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_neutral_species_1] +initialization_option = "gaussian" +temperature_amplitude = 0.0 +density_amplitude = 0.001 +density_phase = 0.0 +upar_amplitude = -1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[composition] +T_wall = 0.1 +T_e = 0.2 +electron_physics = "kinetic_electrons" +recycling_fraction = 0.5 +n_ion_species = 1 +n_neutral_species = 1 + +[vz_IC_neutral_species_1] +initialization_option = "gaussian" +density_amplitude = 1.0 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 0.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[z_IC_ion_species_1] +initialization_option = "gaussian" +density_amplitude = 0.001 +temperature_amplitude = 0.0 +density_phase = 0.0 +upar_amplitude = 1.0 +temperature_phase = 0.0 +upar_phase = 0.0 + +[neutral_species_1] +initial_temperature = 1.0 +initial_density = 1.0 + +[timestepping] +type = "KennedyCarpenterARK324" +implicit_electron_advance = false +implicit_electron_ppar = true +implicit_ion_advance = false +implicit_vpa_advection = false +nstep = 1000000 +dt = 1.0e-5 +nwrite = 1000 +nwrite_dfns = 1000 +steady_state_residual = true +converged_residual_value = 1.0e-3 + +#write_after_fixed_step_count = true +#nstep = 1 +#nwrite = 1 +#nwrite_dfns = 1 + +[electron_timestepping] +nstep = 5000000 +#nstep = 1 +dt = 1.0e-5 +#maximum_dt = 1.0e-8 +nwrite = 10 #10000 +nwrite_dfns = 10 #100000 +#type = "SSPRK4" +type = "Fekete4(3)" +rtol = 1.0e-3 +atol = 1.0e-14 +minimum_dt = 1.0e-9 +decrease_dt_iteration_threshold = 100 +increase_dt_iteration_threshold = 20 +cap_factor_ion_dt = 10.0 +initialization_residual_value = 2.5 +converged_residual_value = 1.0e-2 + +#debug_io = 1 + +[nonlinear_solver] +nonlinear_max_iterations = 100 +rtol = 1.0e-6 #1.0e-8 +atol = 1.0e-14 #1.0e-16 +linear_restart = 5 +preconditioner_update_interval = 100 + +[ion_source_1] +active = true +z_profile = "gaussian" +z_width = 0.125 +source_strength = 2.0 +source_T = 2.0 + +[ion_numerical_dissipation] +#vpa_dissipation_coefficient = 1.0e-1 +#vpa_dissipation_coefficient = 1.0e-2 +#vpa_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 + +[electron_numerical_dissipation] +#vpa_dissipation_coefficient = 2.0 +force_minimum_pdf_value = 0.0 + +[neutral_numerical_dissipation] +#vz_dissipation_coefficient = 1.0e-1 +#vz_dissipation_coefficient = 1.0e-2 +#vz_dissipation_coefficient = 1.0e-3 +force_minimum_pdf_value = 0.0 diff --git a/moment_kinetics/debug_test/kinetic_electron_inputs.jl b/moment_kinetics/debug_test/kinetic_electron_inputs.jl index 4f834c319..b235efdc7 100644 --- a/moment_kinetics/debug_test/kinetic_electron_inputs.jl +++ b/moment_kinetics/debug_test/kinetic_electron_inputs.jl @@ -91,7 +91,12 @@ test_input = OptionsDict("composition" => OptionsDict("n_ion_species" => 1, "neutral_numerical_dissipation" => OptionsDict("force_minimum_pdf_value" => 0.0, "vz_dissipation_coefficient" => 1e-2)) +test_input_adi = deepcopy(test_input) +test_input_adi["output"]["run_name"] = "kinetic_electron_adi" +test_input_adi["timestepping"]["implicit_electron_ppar"] = "adi" + test_input_list = [ test_input, + test_input_adi, ] diff --git a/moment_kinetics/src/chebyshev.jl b/moment_kinetics/src/chebyshev.jl index ad8139120..716ca6599 100644 --- a/moment_kinetics/src/chebyshev.jl +++ b/moment_kinetics/src/chebyshev.jl @@ -466,7 +466,7 @@ function chebyshev_spectral_derivative!(df,f) end function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, coord, - chebyshev::chebyshev_base_info) + chebyshev::chebyshev_base_info, derivative::Val{0}) # Temporary buffer to store Chebyshev coefficients cheby_f = chebyshev.df @@ -495,6 +495,71 @@ function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, c return nothing end +# Evaluate first derivative of the interpolating function +function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, coord, + chebyshev::chebyshev_base_info, derivative::Val{1}) + # Temporary buffer to store Chebyshev coefficients + cheby_f = chebyshev.df + + if length(cheby_f) == 1 + # Chebyshev 'polynomial' is only a constant, so derivative must be zero? + result .= 0.0 + return nothing + end + + # Need to transform newgrid values to a scaled z-coordinate associated with the + # Chebyshev coefficients to get the interpolated function values. Transform is a + # shift and scale so that the element coordinate goes from -1 to 1 + shift = 0.5 * (coord.grid[imin] + coord.grid[imax]) + scale = 2.0 / (coord.grid[imax] - coord.grid[imin]) + + # Get Chebyshev coefficients + chebyshev_forward_transform!(cheby_f, chebyshev.fext, f, chebyshev.forward, coord.ngrid) + + if length(cheby_f) == 2 + # Handle this case specially because the generic algorithm below uses cheby_f[3] + # explicitly. + for i ∈ 1:length(newgrid) + x = newgrid[i] + z = scale * (x - shift) + + # cheby_f[2] is multiplied by U_0, but U_0 = 1 + result[i] = cheby_f[2] * scale + end + return nothing + end + + # Need to evaluate first derivatives of the Chebyshev polynomials. Can do this using + # the differentiation formula + # https://en.wikipedia.org/wiki/Chebyshev_polynomials#Differentiation_and_integration + # d T_n / dz = n * U_(n-1) + # where U_n are the Chebyshev polynomials of the second kind, which in turn can be + # evaluated using the recurrence relation + # https://en.wikipedia.org/wiki/Chebyshev_polynomials#Recurrence_definition + # U_0 = 1 + # U_1 = 2*z + # U_(n+1) = 2*z*U_n - U_(n-1) + for i ∈ 1:length(newgrid) + x = newgrid[i] + z = scale * (x - shift) + # Evaluate sum of Chebyshev polynomials at z using recurrence relation + U_nminus1 = 1.0 + U_n = 2.0*z + + # Note that the derivative of the zero'th Chebyshev polynomial (a constant) does + # not contribute, so cheby_f[1] is not used. + nplus1 = 2.0 + result[i] = cheby_f[2] * U_nminus1 * scale + cheby_f[3] * nplus1 * U_n * scale + for coef ∈ @view(cheby_f[4:end]) + nplus1 += 1.0 + U_nminus1, U_n = U_n, 2.0 * z * U_n - U_nminus1 + result[i] += coef * nplus1 * U_n * scale + end + end + + return nothing +end + """ returns wgts array containing the integration weights associated with all grid points for Clenshaw-Curtis quadrature diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl index 373d7c8f8..7f71fd12a 100644 --- a/moment_kinetics/src/coordinates.jl +++ b/moment_kinetics/src/coordinates.jl @@ -8,7 +8,7 @@ export set_element_boundaries using LinearAlgebra using ..type_definitions: mk_float, mk_int, OptionsDict -using ..array_allocation: allocate_float, allocate_shared_float, allocate_int +using ..array_allocation: allocate_float, allocate_shared_float, allocate_int, allocate_shared_int using ..calculus: derivative! using ..chebyshev: scaled_chebyshev_grid, scaled_chebyshev_radau_grid, setup_chebyshev_pseudospectral using ..communication @@ -25,7 +25,7 @@ using OrderedCollections: OrderedDict """ structure containing basic information related to coordinates """ -struct coordinate{T <: AbstractVector{mk_float},Tbparams} +struct coordinate{T <: AbstractVector{mk_float}, Ti <: AbstractVector{mk_int}, Tbparams} # name is the name of the variable associated with this coordiante name::String # n_global is the total number of grid points associated with this coordinate @@ -95,6 +95,11 @@ struct coordinate{T <: AbstractVector{mk_float},Tbparams} scratch8::Array{mk_float,1} # scratch9 is an array used for intermediate calculations requiring n entries scratch9::Array{mk_float,1} + # scratch10 is an array used for intermediate calculations requiring n entries + scratch10::Array{mk_float,1} + # scratch_int_nelement_plus_1 is an integer array used for intermediate calculations + # requiring nelement+1 entries + scratch_int_nelement_plus_1::Array{mk_int,1} # scratch_shared is a shared-memory array used for intermediate calculations requiring # n entries scratch_shared::T @@ -104,6 +109,12 @@ struct coordinate{T <: AbstractVector{mk_float},Tbparams} # scratch_shared3 is a shared-memory array used for intermediate calculations requiring # n entries scratch_shared3::T + # scratch_shared_int is a shared-memory array used for intermediate calculations + # requiring n integer entries + scratch_shared_int::Ti + # scratch_shared_int is a shared-memory array used for intermediate calculations + # requiring n integer entries + scratch_shared_int2::Ti # scratch_2d and scratch2_2d are arrays used for intermediate calculations requiring # ngrid x nelement entries scratch_2d::Array{mk_float,2} @@ -307,14 +318,21 @@ function define_coordinate(coord_input::NamedTuple; parallel_io::Bool=false, duniform_dgrid = allocate_float(coord_input.ngrid, coord_input.nelement_local) # scratch is an array used for intermediate calculations requiring n entries scratch = allocate_float(n_local) + # scratch_int_nelement_plus_1 is an array used for intermediate calculations requiring + # nelement+1 entries + scratch_int_nelement_plus_1 = allocate_int(coord_input.nelement_local + 1) if ignore_MPI scratch_shared = allocate_float(n_local) scratch_shared2 = allocate_float(n_local) scratch_shared3 = allocate_float(n_local) + scratch_shared_int = allocate_int(n_local) + scratch_shared_int2 = allocate_int(n_local) else scratch_shared = allocate_shared_float(n_local) scratch_shared2 = allocate_shared_float(n_local) scratch_shared3 = allocate_shared_float(n_local) + scratch_shared_int = allocate_shared_int(n_local) + scratch_shared_int2 = allocate_shared_int(n_local) end # Initialise scratch_shared* so that the debug checks do not complain when they get # printed by `println(io, all_inputs)` in mk_input(). @@ -322,6 +340,8 @@ function define_coordinate(coord_input::NamedTuple; parallel_io::Bool=false, scratch_shared .= NaN scratch_shared2 .= NaN scratch_shared3 .= NaN + scratch_shared_int .= typemin(mk_int) + scratch_shared_int2 .= typemin(mk_int) end if !ignore_MPI _block_synchronize() @@ -387,9 +407,10 @@ function define_coordinate(coord_input::NamedTuple; parallel_io::Bool=false, coord_input.cheb_option, coord_input.bc, coord_input.boundary_parameters, wgts, uniform_grid, duniform_dgrid, scratch, copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), - copy(scratch), scratch_shared, scratch_shared2, scratch_shared3, scratch_2d, - copy(scratch_2d), advection, send_buffer, receive_buffer, comm, local_io_range, - global_io_range, element_scale, element_shift, + copy(scratch), copy(scratch), scratch_int_nelement_plus_1, scratch_shared, + scratch_shared2, scratch_shared3, scratch_shared_int, scratch_shared_int2, + scratch_2d, copy(scratch_2d), advection, send_buffer, receive_buffer, comm, + local_io_range, global_io_range, element_scale, element_shift, coord_input.element_spacing_option, element_boundaries, radau_first_element, other_nodes, one_over_denominator) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 49b5e81fb..2bf8685a1 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -18,8 +18,8 @@ using ..calculus: derivative!, second_derivative!, integral, using ..communication using ..gauss_legendre: gausslegendre_info using ..input_structs -using ..interpolation: interpolate_to_grid_1d!, - interpolate_symmetric! +using ..interpolation: interpolate_to_grid_1d!, fill_1d_interpolation_matrix!, + interpolate_symmetric!, fill_interpolate_symmetric_matrix! using ..type_definitions: mk_float, mk_int using ..array_allocation: allocate_float using ..electron_fluid_equations: calculate_electron_moments!, @@ -69,6 +69,8 @@ using ..velocity_moments: integrate_over_vspace, calculate_electron_moment_deriv # Only needed so we can reference it in a docstring import ..runge_kutta +const integral_correction_sharpness = 4.0 + """ update_electron_pdf is a function that uses the electron kinetic equation to solve for the updated electron pdf @@ -895,7 +897,7 @@ global_rank[] == 0 && println("recalculating precon") nl_solver_params.preconditioners[ir] fill_electron_kinetic_equation_Jacobian!( - precon_matrix, f_electron_new, electron_ppar_new, moments, + precon_matrix, f_electron_new, electron_ppar_new, moments, phi, collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, @@ -1097,10 +1099,10 @@ global_rank[] == 0 && println("recalculating precon") # guess is zero, fill_electron_kinetic_equation_Jacobian!( explicit_J, f_electron_new, electron_ppar_new, moments, - collisions, composition, z, vperp, vpa, z_spectral, - vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, - external_source_settings, num_diss_params, t_params, ion_dt, ir, - evolve_ppar, :explicit_z, false) + phi, collisions, composition, z, vperp, vpa, z_spectral, + vperp_spectral, vpa_spectral, z_advect, vpa_advect, + scratch_dummy, external_source_settings, num_diss_params, + t_params, ion_dt, ir, evolve_ppar, :explicit_z, false) end begin_z_region() @loop_z iz begin @@ -1110,7 +1112,7 @@ global_rank[] == 0 && println("recalculating precon") A, f_electron_new[:,:,iz], electron_ppar_new[iz], dpdf_dz[:,:,iz], dpdf_dvpa[:,:,iz], z_speed, moments, zeroth_moment[iz], first_moment[iz], second_moment[iz], - third_moment[iz], dthird_moment_dz[iz], collisions, + third_moment[iz], dthird_moment_dz[iz], phi[iz], collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, @@ -1150,7 +1152,7 @@ global_rank[] == 0 && println("recalculating precon") # Get sparse matrix for explicit, right-hand-side part of the # solve. fill_electron_kinetic_equation_Jacobian!( - explicit_J, f_electron_new, electron_ppar_new, moments, + explicit_J, f_electron_new, electron_ppar_new, moments, phi, collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, ir, @@ -1520,36 +1522,7 @@ global_rank[] == 0 && println("recalculating precon") vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if z.bc ∈ ("wall", "constant") && (z.irank == 0 || z.irank == z.nrank - 1) - # Boundary conditions on incoming part of distribution function. Note - # that as density, upar, ppar do not change in this implicit step, - # f_electron_newvar, f_old, and residual should all be zero at exactly - # the same set of grid points, so it is reasonable to zero-out - # `residual` to impose the boundary condition. We impose this after - # subtracting f_old in case rounding errors, etc. mean that at some - # point f_old had a different boundary condition cut-off index. - begin_vperp_vpa_region() - v_unnorm = vpa.scratch - zero = 1.0e-14 - if z.irank == 0 - v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[1,ir], - moments.electron.upar[1,ir], true, true) - @loop_vperp_vpa ivperp ivpa begin - if v_unnorm[ivpa] > -zero - f_electron_residual[ivpa,ivperp,1] = 0.0 - end - end - end - if z.irank == z.nrank - 1 - v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[end,ir], - moments.electron.upar[end,ir], true, true) - @loop_vperp_vpa ivperp ivpa begin - if v_unnorm[ivpa] < zero - f_electron_residual[ivpa,ivperp,end] = 0.0 - end - end - end - end + zero_z_boundary_condition_points(f_electron_residual, z, vpa, moments, ir) return nothing end @@ -1649,10 +1622,56 @@ global_rank[] == 0 && println("recalculating precon") end end + new_lowerz_vcut_inds = r.scratch_shared_int + new_upperz_vcut_inds = r.scratch_shared_int2 apply_electron_bc_and_constraints_no_r!(f_electron_new, phi, moments, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, - num_diss_params, composition, ir) + num_diss_params, composition, ir; + lowerz_vcut_inds=new_lowerz_vcut_inds, + upperz_vcut_inds=new_upperz_vcut_inds) + # Check if either vcut_ind has changed - if either has, recalculate the + # preconditioner because the response at the grid point before the cutoff is + # sharp, so the preconditioner could be significantly wrong when it was + # calculated using the wrong vcut_ind. + lower_vcut_changed = @view z.scratch_shared_int[1:1] + upper_vcut_changed = @view z.scratch_shared_int[2:2] + @serial_region begin + if z.irank == 0 + precon_lowerz_vcut_inds = nl_solver_params.precon_lowerz_vcut_inds + if all(new_lowerz_vcut_inds .== precon_lowerz_vcut_inds) + lower_vcut_changed[] = 0 + else + lower_vcut_changed[] = 1 + precon_lowerz_vcut_inds .= new_lowerz_vcut_inds + end + end + MPI.Bcast!(lower_vcut_changed, comm_inter_block[]; root=0) + #req1 = MPI.Ibcast!(lower_vcut_changed, comm_inter_block[]; root=0) + + if z.irank == z.nrank - 1 + precon_upperz_vcut_inds = nl_solver_params.precon_upperz_vcut_inds + if all(new_upperz_vcut_inds .== precon_upperz_vcut_inds) + upper_vcut_changed[] = 0 + else + upper_vcut_changed[] = 1 + precon_upperz_vcut_inds .= new_upperz_vcut_inds + end + end + MPI.Bcast!(upper_vcut_changed, comm_inter_block[]; root=n_blocks[]-1) + #req2 = MPI.Ibcast!(upper_vcut_changed, comm_inter_block[]; root=n_blocks[]-1) + + # Eventually can use Ibcast!() to make the two broadcasts run + # simultaneously, but need the function to be merged into MPI.jl (see + # https://github.com/JuliaParallel/MPI.jl/pull/882). + #MPI.Waitall([req1, req2]) + end + _block_synchronize() + if lower_vcut_changed[] == 1 || upper_vcut_changed[] == 1 + # One or both of the indices changed for some `ir`, so force the + # preconditioner to be recalculated next time. + nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval + end if !evolve_ppar # update the electron heat flux @@ -1938,47 +1957,7 @@ to allow the outer r-loop to be parallelised. enforce_vperp_boundary_condition!(f_electron_residual, vperp.bc, vperp, vperp_spectral, vperp_adv, vperp_diffusion) end - if z.bc ∈ ("wall", "constant") && (z.irank == 0 || z.irank == z.nrank - 1) - # Boundary conditions on incoming part of distribution function. Note that - # as density, upar, ppar do not change in this implicit step, f_new, - # f_old, and residual should all be zero at exactly the same set of grid - # points, so it is reasonable to zero-out `residual` to impose the - # boundary condition. We impose this after subtracting f_old in case - # rounding errors, etc. mean that at some point f_old had a different - # boundary condition cut-off index. - begin_vperp_vpa_region() - v_unnorm = vpa.scratch - zero = 1.0e-14 - if z.irank == 0 - iz = 1 - v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], - fvec_in.electron_upar[iz,ir], true, true) - @loop_vperp_vpa ivperp ivpa begin - if v_unnorm[ivpa] > -zero - f_electron_residual[ivpa,ivperp,iz,ir] = 0.0 - end - end - end - if z.irank == z.nrank - 1 - iz = z.n - v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], - fvec_in.electron_upar[iz,ir], true, true) - @loop_vperp_vpa ivperp ivpa begin - if v_unnorm[ivpa] < zero - f_electron_residual[ivpa,ivperp,iz,ir] = 0.0 - end - end - end - end - begin_z_region() - @loop_z iz begin - @views moment_constraints_on_residual!(f_electron_residual[:,:,iz], - f_electron_new[:,:,iz], - (evolve_density=true, - evolve_upar=true, - evolve_ppar=true), - vpa) - end + zero_z_boundary_condition_points(f_electron_residual, z, vpa, moments, ir) return nothing end @@ -2078,7 +2057,9 @@ end function apply_electron_bc_and_constraints!(this_scratch, phi, moments, r, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, - num_diss_params, composition) + num_diss_params, composition; + lowerz_vcut_inds=nothing, + upperz_vcut_inds=nothing) latest_pdf = this_scratch.pdf_electron begin_r_z_vperp_vpa_region() @@ -2093,7 +2074,8 @@ function apply_electron_bc_and_constraints!(this_scratch, phi, moments, r, z, vp moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, moments, num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - composition.me_over_mi, ir) + composition.me_over_mi, ir; lowerz_vcut_inds=lowerz_vcut_inds, + upperz_vcut_inds=upperz_vcut_inds) end begin_r_z_region() @@ -2117,7 +2099,8 @@ end function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, num_diss_params, composition, - ir) + ir; lowerz_vcut_inds=nothing, + upperz_vcut_inds=nothing) begin_z_vperp_vpa_region() @loop_z_vperp_vpa iz ivperp ivpa begin f_electron[ivpa,ivperp,iz] = max(f_electron[ivpa,ivperp,iz], 0.0) @@ -2128,7 +2111,8 @@ function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vp f_electron, phi, moments.electron.vth[:,ir], moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, moments, num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - composition.me_over_mi, ir) + composition.me_over_mi, ir; lowerz_vcut_inds=lowerz_vcut_inds, + upperz_vcut_inds=upperz_vcut_inds) begin_z_region() A = moments.electron.constraints_A_coefficient @@ -2148,19 +2132,19 @@ function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vp end end -function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir) - u_over_vt = upar[1,ir] / vthe[1,ir] +function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa) + u_over_vt = upar / vthe # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar # shift' sigma = -u_over_vt - vpa_unnorm = @. vpa.scratch2 = vthe[1,ir] * (vpa.grid - sigma) + vpa_unnorm = @. vpa.scratch2 = vthe * (vpa.grid - sigma) # Initial guess for cut-off velocity is result from previous RK stage (which # might be the previous timestep if this is the first stage). Recalculate this # value from phi. - vcut = sqrt(phi[1,ir] / me_over_mi) + vcut = sqrt(phi / me_over_mi) # -vcut is between minus_vcut_ind-1 and minus_vcut_ind minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut) @@ -2222,20 +2206,20 @@ function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir) reversed_wpa_of_minus_vpa end -function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir) - u_over_vt = upar[end,ir] / vthe[end,ir] +function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa) + u_over_vt = upar / vthe # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar # shift' sigma = -u_over_vt # Delete the upar contribution here if ignoring the 'upar shift' - vpa_unnorm = @. vpa.scratch2 = vthe[end,ir] * (vpa.grid - sigma) + vpa_unnorm = @. vpa.scratch2 = vthe * (vpa.grid - sigma) # Initial guess for cut-off velocity is result from previous RK stage (which # might be the previous timestep if this is the first stage). Recalculate this # value from phi. - vcut = sqrt(phi[end,ir] / me_over_mi) + vcut = sqrt(phi / me_over_mi) # vcut is between plus_vcut_ind and plus_vcut_ind+1 plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut) @@ -2305,10 +2289,314 @@ function get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) end +function fill_integral_pieces!( + pdf_part, vthe, vpa, vpa_unnorm, density_integral_pieces, flow_integral_pieces, + energy_integral_pieces, cubic_integral_pieces, quartic_integral_pieces) + + @. density_integral_pieces = pdf_part * vpa.wgts / sqrt(π) + @. flow_integral_pieces = density_integral_pieces * vpa_unnorm / vthe + @. energy_integral_pieces = flow_integral_pieces * vpa_unnorm / vthe + @. cubic_integral_pieces = energy_integral_pieces * vpa_unnorm / vthe + @. quartic_integral_pieces = cubic_integral_pieces * vpa_unnorm / vthe +end + +function get_residual_and_coefficients_for_bc(a1, a1prime, a2, a2prime, b1, b1prime, + c1, c1prime, c2, c2prime, d1, d1prime, + e1, e1prime, e2, e2prime, u_over_vt, + bc_constraints) + if bc_constraints + alpha = a1 + 2.0 * a2 + alphaprime = a1prime + 2.0 * a2prime + beta = c1 + 2.0 * c2 + betaprime = c1prime + 2.0 * c2prime + gamma = u_over_vt^2 * alpha - 2.0 * u_over_vt * b1 + beta + gammaprime = u_over_vt^2 * alphaprime - 2.0 * u_over_vt * b1prime + betaprime + delta = u_over_vt^2 * beta - 2.0 * u_over_vt * d1 + e1 + 2.0 * e2 + deltaprime = u_over_vt^2 * betaprime - 2.0 * u_over_vt * d1prime + e1prime + 2.0 * e2prime + + A = (0.5 * beta - delta) / (beta * gamma - alpha * delta) + Aprime = (0.5 * betaprime - deltaprime + - (0.5 * beta - delta) * (gamma * betaprime + beta * gammaprime - delta * alphaprime - alpha * deltaprime) + / (beta * gamma - alpha * delta) + ) / (beta * gamma - alpha * delta) + C = (1.0 - alpha * A) / beta + Cprime = -(A * alphaprime + alpha * Aprime) / beta - (1.0 - alpha * A) * betaprime / beta^2 + else + A = 1.0 + Aprime = 0.0 + C = 0.0 + Cprime = 0.0 + end + + epsilon = A * b1 + C * d1 - u_over_vt + epsilonprime = b1 * Aprime + A * b1prime + d1 * Cprime + C * d1prime + + return epsilon, epsilonprime, A, C +end + +function get_integrals_and_derivatives_lowerz( + vcut, minus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, u_over_vt, + density_integral_pieces_lowerz, flow_integral_pieces_lowerz, + energy_integral_pieces_lowerz, cubic_integral_pieces_lowerz, + quartic_integral_pieces_lowerz, bc_constraints) + # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and + # minus_vcut_ind where -vcut is. + vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) + + function get_for_one_moment(integral_pieces) + # Integral contributions from the cell containing vcut. + # Define these as follows to be consistent with the way the cutoff is + # applied around plus_vcut_ind below. + # Note that `integral_vcut_cell_part1` and `integral_vcut_cell_part2` + # include all the contributions from the grid points + # `minus_vcut_ind-1` and `minus_vcut_ind`, not just those from + # 'inside' the grid cell. + if vcut_fraction < 0.5 + integral_vcut_cell_part2 = integral_pieces[minus_vcut_ind-1] * (0.5 - vcut_fraction) + + integral_pieces[minus_vcut_ind] + integral_vcut_cell_part1 = integral_pieces[minus_vcut_ind-1] * (0.5 + vcut_fraction) + + # part1prime is d(part1)/d(vcut) + part1prime = -integral_pieces[minus_vcut_ind-1] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) + else + integral_vcut_cell_part2 = integral_pieces[minus_vcut_ind] * (1.5 - vcut_fraction) + integral_vcut_cell_part1 = integral_pieces[minus_vcut_ind-1] + + integral_pieces[minus_vcut_ind] * (vcut_fraction - 0.5) + + # part1prime is d(part1)/d(vcut) + part1prime = -integral_pieces[minus_vcut_ind] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) + end + + part1 = sum(@view integral_pieces[1:minus_vcut_ind-2]) + integral_vcut_cell_part1 + + # Integral contribution from the cell containing sigma + integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind]) + + part2 = sum(@view integral_pieces[minus_vcut_ind+1:sigma_ind-2]) + part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind-1] + sigma_fraction * integral_sigma_cell + # part2prime is d(part2)/d(vcut) + part2prime = -part1prime + + return part1, part1prime, part2, part2prime + end + this_a1, this_a1prime, this_a2, this_a2prime = get_for_one_moment(density_integral_pieces_lowerz) + this_b1, this_b1prime, this_b2, _ = get_for_one_moment(flow_integral_pieces_lowerz) + this_c1, this_c1prime, this_c2, this_c2prime = get_for_one_moment(energy_integral_pieces_lowerz) + this_d1, this_d1prime, this_d2, _ = get_for_one_moment(cubic_integral_pieces_lowerz) + this_e1, this_e1prime, this_e2, this_e2prime = get_for_one_moment(quartic_integral_pieces_lowerz) + + return get_residual_and_coefficients_for_bc( + this_a1, this_a1prime, this_a2, this_a2prime, this_b1, + this_b1prime, this_c1, this_c1prime, this_c2, this_c2prime, + this_d1, this_d1prime, this_e1, this_e1prime, this_e2, + this_e2prime, u_over_vt, bc_constraints)..., + this_a2, this_b2, this_c2, this_d2 +end + +function get_lowerz_integral_correction_components( + pdf_slice, vthe, vpa, vpa_unnorm, u_over_vt, sigma_ind, sigma_fraction, vcut, + minus_vcut_ind, plus_vcut_ind, bc_constraints) + + # Need to recalculate integral pieces with the updated distribution function + density_integral_pieces_lowerz = vpa.scratch3 + flow_integral_pieces_lowerz = vpa.scratch4 + energy_integral_pieces_lowerz = vpa.scratch5 + cubic_integral_pieces_lowerz = vpa.scratch6 + quartic_integral_pieces_lowerz = vpa.scratch7 + fill_integral_pieces!( + pdf_slice, vthe, vpa, vpa_unnorm, density_integral_pieces_lowerz, + flow_integral_pieces_lowerz, energy_integral_pieces_lowerz, + cubic_integral_pieces_lowerz, quartic_integral_pieces_lowerz) + + # Update the part2 integrals since we've applied the A and C factors + _, _, _, _, a2, b2, c2, d2 = + get_integrals_and_derivatives_lowerz( + vcut, minus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, u_over_vt, + density_integral_pieces_lowerz, flow_integral_pieces_lowerz, + energy_integral_pieces_lowerz, cubic_integral_pieces_lowerz, + quartic_integral_pieces_lowerz, bc_constraints) + + function get_part3_for_one_moment_lower(integral_pieces) + # Integral contribution from the cell containing sigma + integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind]) + + part3 = sum(@view integral_pieces[sigma_ind+1:plus_vcut_ind+1]) + part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell + + return part3 + end + a3 = get_part3_for_one_moment_lower(density_integral_pieces_lowerz) + b3 = get_part3_for_one_moment_lower(flow_integral_pieces_lowerz) + c3 = get_part3_for_one_moment_lower(energy_integral_pieces_lowerz) + d3 = get_part3_for_one_moment_lower(cubic_integral_pieces_lowerz) + + # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is. + correction0_integral_pieces = @. vpa.scratch3 = pdf_slice * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe^2) + for ivpa ∈ 1:sigma_ind + # We only add the corrections to 'part3', so zero them out for negative v_∥. + # I think this is only actually significant for `sigma_ind-1` and + # `sigma_ind`. Even though `sigma_ind` is part of the distribution + # function that we are correcting, for v_∥>0, it affects the integral in + # the 'sigma_cell' between `sigma_ind-1` and `sigma_ind`, which would + # affect the numerically calculated integrals for f(v_∥<0), so if we + # 'corrected' its value, those integrals would change and the constraints + # would not be exactly satisfied. The difference should be small, as the + # correction at that point is multiplied by + # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0. + correction0_integral_pieces[ivpa] = 0.0 + end + correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe + correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe + correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe + correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe + correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe + correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe + + alpha = get_part3_for_one_moment_lower(correction0_integral_pieces) + beta = get_part3_for_one_moment_lower(correction1_integral_pieces) + gamma = get_part3_for_one_moment_lower(correction2_integral_pieces) + delta = get_part3_for_one_moment_lower(correction3_integral_pieces) + epsilon = get_part3_for_one_moment_lower(correction4_integral_pieces) + zeta = get_part3_for_one_moment_lower(correction5_integral_pieces) + eta = get_part3_for_one_moment_lower(correction6_integral_pieces) + + return a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta +end + +function get_integrals_and_derivatives_upperz( + vcut, plus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, u_over_vt, + density_integral_pieces_upperz, flow_integral_pieces_upperz, + energy_integral_pieces_upperz, cubic_integral_pieces_upperz, + quartic_integral_pieces_upperz, bc_constraints) + # vcut_fraction is the fraction of the distance between plus_vcut_ind and + # plus_vcut_ind+1 where vcut is. + vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) + + function get_for_one_moment(integral_pieces) + # Integral contribution from the cell containing vcut + # Define these as follows to be consistent with the way the cutoff is + # applied around plus_vcut_ind below. + # Note that `integral_vcut_cell_part1` and `integral_vcut_cell_part2` + # include all the contributions from the grid points `plus_vcut_ind` + # and `plus_vcut_ind+1`, not just those from 'inside' the grid cell. + if vcut_fraction > 0.5 + integral_vcut_cell_part2 = integral_pieces[plus_vcut_ind] + + integral_pieces[plus_vcut_ind+1] * (vcut_fraction - 0.5) + integral_vcut_cell_part1 = integral_pieces[plus_vcut_ind+1] * (1.5 - vcut_fraction) + + # part1prime is d(part1)/d(vcut) + part1prime = -integral_pieces[plus_vcut_ind+1] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) + else + integral_vcut_cell_part2 = integral_pieces[plus_vcut_ind] * (0.5 + vcut_fraction) + integral_vcut_cell_part1 = integral_pieces[plus_vcut_ind] * (0.5 - vcut_fraction) + + integral_pieces[plus_vcut_ind+1] + + # part1prime is d(part1)/d(vcut) + part1prime = -integral_pieces[plus_vcut_ind] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) + end + + part1 = sum(@view integral_pieces[plus_vcut_ind+2:end]) + integral_vcut_cell_part1 + + # Integral contribution from the cell containing sigma + integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1]) + + part2 = sum(@view integral_pieces[sigma_ind+2:plus_vcut_ind-1]) + part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind+1] + sigma_fraction * integral_sigma_cell + # part2prime is d(part2)/d(vcut) + part2prime = -part1prime + + return part1, part1prime, part2, part2prime + end + this_a1, this_a1prime, this_a2, this_a2prime = get_for_one_moment(density_integral_pieces_upperz) + this_b1, this_b1prime, this_b2, _ = get_for_one_moment(flow_integral_pieces_upperz) + this_c1, this_c1prime, this_c2, this_c2prime = get_for_one_moment(energy_integral_pieces_upperz) + this_d1, this_d1prime, this_d2, _ = get_for_one_moment(cubic_integral_pieces_upperz) + this_e1, this_e1prime, this_e2, this_e2prime = get_for_one_moment(quartic_integral_pieces_upperz) + + return get_residual_and_coefficients_for_bc( + this_a1, this_a1prime, this_a2, this_a2prime, this_b1, + this_b1prime, this_c1, this_c1prime, this_c2, this_c2prime, + this_d1, this_d1prime, this_e1, this_e1prime, this_e2, + this_e2prime, u_over_vt, bc_constraints)..., + this_a2, this_b2, this_c2, this_d2 +end + +function get_upperz_integral_correction_components( + pdf_slice, vthe, vpa, vpa_unnorm, u_over_vt, sigma_ind, sigma_fraction, vcut, + minus_vcut_ind, plus_vcut_ind, bc_constraints) + + # Need to recalculate integral pieces with the updated distribution function + density_integral_pieces_upperz = vpa.scratch3 + flow_integral_pieces_upperz = vpa.scratch4 + energy_integral_pieces_upperz = vpa.scratch5 + cubic_integral_pieces_upperz = vpa.scratch6 + quartic_integral_pieces_upperz = vpa.scratch7 + fill_integral_pieces!( + pdf_slice, vthe, vpa, vpa_unnorm, + density_integral_pieces_upperz, flow_integral_pieces_upperz, + energy_integral_pieces_upperz, cubic_integral_pieces_upperz, + quartic_integral_pieces_upperz) + + # Update the part2 integrals since we've applied the A and C factors + _, _, _, _, a2, b2, c2, d2 = + get_integrals_and_derivatives_upperz( + vcut, plus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, u_over_vt, + density_integral_pieces_upperz, flow_integral_pieces_upperz, + energy_integral_pieces_upperz, cubic_integral_pieces_upperz, + quartic_integral_pieces_upperz, bc_constraints) + + function get_part3_for_one_moment_upper(integral_pieces) + # Integral contribution from the cell containing sigma + integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1]) + + part3 = sum(@view integral_pieces[minus_vcut_ind-1:sigma_ind-1]) + part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell + + return part3 + end + a3 = get_part3_for_one_moment_upper(density_integral_pieces_upperz) + b3 = get_part3_for_one_moment_upper(flow_integral_pieces_upperz) + c3 = get_part3_for_one_moment_upper(energy_integral_pieces_upperz) + d3 = get_part3_for_one_moment_upper(cubic_integral_pieces_upperz) + + # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is. + correction0_integral_pieces = @. vpa.scratch3 = pdf_slice * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe^2) + for ivpa ∈ sigma_ind:vpa.n + # We only add the corrections to 'part3', so zero them out for positive v_∥. + # I think this is only actually significant for `sigma_ind` and + # `sigma_ind+1`. Even though `sigma_ind` is part of the distribution + # function that we are correcting, for v_∥<0, it affects the integral in + # the 'sigma_cell' between `sigma_ind` and `sigma_ind+1`, which would + # affect the numerically calculated integrals for f(v_∥>0), so if we + # 'corrected' its value, those integrals would change and the constraints + # would not be exactly satisfied. The difference should be small, as the + # correction at that point is multiplied by + # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0. + correction0_integral_pieces[ivpa] = 0.0 + end + correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe + correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe + correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe + correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe + correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe + correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe + + alpha = get_part3_for_one_moment_upper(correction0_integral_pieces) + beta = get_part3_for_one_moment_upper(correction1_integral_pieces) + gamma = get_part3_for_one_moment_upper(correction2_integral_pieces) + delta = get_part3_for_one_moment_upper(correction3_integral_pieces) + epsilon = get_part3_for_one_moment_upper(correction4_integral_pieces) + zeta = get_part3_for_one_moment_upper(correction5_integral_pieces) + eta = get_part3_for_one_moment_upper(correction6_integral_pieces) + + return a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta +end + @timeit global_timer enforce_boundary_condition_on_electron_pdf!( pdf, phi, vthe, upar, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_adv, moments, vpa_diffusion, me_over_mi, ir; - bc_constraints=true, update_vcut=true) = begin + bc_constraints=true, update_vcut=true, lowerz_vcut_inds=nothing, + upperz_vcut_inds=nothing) = begin @boundscheck bc_constraints && !update_vcut && error("update_vcut is not used when bc_constraints=true, but update_vcut has non-default value") @@ -2373,39 +2661,6 @@ end newton_max_its = 100 - function get_residual_and_coefficients_for_bc(a1, a1prime, a2, a2prime, b1, b1prime, - c1, c1prime, c2, c2prime, d1, d1prime, - e1, e1prime, e2, e2prime, u_over_vt) - if bc_constraints - alpha = a1 + 2.0 * a2 - alphaprime = a1prime + 2.0 * a2prime - beta = c1 + 2.0 * c2 - betaprime = c1prime + 2.0 * c2prime - gamma = u_over_vt^2 * alpha - 2.0 * u_over_vt * b1 + beta - gammaprime = u_over_vt^2 * alphaprime - 2.0 * u_over_vt * b1prime + betaprime - delta = u_over_vt^2 * beta - 2.0 * u_over_vt * d1 + e1 + 2.0 * e2 - deltaprime = u_over_vt^2 * betaprime - 2.0 * u_over_vt * d1prime + e1prime + 2.0 * e2prime - - A = (0.5 * beta - delta) / (beta * gamma - alpha * delta) - Aprime = (0.5 * betaprime - deltaprime - - (0.5 * beta - delta) * (gamma * betaprime + beta * gammaprime - delta * alphaprime - alpha * deltaprime) - / (beta * gamma - alpha * delta) - ) / (beta * gamma - alpha * delta) - C = (1.0 - alpha * A) / beta - Cprime = -(A * alphaprime + alpha * Aprime) / beta - (1.0 - alpha * A) * betaprime / beta^2 - else - A = 1.0 - Aprime = 0.0 - C = 0.0 - Cprime = 0.0 - end - - epsilon = A * b1 + C * d1 - u_over_vt - epsilonprime = b1 * Aprime + A * b1prime + d1 * Cprime + C * d1prime - - return epsilon, epsilonprime, A, C - end - @serial_region begin if z.irank == 0 if z.bc != "wall" @@ -2418,8 +2673,9 @@ end vpa_unnorm, u_over_vt, vcut, minus_vcut_ind, sigma, sigma_ind, sigma_fraction, element_with_zero, element_with_zero_boundary, last_point_near_zero, - reversed_wpa_of_minus_vpa = get_cutoff_params_lower(upar, vthe, phi, - me_over_mi, vpa, ir) + reversed_wpa_of_minus_vpa = + get_cutoff_params_lower(upar[1,ir], vthe[1,ir], phi[1,ir], me_over_mi, + vpa) # interpolate the pdf onto this grid # 'near zero' means in the range where @@ -2441,75 +2697,31 @@ end pdf[last_point_near_zero+1:end,1,1] .= reversed_pdf_far_from_zero # Per-grid-point contributions to moment integrals - # Note that we need to include the normalisation factor of 1/sqrt(pi) that + # Note that we need to include the normalisation factor of 1/sqrt(π) that # would be factored in by integrate_over_vspace(). This will need to # change/adapt when we support 2V as well as 1V. - density_integral_pieces_lowerz = @views @. vpa.scratch3 = pdf[:,1,1] * vpa.wgts / sqrt(pi) - flow_integral_pieces_lowerz = @. vpa.scratch4 = density_integral_pieces_lowerz * vpa_unnorm / vthe[1] - energy_integral_pieces_lowerz = @. vpa.scratch5 = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1] - cubic_integral_pieces_lowerz = @. vpa.scratch6 = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1] - quartic_integral_pieces_lowerz = @. vpa.scratch7 = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1] - - function get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) - # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and - # minus_vcut_ind where -vcut is. - local vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) - - function get_for_one_moment(integral_pieces) - # Integral contributions from the cell containing vcut. - # Define these as follows to be consistent with the way the cutoff is - # applied around plus_vcut_ind below. - # Note that `integral_vcut_cell_part1` and `integral_vcut_cell_part2` - # include all the contributions from the grid points - # `minus_vcut_ind-1` and `minus_vcut_ind`, not just those from - # 'inside' the grid cell. - if vcut_fraction < 0.5 - integral_vcut_cell_part2 = integral_pieces[minus_vcut_ind-1] * (0.5 - vcut_fraction) + - integral_pieces[minus_vcut_ind] - integral_vcut_cell_part1 = integral_pieces[minus_vcut_ind-1] * (0.5 + vcut_fraction) - - # part1prime is d(part1)/d(vcut) - part1prime = -integral_pieces[minus_vcut_ind-1] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) - else - integral_vcut_cell_part2 = integral_pieces[minus_vcut_ind] * (1.5 - vcut_fraction) - integral_vcut_cell_part1 = integral_pieces[minus_vcut_ind-1] + - integral_pieces[minus_vcut_ind] * (vcut_fraction - 0.5) - - # part1prime is d(part1)/d(vcut) - part1prime = -integral_pieces[minus_vcut_ind] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) - end - - part1 = sum(@view integral_pieces[1:minus_vcut_ind-2]) + integral_vcut_cell_part1 - - # Integral contribution from the cell containing sigma - integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind]) - - part2 = sum(@view integral_pieces[minus_vcut_ind+1:sigma_ind-2]) - part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind-1] + sigma_fraction * integral_sigma_cell - # part2prime is d(part2)/d(vcut) - part2prime = -part1prime - - return part1, part1prime, part2, part2prime - end - this_a1, this_a1prime, this_a2, this_a2prime = get_for_one_moment(density_integral_pieces_lowerz) - this_b1, this_b1prime, this_b2, _ = get_for_one_moment(flow_integral_pieces_lowerz) - this_c1, this_c1prime, this_c2, this_c2prime = get_for_one_moment(energy_integral_pieces_lowerz) - this_d1, this_d1prime, this_d2, _ = get_for_one_moment(cubic_integral_pieces_lowerz) - this_e1, this_e1prime, this_e2, this_e2prime = get_for_one_moment(quartic_integral_pieces_lowerz) - - return get_residual_and_coefficients_for_bc( - this_a1, this_a1prime, this_a2, this_a2prime, this_b1, - this_b1prime, this_c1, this_c1prime, this_c2, this_c2prime, - this_d1, this_d1prime, this_e1, this_e1prime, this_e2, - this_e2prime, u_over_vt)..., - this_a2, this_b2, this_c2, this_d2 - end + density_integral_pieces_lowerz = vpa.scratch3 + flow_integral_pieces_lowerz = vpa.scratch4 + energy_integral_pieces_lowerz = vpa.scratch5 + cubic_integral_pieces_lowerz = vpa.scratch6 + quartic_integral_pieces_lowerz = vpa.scratch7 + fill_integral_pieces!( + @view(pdf[:,1,1]), vthe[1], vpa, vpa_unnorm, + density_integral_pieces_lowerz, flow_integral_pieces_lowerz, + energy_integral_pieces_lowerz, cubic_integral_pieces_lowerz, + quartic_integral_pieces_lowerz) counter = 1 A = 1.0 C = 0.0 # Always do at least one update of vcut - epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) + epsilon, epsilonprime, A, C, a2, b2, c2, d2 = + get_integrals_and_derivatives_lowerz( + vcut, minus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, + u_over_vt, density_integral_pieces_lowerz, + flow_integral_pieces_lowerz, energy_integral_pieces_lowerz, + cubic_integral_pieces_lowerz, quartic_integral_pieces_lowerz, + bc_constraints) if bc_constraints while true # Newton iteration update. Note that primes denote derivatives with @@ -2531,7 +2743,13 @@ end vcut = vcut + delta_v minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut) - epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) + epsilon, epsilonprime, A, C, a2, b2, c2, d2 = + get_integrals_and_derivatives_lowerz( + vcut, minus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, + u_over_vt, density_integral_pieces_lowerz, + flow_integral_pieces_lowerz, energy_integral_pieces_lowerz, + cubic_integral_pieces_lowerz, quartic_integral_pieces_lowerz, + bc_constraints) if abs(epsilon) < newton_tol break @@ -2558,7 +2776,13 @@ end break end - epsilon, epsilonprime, _, _, _, _, _, _ = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) + epsilon, epsilonprime, _, _, _, _, _, _ = + get_integrals_and_derivatives_lowerz( + vcut, minus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, + u_over_vt, density_integral_pieces_lowerz, + flow_integral_pieces_lowerz, energy_integral_pieces_lowerz, + cubic_integral_pieces_lowerz, quartic_integral_pieces_lowerz, + bc_constraints) end end @@ -2572,8 +2796,14 @@ end # plus_vcut_ind+1 where vcut is. vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) if vcut_fraction > 0.5 + if lowerz_vcut_inds !== nothing + lowerz_vcut_inds[ir] = plus_vcut_ind+1 + end pdf[plus_vcut_ind+1,1,1] *= vcut_fraction - 0.5 else + if lowerz_vcut_inds !== nothing + lowerz_vcut_inds[ir] = plus_vcut_ind + end pdf[plus_vcut_ind+1,1,1] = 0.0 pdf[plus_vcut_ind,1,1] *= vcut_fraction + 0.5 end @@ -2591,60 +2821,11 @@ end # boundary condition, but would not be numerically true because of the # interpolation. - # Need to recalculate these with the updated distribution function - @views @. density_integral_pieces_lowerz = pdf[:,1,1] * vpa.wgts / sqrt(pi) - @. flow_integral_pieces_lowerz = density_integral_pieces_lowerz * vpa_unnorm / vthe[1] - @. energy_integral_pieces_lowerz = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1] - @. cubic_integral_pieces_lowerz = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1] - @. quartic_integral_pieces_lowerz = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1] - - # Update the part2 integrals since we've applied the A and C factors - _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) - - function get_part3_for_one_moment_lower(integral_pieces) - # Integral contribution from the cell containing sigma - integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind]) - - part3 = sum(@view integral_pieces[sigma_ind+1:plus_vcut_ind+1]) - part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell - - return part3 - end - a3 = get_part3_for_one_moment_lower(density_integral_pieces_lowerz) - b3 = get_part3_for_one_moment_lower(flow_integral_pieces_lowerz) - c3 = get_part3_for_one_moment_lower(energy_integral_pieces_lowerz) - d3 = get_part3_for_one_moment_lower(cubic_integral_pieces_lowerz) - - # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is. - sharpness = 4.0 - correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[1]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[1]^2) - for ivpa ∈ 1:sigma_ind - # We only add the corrections to 'part3', so zero them out for negative v_∥. - # I think this is only actually significant for `sigma_ind-1` and - # `sigma_ind`. Even though `sigma_ind` is part of the distribution - # function that we are correcting, for v_∥>0, it affects the integral in - # the 'sigma_cell' between `sigma_ind-1` and `sigma_ind`, which would - # affect the numerically calculated integrals for f(v_∥<0), so if we - # 'corrected' its value, those integrals would change and the constraints - # would not be exactly satisfied. The difference should be small, as the - # correction at that point is multiplied by - # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0. - correction0_integral_pieces[ivpa] = 0.0 - end - correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[1] - correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[1] - correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[1] - correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[1] - correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[1] - correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[1] - - alpha = get_part3_for_one_moment_lower(correction0_integral_pieces) - beta = get_part3_for_one_moment_lower(correction1_integral_pieces) - gamma = get_part3_for_one_moment_lower(correction2_integral_pieces) - delta = get_part3_for_one_moment_lower(correction3_integral_pieces) - epsilon = get_part3_for_one_moment_lower(correction4_integral_pieces) - zeta = get_part3_for_one_moment_lower(correction5_integral_pieces) - eta = get_part3_for_one_moment_lower(correction6_integral_pieces) + a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta = + get_lowerz_integral_correction_components( + @view(pdf[:,1,1]), vthe[1], vpa, vpa_unnorm, u_over_vt, + sigma_ind, sigma_fraction, vcut, minus_vcut_ind, plus_vcut_ind, + bc_constraints) # Update the v_∥>0 part of f to correct the moments as # f(0 0.5 - integral_vcut_cell_part2 = integral_pieces[plus_vcut_ind] + - integral_pieces[plus_vcut_ind+1] * (vcut_fraction - 0.5) - integral_vcut_cell_part1 = integral_pieces[plus_vcut_ind+1] * (1.5 - vcut_fraction) - - # part1prime is d(part1)/d(vcut) - part1prime = -integral_pieces[plus_vcut_ind+1] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) - else - integral_vcut_cell_part2 = integral_pieces[plus_vcut_ind] * (0.5 + vcut_fraction) - integral_vcut_cell_part1 = integral_pieces[plus_vcut_ind] * (0.5 - vcut_fraction) + - integral_pieces[plus_vcut_ind+1] - - # part1prime is d(part1)/d(vcut) - part1prime = -integral_pieces[plus_vcut_ind] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) - end - - part1 = sum(@view integral_pieces[plus_vcut_ind+2:end]) + integral_vcut_cell_part1 - - # Integral contribution from the cell containing sigma - integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1]) - - part2 = sum(@view integral_pieces[sigma_ind+2:plus_vcut_ind-1]) - part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind+1] + sigma_fraction * integral_sigma_cell - # part2prime is d(part2)/d(vcut) - part2prime = -part1prime - - return part1, part1prime, part2, part2prime - end - this_a1, this_a1prime, this_a2, this_a2prime = get_for_one_moment(density_integral_pieces_upperz) - this_b1, this_b1prime, this_b2, _ = get_for_one_moment(flow_integral_pieces_upperz) - this_c1, this_c1prime, this_c2, this_c2prime = get_for_one_moment(energy_integral_pieces_upperz) - this_d1, this_d1prime, this_d2, _ = get_for_one_moment(cubic_integral_pieces_upperz) - this_e1, this_e1prime, this_e2, this_e2prime = get_for_one_moment(quartic_integral_pieces_upperz) - - return get_residual_and_coefficients_for_bc( - this_a1, this_a1prime, this_a2, this_a2prime, this_b1, - this_b1prime, this_c1, this_c1prime, this_c2, this_c2prime, - this_d1, this_d1prime, this_e1, this_e1prime, this_e2, - this_e2prime, u_over_vt)..., - this_a2, this_b2, this_c2, this_d2 - end + density_integral_pieces_upperz = vpa.scratch3 + flow_integral_pieces_upperz = vpa.scratch4 + energy_integral_pieces_upperz = vpa.scratch5 + cubic_integral_pieces_upperz = vpa.scratch6 + quartic_integral_pieces_upperz = vpa.scratch7 + fill_integral_pieces!( + @view(pdf[:,1,end]), vthe[end], vpa, vpa_unnorm, + density_integral_pieces_upperz, flow_integral_pieces_upperz, + energy_integral_pieces_upperz, cubic_integral_pieces_upperz, + quartic_integral_pieces_upperz) counter = 1 # Always do at least one update of vcut - epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) + epsilon, epsilonprime, A, C, a2, b2, c2, d2 = + get_integrals_and_derivatives_upperz( + vcut, plus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, u_over_vt, + density_integral_pieces_upperz, flow_integral_pieces_upperz, + energy_integral_pieces_upperz, cubic_integral_pieces_upperz, + quartic_integral_pieces_upperz, bc_constraints) if bc_constraints while true # Newton iteration update. Note that primes denote derivatives with @@ -2801,7 +2939,13 @@ end vcut = vcut + delta_v plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut) - epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) + epsilon, epsilonprime, A, C, a2, b2, c2, d2 = + get_integrals_and_derivatives_upperz( + vcut, plus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, + u_over_vt, density_integral_pieces_upperz, + flow_integral_pieces_upperz, energy_integral_pieces_upperz, + cubic_integral_pieces_upperz, quartic_integral_pieces_upperz, + bc_constraints) if abs(epsilon) < newton_tol break @@ -2828,7 +2972,13 @@ end break end - epsilon, epsilonprime, _, _, _, _, _, _ = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) + epsilon, epsilonprime, _, _, _, _, _, _ = + get_integrals_and_derivatives_upperz( + vcut, plus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, + u_over_vt, density_integral_pieces_upperz, + flow_integral_pieces_upperz, energy_integral_pieces_upperz, + cubic_integral_pieces_upperz, quartic_integral_pieces_upperz, + bc_constraints) end end @@ -2842,8 +2992,14 @@ end # minus_vcut_ind where -vcut is. vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) if vcut_fraction < 0.5 + if upperz_vcut_inds !== nothing + upperz_vcut_inds[ir] = minus_vcut_ind-1 + end pdf[minus_vcut_ind-1,1,end] *= 0.5 - vcut_fraction else + if upperz_vcut_inds !== nothing + upperz_vcut_inds[ir] = minus_vcut_ind + end pdf[minus_vcut_ind-1,1,end] = 0.0 pdf[minus_vcut_ind,1,end] *= 1.5 - vcut_fraction end @@ -2861,60 +3017,11 @@ end # boundary condition, but would not be numerically true because of the # interpolation. - # Need to recalculate these with the updated distribution function - @views @. density_integral_pieces_upperz = pdf[:,1,end] * vpa.wgts / sqrt(pi) - @. flow_integral_pieces_upperz = density_integral_pieces_upperz * vpa_unnorm / vthe[end] - @. energy_integral_pieces_upperz = flow_integral_pieces_upperz * vpa_unnorm / vthe[end] - @. cubic_integral_pieces_upperz = energy_integral_pieces_upperz * vpa_unnorm / vthe[end] - @. quartic_integral_pieces_upperz = cubic_integral_pieces_upperz * vpa_unnorm / vthe[end] - - # Update the part2 integrals since we've applied the A and C factors - _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) - - function get_part3_for_one_moment_upper(integral_pieces) - # Integral contribution from the cell containing sigma - integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1]) - - part3 = sum(@view integral_pieces[minus_vcut_ind-1:sigma_ind-1]) - part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell - - return part3 - end - a3 = get_part3_for_one_moment_upper(density_integral_pieces_upperz) - b3 = get_part3_for_one_moment_upper(flow_integral_pieces_upperz) - c3 = get_part3_for_one_moment_upper(energy_integral_pieces_upperz) - d3 = get_part3_for_one_moment_upper(cubic_integral_pieces_upperz) - - # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is. - sharpness = 4.0 - correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[end]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[end]^2) - for ivpa ∈ sigma_ind:vpa.n - # We only add the corrections to 'part3', so zero them out for positive v_∥. - # I think this is only actually significant for `sigma_ind` and - # `sigma_ind+1`. Even though `sigma_ind` is part of the distribution - # function that we are correcting, for v_∥<0, it affects the integral in - # the 'sigma_cell' between `sigma_ind` and `sigma_ind+1`, which would - # affect the numerically calculated integrals for f(v_∥>0), so if we - # 'corrected' its value, those integrals would change and the constraints - # would not be exactly satisfied. The difference should be small, as the - # correction at that point is multiplied by - # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0. - correction0_integral_pieces[ivpa] = 0.0 - end - correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[end] - correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[end] - correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[end] - correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[end] - correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[end] - correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[end] - - alpha = get_part3_for_one_moment_upper(correction0_integral_pieces) - beta = get_part3_for_one_moment_upper(correction1_integral_pieces) - gamma = get_part3_for_one_moment_upper(correction2_integral_pieces) - delta = get_part3_for_one_moment_upper(correction3_integral_pieces) - epsilon = get_part3_for_one_moment_upper(correction4_integral_pieces) - zeta = get_part3_for_one_moment_upper(correction5_integral_pieces) - eta = get_part3_for_one_moment_upper(correction6_integral_pieces) + a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta = + get_upperz_integral_correction_components( + @view(pdf[:,1,end]), vthe[end], vpa, vpa_unnorm, u_over_vt, + sigma_ind, sigma_fraction, vcut, minus_vcut_ind, plus_vcut_ind, + bc_constraints) # Update the v_∥>0 part of f to correct the moments as # f(0 -zero + residual[ivpa,ivperp,1] = 0.0 + end + end + end + if z.irank == z.nrank - 1 + v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[end,ir], + moments.electron.upar[end,ir], true, true) + @loop_vperp_vpa ivperp ivpa begin + if v_unnorm[ivpa] < zero + residual[ivpa,ivperp,end] = 0.0 + end + end + end + end + return nothing +end + +""" + add_wall_boundary_condition_to_Jacobian!(jacobian, phi, pdf, ppar, vthe, upar, z, + vperp, vpa, vperp_spectral, vpa_spectral, + vpa_adv, moments, vpa_diffusion, me_over_mi, + ir) + +All the contributions that we add in this function have to be added with a -'ve sign so +that they combine with the 1 on the diagonal of the preconditioner matrix to make rows +corresponding to the boundary points which define constraint equations imposing the +boundary condition on those entries of δg (when the right-hand-side is set to zero). +""" +@timeit global_timer add_wall_boundary_condition_to_Jacobian!( + jacobian, phi, pdf, ppar, vthe, upar, z, vperp, vpa, + vperp_spectral, vpa_spectral, vpa_adv, moments, vpa_diffusion, + me_over_mi, ir, include, iz=nothing; + pdf_offset=0, ppar_offset=0) = begin + if z.bc != "wall" + return nothing + end + + if include ∈ (:all, :explicit_v) + include_lower = (z.irank == 0) + include_upper = (z.irank == z.nrank - 1) + zend = z.n + phi_lower = phi[1] + phi_upper = phi[end] + pdf_lower = @view pdf[:,:,1] + pdf_upper = @view pdf[:,:,end] + ppar_lower = ppar[1] + ppar_upper = ppar[end] + vthe_lower = vthe[1] + vthe_upper = vthe[end] + upar_lower = upar[1] + upar_upper = upar[end] + shared_mem_parallel = true + elseif include === :implicit_v + include_lower = (z.irank == 0) && iz == 1 + include_upper = (z.irank == z.nrank - 1) && iz == z.n + zend = 1 + phi_lower = phi_upper = phi + pdf_lower = pdf_upper = pdf + ppar_lower = ppar_upper = ppar + vthe_lower = vthe_upper = vthe + upar_lower = upar_upper = upar + + # When using :implicit_v, this function is called inside a loop that is already + # parallelised over z, so we cannot change the parallel region type. + shared_mem_parallel = false + else + return nothing + end + + if include_lower + shared_mem_parallel && begin_vperp_region() + @loop_vperp ivperp begin + # Skip velocity space boundary points. + if vperp.n > 1 && ivperp == vperp.n + continue + end + + # Get matrix entries for the response of the sheath-edge boundary condition. + # Ignore constraints, as these are non-linear and also should be small + # corrections which should not matter much for a preconditioner. + + jac_range = pdf_offset+(ivperp-1)*vpa.n+1 : pdf_offset+ivperp*vpa.n + jacobian_zbegin = @view jacobian[jac_range,jac_range] + jacobian_zbegin_ppar = @view jacobian[jac_range,ppar_offset+1] + + vpa_unnorm, u_over_vt, vcut, minus_vcut_ind, sigma, sigma_ind, sigma_fraction, + element_with_zero, element_with_zero_boundary, last_point_near_zero, + reversed_wpa_of_minus_vpa = + get_cutoff_params_lower(upar_lower, vthe_lower, phi_lower, me_over_mi, + vpa) + + plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut) + # plus_vcut_fraction is the fraction of the distance between plus_vcut_ind and + # plus_vcut_ind+1 where vcut is. + plus_vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) + if plus_vcut_fraction > 0.5 + last_nonzero_ind = plus_vcut_ind + 1 + else + last_nonzero_ind = plus_vcut_ind + end + + # Interpolate to the 'near zero' points + @views fill_interpolate_symmetric_matrix!( + jacobian_zbegin[sigma_ind:last_point_near_zero,element_with_zero_boundary:sigma_ind-1], + vpa_unnorm[sigma_ind:last_point_near_zero], + vpa_unnorm[element_with_zero_boundary:sigma_ind-1]) + + # Interpolate to the 'far from zero' points + @views fill_1d_interpolation_matrix!( + jacobian_zbegin[last_nonzero_ind:-1:last_point_near_zero+1,:], + reversed_wpa_of_minus_vpa[vpa.n-last_nonzero_ind+1:vpa.n-last_point_near_zero], + vpa, vpa_spectral) + + # Reverse the sign of the elements we just filled + jacobian_zbegin[sigma_ind:last_nonzero_ind,1:sigma_ind-1] .*= -1.0 + + if plus_vcut_fraction > 0.5 + jacobian_zbegin[last_nonzero_ind,1:sigma_ind-1] .*= plus_vcut_fraction - 0.5 + else + jacobian_zbegin[last_nonzero_ind,1:sigma_ind-1] .*= plus_vcut_fraction + 0.5 + end + + # Fill in elements giving response to changes in electron_ppar + # A change to ppar results in a shift in the location of w_∥(v_∥=0). + # The interpolated values of g_e(w_∥) that are filled by the boundary + # condition are (dropping _∥ subscripts for the remaining comments in this + # if-clause) + # g(w|v>0) = g(2*u/vth - w) + # So + # δg(w|v>0) = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δvth + # = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δ(sqrt(2*ppar/n) + # = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δppar * vth / (2*ppar) + # = -u/vth/ppar * dg/dw(2*u/vth - w) * δppar + # + # As jacobian_zbegin_ppar only depends on variations of a single value + # `ppar[1]`, it might be more maintainable and computationally cheaper to + # calculate jacobian_zbegin_ppar with a finite difference method (applying the + # boundary condition function with a perturbed pressure `ppar[1] + ϵ` for some + # small ϵ) rather than calculating it using the method below. If we used the + # finite difference approach, we would have to be careful that the ϵ step does + # not change the cutoff index (otherwise we would pick up non-smooth changes) + # - one possibility might to be to switch to `-ϵ` instead of `+ϵ` if this + # happens. + + dpdfdv_near_zero = @view vpa.scratch[sigma_ind:last_point_near_zero] + @views interpolate_symmetric!(dpdfdv_near_zero, + vpa_unnorm[sigma_ind:last_point_near_zero], + pdf_lower[element_with_zero_boundary:sigma_ind-1,ivperp], + vpa_unnorm[element_with_zero_boundary:sigma_ind-1], + Val(1)) + # The above call to interpolate_symmetric calculates dg/dv rather than dg/dw, + # so need to multiply by an extra factor of vthe. + # δg(w|v>0) = -u/ppar * dg/dv(2*u/vth - w) * δppar + @. jacobian_zbegin_ppar[sigma_ind:last_point_near_zero] -= + -upar_lower / ppar_lower * dpdfdv_near_zero + + dpdfdw_far_from_zero = @view vpa.scratch[last_point_near_zero+1:last_nonzero_ind] + @views interpolate_to_grid_1d!(dpdfdw_far_from_zero, + reversed_wpa_of_minus_vpa[vpa.n-last_nonzero_ind+1:vpa.n-last_point_near_zero], + pdf_lower[:,ivperp], vpa, vpa_spectral, Val(1)) + reverse!(dpdfdw_far_from_zero) + # Note that because we calculated the derivative of the interpolating + # function, and then reversed the results, we need to multiply the derivative + # by -1. + @. jacobian_zbegin_ppar[last_point_near_zero+1:last_nonzero_ind] -= + upar_lower / vthe_lower / ppar_lower * dpdfdw_far_from_zero + + # Whatever the variation due to interpolation is at the last nonzero grid + # point, it will be reduced by the cutoff. + if plus_vcut_fraction > 0.5 + jacobian_zbegin_ppar[last_nonzero_ind] *= plus_vcut_fraction - 0.5 + else + jacobian_zbegin_ppar[last_nonzero_ind] *= plus_vcut_fraction + 0.5 + end + + # The change in electron_ppar also changes the position of + # wcut = (vcut - upar)/vthe + # as vcut does not change (within the Krylov iteration where this + # preconditioner matrix is used), but vthe does because of the change in + # electron_ppar. We actually use plus_vcut_fraction calculated from + # vpa_unnorm, so it is most convenient to consider: + # v = vthe * w + upar + # δv = δvthe * w + # = δvthe * (v - upar)/vthe + # = δppar * vthe / (2*ppar) * (v - upar)/vthe + # = δppar * (v - upar) / 2 / ppar + # with vl and vu the values of v at the grid points below and above vcut + # plus_vcut_fraction = (vcut - vl) / (vu - vl) + # δplus_vcut_fraction = -(vcut - vl) / (vu - vl)^2 * (δvu - δvl) - δvl / (vu - vl) + # δplus_vcut_fraction = [-(vcut - vl) / (vu - vl)^2 * (vu - vl) - (vl - upar) / (vu - vl)] * δppar / 2 / ppar + # δplus_vcut_fraction = [-(vcut - vl) / (vu - vl) - (vl - upar) / (vu - vl)] * δppar / 2 / ppar + # δplus_vcut_fraction = -(vcut - upar) / (vu - vl) / 2 / ppar * δppar + interpolated_pdf_at_last_nonzero_ind = @view vpa.scratch[1:1] + reversed_last_nonzero_ind = vpa.n-last_nonzero_ind+1 + @views interpolate_to_grid_1d!(interpolated_pdf_at_last_nonzero_ind, + reversed_wpa_of_minus_vpa[reversed_last_nonzero_ind:reversed_last_nonzero_ind], + pdf_lower[:,ivperp], vpa, vpa_spectral) + + dplus_vcut_fraction_dp = -(vcut - upar_lower) / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) / 2.0 / ppar_lower + jacobian_zbegin_ppar[last_nonzero_ind] -= interpolated_pdf_at_last_nonzero_ind[] * dplus_vcut_fraction_dp + + # Calculate some numerical integrals of dpdfdw that we will need later + function get_part3_for_one_moment_lower(integral_pieces) + # Integral contribution from the cell containing sigma + # The contribution from integral_pieces[sigma_ind-1] should be dropped, + # because that point is on the input grid, and this correction is due to + # the changes in interpolated values on the output grid due to δp_e∥. The + # value of g_e at sigma_ind-1 isn't interpolated so does not change in + # this way. + integral_sigma_cell = 0.5 * integral_pieces[sigma_ind] + + part3 = sum(@view integral_pieces[sigma_ind+1:plus_vcut_ind+1]) + part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell + + return part3 + end + # The contents of jacobian_zbegin_ppar at this point are the coefficients + # needed to get the new distribution function due to a change δp, which we can + # now use to calculate the response of various integrals to the same change. + # Note that jacobian_zbegin_ppar already contains `2.0*dsigma_dp`. + @. vpa.scratch = -jacobian_zbegin_ppar * vpa.wgts / sqrt(π) + da3_dp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + db3_dp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + dc3_dp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + dd3_dp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch = -jacobian_zbegin_ppar * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe_lower^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_lower^2) + vpa.scratch[sigma_ind-1:sigma_ind] .= 0.0 + dalpha_dp_interp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + dbeta_dp_interp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + dgamma_dp_interp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + ddelta_dp_interp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + depsilon_dp_interp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + dzeta_dp_interp = get_part3_for_one_moment_lower(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_lower + deta_dp_interp = get_part3_for_one_moment_lower(vpa.scratch) + + pdf_lowerz = vpa.scratch10 + @views @. pdf_lowerz[1:sigma_ind-1] = pdf_lower[1:sigma_ind-1,ivperp] + + # interpolate the pdf_lowerz onto this grid + # 'near zero' means in the range where + # abs(v_∥)≤abs(lower boundary of element including v_∥=0) + # 'far from zero' means larger values of v_∥. + + # Interpolate to the 'near zero' points + @views interpolate_symmetric!(pdf_lowerz[sigma_ind:last_point_near_zero], + vpa_unnorm[sigma_ind:last_point_near_zero], + pdf_lowerz[element_with_zero_boundary:sigma_ind-1], + vpa_unnorm[element_with_zero_boundary:sigma_ind-1]) + + # Interpolate to the 'far from zero' points + reversed_pdf_far_from_zero = @view pdf_lowerz[last_point_near_zero+1:end] + interpolate_to_grid_1d!(reversed_pdf_far_from_zero, + reversed_wpa_of_minus_vpa[1:vpa.n-last_point_near_zero], + pdf_lowerz, vpa, vpa_spectral) + reverse!(reversed_pdf_far_from_zero) + + minus_vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) + if minus_vcut_fraction < 0.5 + lower_cutoff_ind = minus_vcut_ind-1 + else + lower_cutoff_ind = minus_vcut_ind + end + if plus_vcut_fraction > 0.5 + upper_cutoff_ind = plus_vcut_ind+1 + upper_cutoff_factor = plus_vcut_fraction - 0.5 + else + upper_cutoff_ind = plus_vcut_ind + upper_cutoff_factor = plus_vcut_fraction + 0.5 + end + pdf_lowerz[upper_cutoff_ind] *= upper_cutoff_factor + pdf_lowerz[upper_cutoff_ind+1:end] .= 0.0 + + a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta = + get_lowerz_integral_correction_components( + pdf_lowerz, vthe_lower, vpa, vpa_unnorm, u_over_vt, sigma_ind, + sigma_fraction, vcut, minus_vcut_ind, plus_vcut_ind, false) + + output_range = sigma_ind+1:upper_cutoff_ind + + v_over_vth = @views @. vpa.scratch[output_range] = vpa_unnorm[output_range] / vthe_lower + + correction_matrix = [alpha beta gamma delta ; + beta gamma delta epsilon ; + gamma delta epsilon zeta ; + delta epsilon zeta eta + ] + + # jacobian_zbegin_ppar state at this point would generate (when multiplied by + # some δp) a δf due to the effectively-shifted grid. The unperturbed f + # required corrections to make its integrals correct. Need to apply the same + # corrections to δf. + # This term seems to have a very small contribution, and could probably be + # skipped. + A, B, C, D = correction_matrix \ [a2-a3, -b2-b3, c2-c3, -d2-d3] + @. jacobian_zbegin_ppar[output_range] *= + 1.0 + + (A + + B * v_over_vth + + C * v_over_vth^2 + + D * v_over_vth^3) * + integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) + + # Calculate the changes in the integrals a2, b2, c2, d2, a3, b3, c3, and d3 in + # response to changes in electron_ppar. The changes are calculated for the + # combinations (a2-a3), (-b2-b3), (c2-c3), and (-d2-d3) to take advantage of + # some cancellations. + + # Need to calculate the variation of various intermediate quantities with + # δp. + # + # vth = sqrt(2*p/n) + # δvth = vth / (2 * p) * δp + # + # sigma = -u / vth + # δsigma = u / vth^2 δvth + # = u / (2 * vth * p) * δp + # + # We could write sigma_fraction as + # sigma_fraction = (sigma - vpa[sigma_ind-1]) / (vpa[sigma_ind] - vpa[sigma_ind-1]) + # so that + # δsigma_fraction = δsigma / (vpa[sigma_ind] - vpa[sigma_ind-1]) + # = u / (2 * vth * p) / (vpa[sigma_ind] - vpa[sigma_ind-1]) * δp + # + # minus_vcut_fraction = ((-vcut - u)/vth - vpa[minus_vcut_ind-1]) / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1]) + # δminus_vcut_fraction = (vcut + u) / vth^2 / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1]) * δvth + # = (vcut + u) / (2 * vth * p) / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1]) * δp + # + # plus_vcut_fraction = ((vcut - u)/vth - vpa[plus_vcut_ind-1]) / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind]) + # δplus_vcut_fraction = -(vcut - u) / vth^2 / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind]) * δvth + # = -(vcut - u) / (2 * vth * p) / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind]) * δp + density_integral_pieces_lowerz = vpa.scratch3 + flow_integral_pieces_lowerz = vpa.scratch4 + energy_integral_pieces_lowerz = vpa.scratch5 + cubic_integral_pieces_lowerz = vpa.scratch6 + quartic_integral_pieces_lowerz = vpa.scratch7 + fill_integral_pieces!( + pdf_lowerz, vthe_lower, vpa, vpa_unnorm, density_integral_pieces_lowerz, + flow_integral_pieces_lowerz, energy_integral_pieces_lowerz, + cubic_integral_pieces_lowerz, quartic_integral_pieces_lowerz) + vpa_grid = vpa.grid + + dsigma_dp = upar_lower / (2.0 * vthe_lower * ppar_lower) + + dsigma_fraction_dp = dsigma_dp / (vpa_grid[sigma_ind] - vpa_grid[sigma_ind-1]) + + dminus_vcut_fraction_dp = (vcut + upar_lower) / (2.0 * vthe_lower * ppar_lower) / (vpa_grid[minus_vcut_ind] - vpa_grid[minus_vcut_ind-1]) + + dplus_vcut_fraction_dp = -(vcut - upar_lower) / (2.0 * vthe_lower * ppar_lower) / (vpa_grid[plus_vcut_ind+1] - vpa_grid[plus_vcut_ind]) + + density_integral_sigma_cell = 0.5 * (density_integral_pieces_lowerz[sigma_ind-1] + + density_integral_pieces_lowerz[sigma_ind]) + da2_minus_a3_dp = ( + # Contribution from integral limits at sigma + 2.0 * density_integral_sigma_cell * dsigma_fraction_dp + # Contribution from integral limits at -wcut-. The contribution from wcut+ + # is included in da3_dp + - density_integral_pieces_lowerz[lower_cutoff_ind] * dminus_vcut_fraction_dp + # No contribution from explicit σ factors in w-integral for a2 or a3 as + # integrand does not contain σ. + # Change in a3 integral due to different interpolated ̂g values + - da3_dp + ) + + dminus_b2_minus_b3_dp = ( + # Contribution from integral limits at sigma cancel exactly + # Contribution from integral limits at -wcut-. The contribution from wcut+ + # is included in db3_dp + + flow_integral_pieces_lowerz[lower_cutoff_ind] * dminus_vcut_fraction_dp + # Contribution from w-integral due to variation of integrand with ppar. + + (a2 + a3) * dsigma_dp + # Change in b3 integral due to different interpolated ̂g values + - db3_dp + ) + + energy_integral_sigma_cell = 0.5 * (energy_integral_pieces_lowerz[sigma_ind-1] + + energy_integral_pieces_lowerz[sigma_ind]) + dc2_minus_c3_dp = ( + # Contribution from integral limits at sigma + 2.0 * energy_integral_sigma_cell * dsigma_fraction_dp + # Contribution from integral limits at -wcut-. The contribution from wcut+ + # is included in dc3_dp + - energy_integral_pieces_lowerz[lower_cutoff_ind] * dminus_vcut_fraction_dp + # Contribution from w-integral due to variation of integrand with ppar. + + 2.0 * (-b2 + b3) * dsigma_dp + # Change in c3 integral due to different interpolated ̂g values + - dc3_dp + ) + + dminus_d2_minus_d3_dp = ( + # Contribution from integral limits at sigma cancel exactly + # Contribution from integral limits at -wcut-. The contribution from wcut+ + # is included in dd3_dp + + cubic_integral_pieces_lowerz[lower_cutoff_ind] * dminus_vcut_fraction_dp + # Contribution from w-integral due to variation of integrand with ppar. + + 3.0 * (c2 + c3) * dsigma_dp + # Change in d3 integral due to different interpolated ̂g values + - dd3_dp + ) + + correction0_integral_pieces = @. vpa.scratch3 = pdf_lowerz * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe_lower^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_lower^2) + for ivpa ∈ 1:sigma_ind + correction0_integral_pieces[ivpa] = 0.0 + end + + correctionminus1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces / vpa_unnorm * vthe_lower + integralminus1 = sum(@view(correctionminus1_integral_pieces[sigma_ind+1:upper_cutoff_ind])) + + correction0_integral_type2_pieces = @. vpa.scratch4 = pdf_lowerz * vpa.wgts / sqrt(π) * 2.0 * integral_correction_sharpness^2 * vpa_unnorm^3 / vthe_lower^3 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_lower^2)^2 + integral_type2 = sum(@view(correction0_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind])) + dalpha_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at wcut+ is included in dalpha_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * integralminus1 + integral_type2) * dsigma_dp + # Change in alpha integral due to different interpolated ̂g values + + dalpha_dp_interp + ) + + correction1_integral_pieces = @. vpa.scratch5 = correction0_integral_pieces * vpa_unnorm / vthe_lower + correction1_integral_type2_pieces = @. vpa.scratch6 = correction0_integral_type2_pieces * vpa_unnorm / vthe_lower + integral_type2 = sum(@view(correction1_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind])) + dbeta_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at wcut+ is included in dbeta_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * alpha + integral_type2) * dsigma_dp + # Change in beta integral due to different interpolated ̂g values + + dbeta_dp_interp + ) + + # Here we overwrite the buffers that were used for correction1_integral_pieces + # and correction1_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe_lower + correction2_integral_type2_pieces = @. vpa.scratch6 = correction1_integral_type2_pieces * vpa_unnorm / vthe_lower + integral_type2 = sum(@view(correction2_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind])) + dgamma_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at wcut+ is included in dgamma_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * beta + integral_type2) * dsigma_dp + # Change in gamma integral due to different interpolated ̂g values + + dgamma_dp_interp + ) + + # Here we overwrite the buffers that were used for correction2_integral_pieces + # and correction2_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction3_integral_pieces = @. vpa.scratch5 = correction2_integral_pieces * vpa_unnorm / vthe_lower + correction3_integral_type2_pieces = @. vpa.scratch6 = correction2_integral_type2_pieces * vpa_unnorm / vthe_lower + integral_type2 = sum(@view(correction3_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind])) + ddelta_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at wcut+ is included in ddelta_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * gamma + integral_type2) * dsigma_dp + # Change in delta integral due to different interpolated ̂g values + + ddelta_dp_interp + ) + + # Here we overwrite the buffers that were used for correction3_integral_pieces + # and correction3_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction4_integral_pieces = @. vpa.scratch5 = correction3_integral_pieces * vpa_unnorm / vthe_lower + correction4_integral_type2_pieces = @. vpa.scratch6 = correction3_integral_type2_pieces * vpa_unnorm / vthe_lower + integral_type2 = sum(@view(correction4_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind])) + depsilon_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at wcut+ is included in depsilon_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * delta + integral_type2) * dsigma_dp + # Change in epsilon integral due to different interpolated ̂g values + + depsilon_dp_interp + ) + + # Here we overwrite the buffers that were used for correction4_integral_pieces + # and correction4_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction5_integral_pieces = @. vpa.scratch5 = correction4_integral_pieces * vpa_unnorm / vthe_lower + correction5_integral_type2_pieces = @. vpa.scratch6 = correction4_integral_type2_pieces * vpa_unnorm / vthe_lower + integral_type2 = sum(@view(correction5_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind])) + dzeta_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at wcut+ is included in dzeta_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * epsilon + integral_type2) * dsigma_dp + # Change in zeta integral due to different interpolated ̂g values + + dzeta_dp_interp + ) + + # Here we overwrite the buffers that were used for correction4_integral_pieces + # and correction4_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction6_integral_pieces = @. vpa.scratch5 = correction5_integral_pieces * vpa_unnorm / vthe_lower + correction6_integral_type2_pieces = @. vpa.scratch6 = correction5_integral_type2_pieces * vpa_unnorm / vthe_lower + integral_type2 = sum(@view(correction6_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind])) + deta_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at wcut+ is included in deta_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * zeta + integral_type2) * dsigma_dp + # Change in eta integral due to different interpolated ̂g values + + deta_dp_interp + ) + + dA_dp, dB_dp, dC_dp, dD_dp = correction_matrix \ ( + [da2_minus_a3_dp, dminus_b2_minus_b3_dp, dc2_minus_c3_dp, dminus_d2_minus_d3_dp] + - [dalpha_dp dbeta_dp dgamma_dp ddelta_dp ; + dbeta_dp dgamma_dp ddelta_dp depsilon_dp ; + dgamma_dp ddelta_dp depsilon_dp dzeta_dp ; + ddelta_dp depsilon_dp dzeta_dp deta_dp + ] + * [A, B, C, D] + ) + + @views @. jacobian_zbegin_ppar[output_range] -= + (dA_dp + + dB_dp * v_over_vth + + dC_dp * v_over_vth^2 + + dD_dp * v_over_vth^3) * + integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) * + pdf_lowerz[output_range] + + # Add variation due to variation of ̃v coordinate with δp. + # These contributions seem to make almost no difference, and could probably be + # skipped. + dv_over_vth_dp = -dsigma_dp + @views @. jacobian_zbegin_ppar[output_range] -= ( + (B * dv_over_vth_dp + + 2.0 * C * v_over_vth * dv_over_vth_dp + + 3.0 * D * v_over_vth^2 * dv_over_vth_dp) * + integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) * + pdf_lowerz[output_range] + + + (A + + B * v_over_vth + + C * v_over_vth^2 + + D * v_over_vth^3) * + (2.0 * integral_correction_sharpness * v_over_vth * dv_over_vth_dp / (1.0 + integral_correction_sharpness * v_over_vth^2) + - 2.0 * integral_correction_sharpness^2 * v_over_vth^3 * dv_over_vth_dp / (1.0 + integral_correction_sharpness * v_over_vth^2)^2) * + pdf_lowerz[output_range] + ) + end + end + + if include_upper + shared_mem_parallel && begin_vperp_region() + @loop_vperp ivperp begin + # Skip vperp boundary points. + if vperp.n > 1 && ivperp == vperp.n + continue + end + + # Get matrix entries for the response of the sheath-edge boundary condition. + # Ignore constraints, as these are non-linear and also should be small + # corrections which should not matter much for a preconditioner. + + jac_range = pdf_offset+(zend-1)*vperp.n*vpa.n+(ivperp-1)*vpa.n+1 : pdf_offset+(zend-1)*vperp.n*vpa.n+ivperp*vpa.n + jacobian_zend = @view jacobian[jac_range,jac_range] + jacobian_zend_ppar = @view jacobian[jac_range,ppar_offset+zend] + + vpa_unnorm, u_over_vt, vcut, plus_vcut_ind, sigma, sigma_ind, sigma_fraction, + element_with_zero, element_with_zero_boundary, first_point_near_zero, + reversed_wpa_of_minus_vpa = + get_cutoff_params_upper(upar_upper, vthe_upper, phi_upper, me_over_mi, + vpa) + + minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut) + # minus_vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and + # minus_vcut_ind where -vcut is. + minus_vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) + + if minus_vcut_fraction < 0.5 + first_nonzero_ind = minus_vcut_ind - 1 + else + first_nonzero_ind = minus_vcut_ind + end + + # Interpolate to the 'near zero' points + @views fill_interpolate_symmetric_matrix!( + jacobian_zend[first_point_near_zero:sigma_ind,sigma_ind+1:element_with_zero_boundary], + vpa_unnorm[first_point_near_zero:sigma_ind], + vpa_unnorm[sigma_ind+1:element_with_zero_boundary]) + + # Interpolate to the 'far from zero' points + @views fill_1d_interpolation_matrix!( + jacobian_zend[first_point_near_zero-1:-1:first_nonzero_ind,:], + reversed_wpa_of_minus_vpa[vpa.n-first_point_near_zero+2:vpa.n-first_nonzero_ind+1], + vpa, vpa_spectral) + + # Reverse the sign of the elements we just filled + jacobian_zend[first_nonzero_ind:sigma_ind,sigma_ind+1:end] .*= -1.0 + + if minus_vcut_fraction < 0.5 + jacobian_zend[first_nonzero_ind,sigma_ind+1:end] .*= 0.5 - minus_vcut_fraction + else + jacobian_zend[first_nonzero_ind,sigma_ind+1:end] .*= 1.5 - minus_vcut_fraction + end + + # Fill in elements giving response to changes in electron_ppar + # A change to ppar results in a shift in the location of w_∥(v_∥=0). + # The interpolated values of g_e(w_∥) that are filled by the boundary + # condition are (dropping _∥ subscripts for the remaining comments in this + # if-clause) + # g(w|v<0) = g(2*u/vth - w) + # So + # δg(w|v<0) = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δvth + # = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δ(sqrt(2*ppar/n) + # = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δppar * vth / (2*ppar) + # = -u/vth/ppar * dg/dw(2*u/vth - w) * δppar + # + # As jacobian_zend_ppar only depends on variations of a single value + # `ppar[end]` it might be more maintainable and computationally cheaper to + # calculate jacobian_zend_ppar with a finite difference method (applying the + # boundary condition function with a perturbed pressure `ppar[end] + ϵ` for + # some small ϵ) rather than calculating it using the method below. If we used + # the finite difference approach, we would have to be careful that the ϵ step + # does not change the cutoff index (otherwise we would pick up non-smooth + # changes) - one possibility might to be to switch to `-ϵ` instead of `+ϵ` if + # this happens. + + dpdfdv_near_zero = @view vpa.scratch[first_point_near_zero:sigma_ind] + @views interpolate_symmetric!(dpdfdv_near_zero, + vpa_unnorm[first_point_near_zero:sigma_ind], + pdf_upper[sigma_ind+1:element_with_zero_boundary,ivperp], + vpa_unnorm[sigma_ind+1:element_with_zero_boundary], + Val(1)) + # The above call to interpolate_symmetric calculates dg/dv rather than dg/dw, + # so need to multiply by an extra factor of vthe. + # δg(w|v<0) = -u/ppar * dg/dv(2*u/vth - w) * δppar + @. jacobian_zend_ppar[first_point_near_zero:sigma_ind] -= + -upar_upper / ppar_upper * dpdfdv_near_zero + + dpdfdw_far_from_zero = @view vpa.scratch[first_nonzero_ind:first_point_near_zero-1] + @views interpolate_to_grid_1d!(dpdfdw_far_from_zero, + reversed_wpa_of_minus_vpa[vpa.n-first_point_near_zero+2:vpa.n-first_nonzero_ind+1], + pdf_upper[:,ivperp], vpa, vpa_spectral, Val(1)) + reverse!(dpdfdw_far_from_zero) + # Note that because we calculated the derivative of the interpolating + # function, and then reversed the results, we need to multiply the derivative + # by -1. + @. jacobian_zend_ppar[first_nonzero_ind:first_point_near_zero-1] -= + upar_upper / vthe_upper / ppar_upper * dpdfdw_far_from_zero + + # Whatever the variation due to interpolation is at the last nonzero grid + # point, it will be reduced by the cutoff. + if minus_vcut_fraction < 0.5 + jacobian_zend_ppar[first_nonzero_ind] *= 0.5 - minus_vcut_fraction + else + jacobian_zend_ppar[first_nonzero_ind] *= 1.5 - minus_vcut_fraction + end + + # The change in electron_ppar also changes the position of + # wcut = (-vcut - upar)/vthe + # as vcut does not change (within the Krylov iteration where this + # preconditioner matrix is used), but vthe does because of the change in + # electron_ppar. We actually use minus_vcut_fraction calculated from + # vpa_unnorm, so it is most convenient to consider: + # v = vthe * w + upar + # δv = δvthe * w + # = δvthe * (v - upar)/vthe + # = δppar * vthe / (2*ppar) * (v - upar)/vthe + # = δppar * (v - upar) / 2 / ppar + # with vl and vu the values of v at the grid points below and above vcut + # minus_vcut_fraction = (-vcut - vl) / (vu - vl) + # δminus_vcut_fraction = -(-vcut - vl) / (vu - vl)^2 * (δvu - δvl) - δvl / (vu - vl) + # δminus_vcut_fraction = [-(-vcut - vl) / (vu - vl)^2 * (vu - vl) - (vl - upar) / (vu - vl)] * δppar / 2 / ppar + # δminus_vcut_fraction = [-(-vcut - vl) / (vu - vl) - (vl - upar) / (vu - vl)] * δppar / 2 / ppar + # δminus_vcut_fraction = -(-vcut - upar) / (vu - vl) / 2 / ppar * δppar + # δminus_vcut_fraction = (vcut + upar) / (vu - vl) / 2 / ppar * δppar + interpolated_pdf_at_first_nonzero_ind = @view vpa.scratch[1:1] + reversed_first_nonzero_ind = vpa.n-first_nonzero_ind+1 + @views interpolate_to_grid_1d!(interpolated_pdf_at_first_nonzero_ind, + reversed_wpa_of_minus_vpa[reversed_first_nonzero_ind:reversed_first_nonzero_ind], + pdf_upper[:,ivperp], vpa, vpa_spectral) + + dminus_vcut_fraction_dp = (vcut + upar_upper) / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) / 2.0 / ppar_upper + # Note that pdf_upper[first_nonzero_ind,ivperp] depends on -minus_vcut_fraction, so + # need a -'ve sign in the following line. + jacobian_zend_ppar[first_nonzero_ind] -= -interpolated_pdf_at_first_nonzero_ind[] * dminus_vcut_fraction_dp + + # Calculate some numerical integrals of dpdfdw that we will need later + function get_part3_for_one_moment_upper(integral_pieces) + # Integral contribution from the cell containing sigma + # The contribution from integral_pieces[sigma_ind+1] should be dropped, + # because that point is on the input grid, and this correction is due to + # the changes in interpolated values on the output grid due to δp_e∥. The + # value of g_e at sigma_ind+1 isn't interpolated so does not change in + # this way. + integral_sigma_cell = 0.5 * integral_pieces[sigma_ind] + + part3 = sum(@view integral_pieces[minus_vcut_ind-1:sigma_ind-1]) + part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell + + return part3 + end + # The contents of jacobian_zend_ppar at this point are the coefficients needed + # to get the new distribution function due to a change δp, which we can now + # use to calculate the response of various integrals to the same change. Note + # that jacobian_zend_ppar already contains `2.0*dsigma_dp`. + @. vpa.scratch = -jacobian_zend_ppar * vpa.wgts / sqrt(π) + da3_dp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + db3_dp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + dc3_dp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + dd3_dp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch = -jacobian_zend_ppar * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe_upper^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_upper^2) + vpa.scratch[sigma_ind:sigma_ind+1] .= 0.0 + dalpha_dp_interp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + dbeta_dp_interp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + dgamma_dp_interp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + ddelta_dp_interp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + depsilon_dp_interp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + dzeta_dp_interp = get_part3_for_one_moment_upper(vpa.scratch) + @. vpa.scratch *= vpa_unnorm / vthe_upper + deta_dp_interp = get_part3_for_one_moment_upper(vpa.scratch) + + pdf_upperz = vpa.scratch10 + @views @. pdf_upperz[sigma_ind:end] = pdf_upper[sigma_ind:end,ivperp] + + # interpolate the pdf_upperz onto this grid + # 'near zero' means in the range where + # abs(v_∥)≤abs(upper boundary of element including v_∥=0) + # 'far from zero' means more negative values of v_∥. + + # Interpolate to the 'near zero' points + @views interpolate_symmetric!(pdf_upperz[first_point_near_zero:sigma_ind], + vpa_unnorm[first_point_near_zero:sigma_ind], + pdf_upperz[sigma_ind+1:element_with_zero_boundary], + vpa_unnorm[sigma_ind+1:element_with_zero_boundary]) + + # Interpolate to the 'far from zero' points + reversed_pdf_far_from_zero = @view pdf_upperz[1:first_point_near_zero-1] + interpolate_to_grid_1d!(reversed_pdf_far_from_zero, + reversed_wpa_of_minus_vpa[vpa.n-first_point_near_zero+2:end], + pdf_upperz, vpa, vpa_spectral) + reverse!(reversed_pdf_far_from_zero) + + plus_vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) + if plus_vcut_fraction > 0.5 + upper_cutoff_ind = plus_vcut_ind+1 + else + upper_cutoff_ind = plus_vcut_ind + end + if minus_vcut_fraction < 0.5 + lower_cutoff_ind = minus_vcut_ind-1 + lower_cutoff_factor = 0.5 - minus_vcut_fraction + else + lower_cutoff_ind = minus_vcut_ind + lower_cutoff_factor = 1.5 - minus_vcut_fraction + end + pdf_upperz[lower_cutoff_ind] *= lower_cutoff_factor + pdf_upperz[1:lower_cutoff_ind-1] .= 0.0 + + a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta = + get_upperz_integral_correction_components( + pdf_upperz, vthe_upper, vpa, vpa_unnorm, u_over_vt, + sigma_ind, sigma_fraction, vcut, minus_vcut_ind, plus_vcut_ind, false) + + output_range = lower_cutoff_ind:sigma_ind + + v_over_vth = @views @. vpa.scratch[output_range] = vpa_unnorm[output_range] / vthe_upper + + correction_matrix = [alpha beta gamma delta ; + beta gamma delta epsilon ; + gamma delta epsilon zeta ; + delta epsilon zeta eta + ] + + # jacobian_zbegin_ppar state at this point would generate (when multiplied by + # some δp) a δf due to the effectively-shifted grid. The unperturbed f + # required corrections to make its integrals correct. Need to apply the same + # corrections to δf. + # This term seems to have a very small contribution, and could probably be + # skipped. + A, B, C, D = correction_matrix \ [a2-a3, -b2-b3, c2-c3, -d2-d3] + @. jacobian_zend_ppar[output_range] *= + 1.0 + + (A + + B * v_over_vth + + C * v_over_vth^2 + + D * v_over_vth^3) * + integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) + + # Calculate the changes in the integrals a2, b2, c2, d2, a3, b3, c3, and d3 in + # response to changes in electron_ppar. The changes are calculated for the + # combinations (a2-a3), (-b2-b3), (c2-c3), and (-d2-d3) to take advantage of + # some cancellations. + + # Need to calculate the variation of various intermediate quantities with + # δp. + # + # vth = sqrt(2*p/n) + # δvth = vth / (2 * p) * δp + # + # sigma = -u / vth + # δsigma = u / vth^2 δvth + # = u / (2 * vth * p) * δp + # + # We could write sigma_fraction as + # sigma_fraction = (sigma - vpa[sigma_ind]) / (vpa[sigma_ind+1] - vpa[sigma_ind]) + # so that + # δsigma_fraction = δsigma / (vpa[sigma_ind+1] - vpa[sigma_ind]) + # = u / (2 * vth * p) / (vpa[sigma_ind+1] - vpa[sigma_ind]) * δp + # + # minus_vcut_fraction = ((-vcut - u)/vth - vpa[minus_vcut_ind-1]) / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1]) + # δminus_vcut_fraction = (vcut + u) / vth^2 / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1]) * δvth + # = (vcut + u) / (2 * vth * p) / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1]) * δp + # + # plus_vcut_fraction = ((vcut - u)/vth - vpa[plus_vcut_ind-1]) / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind]) + # δplus_vcut_fraction = -(vcut - u) / vth^2 / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind]) * δvth + # = -(vcut - u) / (2 * vth * p) / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind]) * δp + + density_integral_pieces_upperz = vpa.scratch3 + flow_integral_pieces_upperz = vpa.scratch4 + energy_integral_pieces_upperz = vpa.scratch5 + cubic_integral_pieces_upperz = vpa.scratch6 + quartic_integral_pieces_upperz = vpa.scratch7 + fill_integral_pieces!( + pdf_upperz, vthe_upper, vpa, vpa_unnorm, density_integral_pieces_upperz, + flow_integral_pieces_upperz, energy_integral_pieces_upperz, + cubic_integral_pieces_upperz, quartic_integral_pieces_upperz) + vpa_grid = vpa.grid + + dsigma_dp = upar_upper / (2.0 * vthe_upper * ppar_upper) + + dsigma_fraction_dp = dsigma_dp / (vpa_grid[sigma_ind+1] - vpa_grid[sigma_ind]) + + dminus_vcut_fraction_dp = (vcut + upar_upper) / (2.0 * vthe_upper * ppar_upper) / (vpa_grid[minus_vcut_ind] - vpa_grid[minus_vcut_ind-1]) + + dplus_vcut_fraction_dp = -(vcut - upar_upper) / (2.0 * vthe_upper * ppar_upper) / (vpa_grid[plus_vcut_ind+1] - vpa_grid[plus_vcut_ind]) + + + density_integral_sigma_cell = 0.5 * (density_integral_pieces_upperz[sigma_ind] + + density_integral_pieces_upperz[sigma_ind+1]) + da2_minus_a3_dp = ( + # Contribution from integral limits at sigma + -2.0 * density_integral_sigma_cell * dsigma_fraction_dp + # Contribution from integral limits at wcut+. The contribution from -wcut- + # is included in da3_dp + + density_integral_pieces_upperz[upper_cutoff_ind] * dplus_vcut_fraction_dp + # No contribution from w-integral for a2 or a3 as integrand does not + # depend on ppar. + # Change in a3 integral due to different interpolated ̂g values + - da3_dp + ) + + dminus_b2_minus_b3_dp = ( + # Contribution from integral limits at sigma cancel exactly + # Contribution from integral limits at wcut+. The contribution from -wcut- + # is included in db3_dp + - flow_integral_pieces_upperz[upper_cutoff_ind] * dplus_vcut_fraction_dp + # Contribution from w-integral due to variation of integrand with ppar. + + (a2 + a3) * dsigma_dp + # Change in b3 integral due to different interpolated ̂g values + - db3_dp + ) + + energy_integral_sigma_cell = 0.5 * (energy_integral_pieces_upperz[sigma_ind] + + energy_integral_pieces_upperz[sigma_ind+1]) + dc2_minus_c3_dp = ( + # Contribution from integral limits at sigma + -2.0 * energy_integral_sigma_cell * dsigma_fraction_dp + # Contribution from integral limits at wcut+. The contribution from -wcut- + # is included in dc3_dp + + energy_integral_pieces_upperz[upper_cutoff_ind] * dplus_vcut_fraction_dp + # Contribution from w-integral due to variation of integrand with ppar. + + 2.0 * (-b2 + b3) * dsigma_dp + # Change in c3 integral due to different interpolated ̂g values + - dc3_dp + ) + + dminus_d2_minus_d3_dp = ( + # Contribution from integral limits at sigma cancel exactly + # Contribution from integral limits at wcut+. The contribution from -wcut- + # is included in dc3_dp + - cubic_integral_pieces_upperz[upper_cutoff_ind] * dplus_vcut_fraction_dp + # Contribution from w-integral due to variation of integrand with ppar. + + 3.0 * (c2 + c3) * dsigma_dp + # Change in d3 integral due to different interpolated ̂g values + - dd3_dp + ) + + correction0_integral_pieces = @. vpa.scratch3 = pdf_upperz * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe_upper^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_upper^2) + for ivpa ∈ sigma_ind:vpa.n + correction0_integral_pieces[ivpa] = 0.0 + end + + correctionminus1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces / vpa_unnorm * vthe_upper + integralminus1 = sum(@view(correctionminus1_integral_pieces[lower_cutoff_ind:sigma_ind-1])) + + correction0_integral_type2_pieces = @. vpa.scratch4 = pdf_upperz * vpa.wgts / sqrt(π) * 2.0 * integral_correction_sharpness^2 * vpa_unnorm^3 / vthe_upper^3 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_upper^2)^2 + integral_type2 = sum(@view(correction0_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1])) + dalpha_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at -wcut- is included in dalpha_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + (-2.0 * integralminus1 + integral_type2) * dsigma_dp + # Change in alpha integral due to different interpolated ̂g values + + dalpha_dp_interp + ) + + correction1_integral_pieces = @. vpa.scratch5 = correction0_integral_pieces * vpa_unnorm / vthe_upper + correction1_integral_type2_pieces = @. vpa.scratch6 = correction0_integral_type2_pieces * vpa_unnorm / vthe_upper + integral_type2 = sum(@view(correction1_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1])) + dbeta_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at -wcut- is included in dbeta_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * alpha + integral_type2) * dsigma_dp + # Change in beta integral due to different interpolated ̂g values + + dbeta_dp_interp + ) + + # Here we overwrite the buffers that were used for correction1_integral_pieces + # and correction1_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe_upper + correction2_integral_type2_pieces = @. vpa.scratch6 = correction1_integral_type2_pieces * vpa_unnorm / vthe_upper + integral_type2 = sum(@view(correction2_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1])) + dgamma_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at -wcut- is included in dgamma_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * beta + integral_type2) * dsigma_dp + # Change in gamma integral due to different interpolated ̂g values + + dgamma_dp_interp + ) + + # Here we overwrite the buffers that were used for correction2_integral_pieces + # and correction2_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction3_integral_pieces = @. vpa.scratch5 = correction2_integral_pieces * vpa_unnorm / vthe_upper + correction3_integral_type2_pieces = @. vpa.scratch6 = correction2_integral_type2_pieces * vpa_unnorm / vthe_upper + integral_type2 = sum(@view(correction3_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1])) + ddelta_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at -wcut- is included in ddelta_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * gamma + integral_type2) * dsigma_dp + # Change in delta integral due to different interpolated ̂g values + + ddelta_dp_interp + ) + + # Here we overwrite the buffers that were used for correction3_integral_pieces + # and correction3_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction4_integral_pieces = @. vpa.scratch5 = correction3_integral_pieces * vpa_unnorm / vthe_upper + correction4_integral_type2_pieces = @. vpa.scratch6 = correction3_integral_type2_pieces * vpa_unnorm / vthe_upper + integral_type2 = sum(@view(correction4_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1])) + depsilon_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at -wcut- is included in depsilon_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * delta + integral_type2) * dsigma_dp + # Change in epsilon integral due to different interpolated ̂g values + + depsilon_dp_interp + ) + + # Here we overwrite the buffers that were used for correction4_integral_pieces + # and correction4_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction5_integral_pieces = @. vpa.scratch5 = correction4_integral_pieces * vpa_unnorm / vthe_upper + correction5_integral_type2_pieces = @. vpa.scratch6 = correction4_integral_type2_pieces * vpa_unnorm / vthe_upper + integral_type2 = sum(@view(correction5_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1])) + dzeta_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at -wcut- is included in dzeta_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * epsilon + integral_type2) * dsigma_dp + # Change in zeta integral due to different interpolated ̂g values + + dzeta_dp_interp + ) + + # Here we overwrite the buffers that were used for correction4_integral_pieces + # and correction4_integral_type2_pieces, but this is OK as we never need those + # arrays again. + correction6_integral_pieces = @. vpa.scratch5 = correction5_integral_pieces * vpa_unnorm / vthe_upper + correction6_integral_type2_pieces = @. vpa.scratch6 = correction5_integral_type2_pieces * vpa_unnorm / vthe_upper + integral_type2 = sum(@view(correction6_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1])) + deta_dp = ( + # The grid points either side of sigma are zero-ed out for these + # corrections, so this boundary does not contribute. + # Contribution from integral limit at -wcut- is included in deta_dp_interp + # Contribution from w-integral due to variation of integrand with ppar. + + (-2.0 * zeta + integral_type2) * dsigma_dp + # Change in eta integral due to different interpolated ̂g values + + deta_dp_interp + ) + + dA_dp, dB_dp, dC_dp, dD_dp = correction_matrix \ ( + [da2_minus_a3_dp, dminus_b2_minus_b3_dp, dc2_minus_c3_dp, dminus_d2_minus_d3_dp] + - [dalpha_dp dbeta_dp dgamma_dp ddelta_dp ; + dbeta_dp dgamma_dp ddelta_dp depsilon_dp ; + dgamma_dp ddelta_dp depsilon_dp dzeta_dp ; + ddelta_dp depsilon_dp dzeta_dp deta_dp + ] + * [A, B, C, D] + ) + + output_range = lower_cutoff_ind:sigma_ind-1 + v_over_vth = @views @. vpa.scratch[output_range] = vpa_unnorm[output_range] / vthe_upper + @views @. jacobian_zend_ppar[output_range] -= + (dA_dp + + dB_dp * v_over_vth + + dC_dp * v_over_vth^2 + + dD_dp * v_over_vth^3) * + integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) * + pdf_upperz[output_range] + + # Add variation due to variation of ̃v coordinate with δp. + # These contributions seem to make almost no difference, and could probably be + # skipped. + dv_over_vth_dp = -dsigma_dp + @views @. jacobian_zend_ppar[output_range] -= ( + (B * dv_over_vth_dp + + 2.0 * C * v_over_vth * dv_over_vth_dp + + 3.0 * D * v_over_vth^2 * dv_over_vth_dp) * + integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) * + pdf_upperz[output_range] + + + (A + + B * v_over_vth + + C * v_over_vth^2 + + D * v_over_vth^3) * + (2.0 * integral_correction_sharpness * v_over_vth * dv_over_vth_dp / (1.0 + integral_correction_sharpness * v_over_vth^2) + - 2.0 * integral_correction_sharpness^2 * v_over_vth^3 * dv_over_vth_dp / (1.0 + integral_correction_sharpness * v_over_vth^2)^2) * + pdf_upperz[output_range] + ) + end + end + + return nothing +end + """ electron_adaptive_timestep_update!(scratch, t, t_params, moments, phi, z_advect, vpa_advect, composition, r, z, vperp, vpa, @@ -3374,11 +4564,12 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati `evolve_ppar=true`) the electron energy equation. """ @timeit global_timer fill_electron_kinetic_equation_Jacobian!( - jacobian_matrix, f, ppar, moments, collisions, composition, z, - vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, - vpa_advect, scratch_dummy, external_source_settings, - num_diss_params, t_params, ion_dt, ir, evolve_ppar, - include=:all, include_qpar_integral_terms=true) = begin + jacobian_matrix, f, ppar, moments, phi_global, collisions, + composition, z, vperp, vpa, z_spectral, vperp_spectral, + vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params, ion_dt, ir, + evolve_ppar, include=:all, + include_qpar_integral_terms=true) = begin dt = t_params.dt[] buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] @@ -3396,6 +4587,7 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati dppar_dz = @view moments.electron.dppar_dz[:,ir] dvth_dz = @view moments.electron.dvth_dz[:,ir] dqpar_dz = @view moments.electron.dqpar_dz[:,ir] + phi = @view phi_global[:,ir] upar_ion = @view moments.ion.upar[:,ir,1] @@ -3517,19 +4709,24 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati add_ion_dt_forcing_of_electron_ppar_to_Jacobian!( jacobian_matrix, z, dt, ion_dt, ir, include; ppar_offset=pdf_size) end + if t_params.include_wall_bc_in_preconditioner + add_wall_boundary_condition_to_Jacobian!( + jacobian_matrix, phi, f, ppar, vth, upar, z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient, me, ir, include; + ppar_offset=pdf_size) + end return nothing end """ - fill_electron_kinetic_equation_v_only_Jacobian!(jacobian_matrix, f, ppar, moments, - collisions, composition, z, vperp, - vpa, z_spectral, vperp_specral, - vpa_spectral, z_advect, vpa_advect, - scratch_dummy, - external_source_settings, - num_diss_params, t_params, ion_dt, ir, - iz, evolve_ppar, include=:all) + fill_electron_kinetic_equation_v_only_Jacobian!() + jacobian_matrix, f, ppar, dpdf_dz, dpdf_dvpa, z_speed, moments, zeroth_moment, + first_moment, second_moment, third_moment, dthird_moment_dz, phi, collisions, + composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, + vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, + ion_dt, ir, iz, evolve_ppar) Fill a pre-allocated matrix with the Jacobian matrix for a velocity-space solve part of the ADI method for electron kinetic equation and (if `evolve_ppar=true`) the electron @@ -3538,7 +4735,7 @@ energy equation. @timeit global_timer fill_electron_kinetic_equation_v_only_Jacobian!( jacobian_matrix, f, ppar, dpdf_dz, dpdf_dvpa, z_speed, moments, zeroth_moment, first_moment, second_moment, third_moment, - dthird_moment_dz, collisions, composition, z, vperp, vpa, + dthird_moment_dz, phi, collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, ir, iz, evolve_ppar) = begin @@ -3557,7 +4754,6 @@ energy equation. upar_ion = moments.ion.upar[iz,ir,1] - pdf_size = z.n * vperp.n * vpa.n v_size = vperp.n * vpa.n # Initialise jacobian_matrix to the identity @@ -3601,6 +4797,14 @@ energy equation. jacobian_matrix, z, dt, ion_dt, ir, iz) end + if t_params.include_wall_bc_in_preconditioner + add_wall_boundary_condition_to_Jacobian!( + jacobian_matrix, phi, f, ppar, vth, upar, z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient, me, ir, :implicit_v, iz; + ppar_offset=v_size) + end + return nothing end diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl index 1b616116d..8bfd386ea 100644 --- a/moment_kinetics/src/gauss_legendre.jl +++ b/moment_kinetics/src/gauss_legendre.jl @@ -29,8 +29,9 @@ using SparseMatricesCSR using ..type_definitions: mk_float, mk_int using ..array_allocation: allocate_float import ..calculus: elementwise_derivative!, mass_matrix_solve! -import ..interpolation: single_element_interpolate! -using ..lagrange_polynomials: lagrange_poly_optimised +import ..interpolation: single_element_interpolate!, + fill_single_element_interpolation_matrix! +using ..lagrange_polynomials: lagrange_poly_optimised, lagrange_poly_derivative_optimised using ..moment_kinetics_structs: weak_discretization_info @@ -386,7 +387,8 @@ end Function to perform interpolation on a single element. """ function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, coord, - gausslegendre::gausslegendre_base_info) + gausslegendre::gausslegendre_base_info, + derivative::Val{0}) n_new = length(newgrid) i = 1 @@ -408,6 +410,50 @@ function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, c return nothing end +""" +Function to carry out a 1D (global) mass matrix solve. +""" +# Evaluate first derivative of the interpolating function +function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, coord, + gausslegendre::gausslegendre_base_info, + derivative::Val{1}) + n_new = length(newgrid) + + i = 1 + other_nodes = @view coord.other_nodes[:,i,ielement] + one_over_denominator = coord.one_over_denominator[i,ielement] + this_f = f[i] + for j ∈ 1:n_new + result[j] = this_f * lagrange_poly_derivative_optimised(other_nodes, one_over_denominator, newgrid[j]) + end + for i ∈ 2:coord.ngrid + other_nodes = @view coord.other_nodes[:,i,ielement] + one_over_denominator = coord.one_over_denominator[i,ielement] + this_f = f[i] + for j ∈ 1:n_new + result[j] += this_f * lagrange_poly_derivative_optimised(other_nodes, one_over_denominator, newgrid[j]) + end + end + + return nothing +end + +function fill_single_element_interpolation_matrix!( + matrix_slice, newgrid, jelement, coord, + gausslegendre::gausslegendre_base_info) + n_new = length(newgrid) + + for j ∈ 1:coord.ngrid + other_nodes = @view coord.other_nodes[:,j,jelement] + one_over_denominator = coord.one_over_denominator[j,jelement] + for i ∈ 1:n_new + matrix_slice[i,j] = lagrange_poly_optimised(other_nodes, one_over_denominator, newgrid[i]) + end + end + + return nothing +end + """ Function to carry out a 1D (global) mass matrix solve. """ diff --git a/moment_kinetics/src/hermite_spline_interpolation.jl b/moment_kinetics/src/hermite_spline_interpolation.jl index 255c5da91..03453fbfc 100644 --- a/moment_kinetics/src/hermite_spline_interpolation.jl +++ b/moment_kinetics/src/hermite_spline_interpolation.jl @@ -21,9 +21,16 @@ coord : coordinate not_spectral : finite_difference_info A finite_difference_info argument here indicates that the coordinate is not spectral-element discretized, i.e. it is on a uniform ('finite difference') grid. +derivative : Val(n) + The value of `n` the integer in the `Val{n}` indicates the order of the derivative to + be calculated of the interpolating function (only a few values of `n` are supported). + Defaults to Val(0), which means just calculating the interpolating function itself. """ +function interpolate_to_grid_1d! end + function interpolate_to_grid_1d!(result, new_grid, f, coord, - not_spectral::finite_difference_info) + not_spectral::finite_difference_info, + derivative::Val{0}=Val(0)) x = coord.grid n_new = length(new_grid) @@ -101,5 +108,10 @@ function interpolate_to_grid_1d!(result, new_grid, f, coord, return nothing end +function interpolate_to_grid_1d!(result, new_grid, f, coord, + not_spectral::finite_difference_info, + derivative::Val{1}) + error("First derivative interpolation not implemented for finite-difference yet.") +end end # hermite_spline_interpolation diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index df191e544..ca9856868 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -761,6 +761,8 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field nl_solver_params.electron_advance.global_precon_iterations, nl_solver_params.electron_advance.solves_since_precon_update, nl_solver_params.electron_advance.precon_dt, + nl_solver_params.electron_advance.precon_lowerz_vcut_inds, + nl_solver_params.electron_advance.precon_upperz_vcut_inds, nl_solver_params.electron_advance.serial_solve, nl_solver_params.electron_advance.max_nonlinear_iterations_this_step, nl_solver_params.electron_advance.max_linear_iterations_this_step, diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index 094773a00..a367b2a68 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -89,6 +89,7 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero cap_factor_ion_dt::mk_float max_pseudotimesteps::mk_int max_pseudotime::mk_float + include_wall_bc_in_preconditioner::Bool write_after_fixed_step_count::Bool error_sum_zero::Terrorsum split_operators::Bool diff --git a/moment_kinetics/src/interpolation.jl b/moment_kinetics/src/interpolation.jl index 69041a463..c87d6b104 100644 --- a/moment_kinetics/src/interpolation.jl +++ b/moment_kinetics/src/interpolation.jl @@ -5,7 +5,8 @@ Note these are not guaranteed to be highly optimized! """ module interpolation -export interpolate_to_grid_z, interpolate_to_grid_1d!, interpolate_symmetric! +export interpolate_to_grid_z, interpolate_to_grid_1d!, interpolate_symmetric!, + fill_interpolate_symmetric_matrix! using ..array_allocation: allocate_float using ..moment_kinetics_structs: null_spatial_dimension_info, null_velocity_dimension_info @@ -44,10 +45,15 @@ coord : coordinate spectral : discretization_info struct containing information for discretization, whose type determines which method is used. +derivative : Val(n) + The value of `n` the integer in the `Val{n}` indicates the order of the derivative to + be calculated of the interpolating function (only a few values of `n` are supported). + Defaults to Val(0), which means just calculating the interpolating function itself. """ function interpolate_to_grid_1d! end -function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral) +function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral, + derivative::Val=Val(0)) # define local variable nelement for convenience nelement = coord.nelement_local @@ -59,7 +65,7 @@ function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral) # Assumes points in newgrid are sorted. # May not be the most efficient algorithm. # Find start/end points for each element, storing their indices in kstart - kstart = Vector{mk_int}(undef, nelement+1) + kstart = coord.scratch_int_nelement_plus_1 # First element includes both boundary points, while all others have only one (to # avoid duplication), so calculate the first element outside the loop. @@ -82,10 +88,21 @@ function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral) end # check to see if any of the newgrid points are to the left of the first grid point - for j ∈ 1:kstart[1]-1 - # if the new grid location is outside the bounds of the original grid, - # extrapolate f with Gaussian-like decay beyond the domain - result[j] = f[1] * exp(-(coord.grid[1] - newgrid[j])^2) + if isa(derivative, Val{0}) + for j ∈ 1:kstart[1]-1 + # if the new grid location is outside the bounds of the original grid, + # extrapolate f with Gaussian-like decay beyond the domain + result[j] = f[1] * exp(-(coord.grid[1] - newgrid[j])^2) + end + elseif isa(derivative, Val{1}) + for j ∈ 1:kstart[1]-1 + # if the new grid location is outside the bounds of the original grid, + # extrapolate f with Gaussian-like decay beyond the domain, then take + # derivative. + result[j] = 2.0 * (coord.grid[1] - newgrid[j]) * f[1] * exp(-(coord.grid[1] - newgrid[j])^2) + end + else + error("Unrecognised derivative=$derivative") end @inbounds for j ∈ 1:nelement # Search from kstart[j] to try to speed up the sort, but means result of @@ -100,7 +117,7 @@ function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral) kmax = kstart[2] - 1 @views single_element_interpolate!(result[kmin:kmax], newgrid[kmin:kmax], f[imin:imax], imin, imax, 1, coord, - first_element_spectral) + first_element_spectral, derivative) end @inbounds for j ∈ 2:nelement kmin = kstart[j] @@ -110,19 +127,115 @@ function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral) imax = coord.imax[j] @views single_element_interpolate!(result[kmin:kmax], newgrid[kmin:kmax], f[imin:imax], imin, imax, j, coord, - spectral.lobatto) + spectral.lobatto, derivative) end end - for k ∈ kstart[nelement+1]:n_new - result[k] = f[end] * exp(-(newgrid[k] - coord.grid[end])^2) + if isa(derivative, Val{0}) + for k ∈ kstart[nelement+1]:n_new + result[k] = f[end] * exp(-(newgrid[k] - coord.grid[end])^2) + end + elseif isa(derivative, Val{1}) + for k ∈ kstart[nelement+1]:n_new + result[k] = -2.0 * (newgrid[k] - coord.grid[end]) * f[end] * exp(-(newgrid[k] - coord.grid[end])^2) + end + else + error("Unrecognised derivative=$derivative") + end + + return nothing +end + +""" + fill_single_element_interpolation_matrix!( + matrix_slice, newgrid, jelement, coord, spectral) + +Set `matrix_slice` equal to the interpolation matrix that interpolates values from the +element `jelement` of the vector being multiplied onto the grid points given by `newgrid` +(which must be contained within the physical space covered by element `jelement`). + +`coord` is the `coordinate` object for the dimension in which the interpolation is done, +and `spectral` the discretization object corresponding to `jelement`. +""" +function fill_single_element_interpolation_matrix! end + +function fill_1d_interpolation_matrix!(matrix, newgrid, coord, spectral) + # define local variable nelement for convenience + nelement = coord.nelement_local + + n_new = size(newgrid)[1] + # Find which points belong to which element. + # istart[j] contains the index of the first point in newgrid that is within element + # j, and istart[nelement+1] is n_new if the last point is within coord.grid, or the + # index of the first element outside coord.grid otherwise. + # Assumes points in newgrid are sorted. + # May not be the most efficient algorithm. + # Find start/end points for each element, storing their indices in istart + istart = coord.scratch_int_nelement_plus_1 + + # First element includes both boundary points, while all others have only one (to + # avoid duplication), so calculate the first element outside the loop. + if coord.radau_first_element && coord.irank == 0 + first_element_spectral = spectral.radau + # For a grid with a Radau first element, the lower boundary of the first element + # is at coord=0, and the coordinate range is 0 OptionsDict("run_name" => "jacobian_matrix" "initialization_residual_value" => 2.5, "converged_residual_value" => 1.0e-2, "constraint_forcing_rate" => 2.321, + "include_wall_bc_in_preconditioner" => true, ), "nonlinear_solver" => OptionsDict("nonlinear_max_iterations" => 100, "rtol" => 1.0e-5, @@ -208,6 +215,9 @@ test_input = OptionsDict("output" => OptionsDict("run_name" => "jacobian_matrix" ) function get_mk_state(test_input) + # Reset timers in case there was a previous run which did not clean them up. + reset_mk_timers!() + mk_state = nothing quietoutput() do mk_state = setup_moment_kinetics(test_input; skip_electron_solve=true) @@ -460,7 +470,9 @@ function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2) vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + if z.bc == "wall" + error("z_bc = \"wall\" not supported here yet.") + elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) # Boundary conditions on incoming part of distribution function. Note # that as density, upar, ppar do not change in this implicit step, # f_electron_newvar, f_old, and residual should all be zero at exactly @@ -797,7 +809,9 @@ function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2) vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + if z.bc == "wall" + error("z_bc = \"wall\" not supported here yet.") + elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) # Boundary conditions on incoming part of distribution function. Note # that as density, upar, ppar do not change in this implicit step, # f_electron_newvar, f_old, and residual should all be zero at exactly @@ -1148,7 +1162,9 @@ function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilo vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + if z.bc == "wall" + error("z_bc = \"wall\" not supported here yet.") + elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) # Boundary conditions on incoming part of distribution function. Note # that as density, upar, ppar do not change in this implicit step, # f_electron_newvar, f_old, and residual should all be zero at exactly @@ -1444,7 +1460,9 @@ function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2) vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + if z.bc == "wall" + error("z_bc = \"wall\" not supported here yet.") + elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) # Boundary conditions on incoming part of distribution function. Note # that as density, upar, ppar do not change in this implicit step, # f_electron_newvar, f_old, and residual should all be zero at exactly @@ -1759,7 +1777,9 @@ function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2) vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + if z.bc == "wall" + error("z_bc = \"wall\" not supported here yet.") + elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) # Boundary conditions on incoming part of distribution function. Note # that as density, upar, ppar do not change in this implicit step, # f_electron_newvar, f_old, and residual should all be zero at exactly @@ -2088,7 +2108,9 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + if z.bc == "wall" + error("z_bc = \"wall\" not supported here yet.") + elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) # Boundary conditions on incoming part of distribution function. Note # that as density, upar, ppar do not change in this implicit step, # f_electron_newvar, f_old, and residual should all be zero at exactly @@ -2431,7 +2453,9 @@ function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsil vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) + if z.bc == "wall" + error("z_bc = \"wall\" not supported here yet.") + elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) # Boundary conditions on incoming part of distribution function. Note # that as density, upar, ppar do not change in this implicit step, # f_electron_newvar, f_old, and residual should all be zero at exactly @@ -3098,18 +3122,22 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2 end function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) - test_input = deepcopy(test_input) - test_input["output"]["run_name"] *= "_electron_kinetic_equation" - println(" - electron_kinetic_equation") - @testset "electron_kinetic_equation" begin + # Looser rtol for "wall" bc because integral corrections not accounted for in wall bc + # Jacobian (yet?). + @testset "electron_kinetic_equation bc=$bc" for (bc, adi_tol) ∈ (("constant", 1.0e-15), ("wall", 1.0e-13)) + println(" - electron_kinetic_equation $bc") + this_test_input = deepcopy(test_input) + this_test_input["output"]["run_name"] *= "_electron_kinetic_equation_$bc" + this_test_input["z"]["bc"] = bc + # Suppress console output while running pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, composition, collisions, geometry, gyroavs, boundary_distributions, external_source_settings, num_diss_params, nl_solver_params, advance, advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, - ascii_io, io_moments, io_dfns = get_mk_state(test_input) + ascii_io, io_moments, io_dfns = get_mk_state(this_test_input) dens = @view moments.electron.dens[:,ir] upar = @view moments.electron.upar[:,ir] @@ -3270,9 +3298,9 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) # Add 'explicit' contribution # Use jacobian_matrix as a temporary buffer here. fill_electron_kinetic_equation_Jacobian!( - jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, - z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, - scratch_dummy, external_source_settings, num_diss_params, + jacobian_matrix, f, ppar, moments, fields.phi, collisions, composition, z, + vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, + vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params.electron, ion_dt, ir, true, :explicit_v) begin_serial_region() @serial_region begin @@ -3280,10 +3308,10 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) end fill_electron_kinetic_equation_Jacobian!( - jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, - z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, - external_source_settings, num_diss_params, t_params.electron, ion_dt, ir, - true) + jacobian_matrix, f, ppar, moments, fields.phi, collisions, composition, z, + vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, + vpa_advect, scratch_dummy, external_source_settings, num_diss_params, + t_params.electron, ion_dt, ir, true) begin_serial_region() @serial_region begin @@ -3292,7 +3320,7 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) # Jacobian matrix functions without being too messed up by floating-point # rounding errors. The result is that some entries in the Jacobian matrix # here are O(1.0e5), so it is important to use `rtol` here. - @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=1.0e-15, atol=1.0e-15) + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=adi_tol, atol=1.0e-15) end end @@ -3320,18 +3348,18 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], ppar[iz], dpdf_dz[:,:,iz], dpdf_dvpa[:,:,iz], z_speed, moments, zeroth_moment[iz], first_moment[iz], second_moment[iz], - third_moment[iz], dthird_moment_dz[iz], collisions, composition, z, - vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, - vpa_advect, scratch_dummy, external_source_settings, num_diss_params, - t_params.electron, ion_dt, ir, iz, true) + third_moment[iz], dthird_moment_dz[iz], fields.phi[iz,ir], collisions, + composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, external_source_settings, + num_diss_params, t_params.electron, ion_dt, ir, iz, true) end # Add 'explicit' contribution # Use jacobian_matrix as a temporary buffer here. fill_electron_kinetic_equation_Jacobian!( - jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, - z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, - scratch_dummy, external_source_settings, num_diss_params, + jacobian_matrix, f, ppar, moments, fields.phi, collisions, composition, z, + vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, + vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params.electron, ion_dt, ir, true, :explicit_z) begin_serial_region() @@ -3340,10 +3368,10 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) end fill_electron_kinetic_equation_Jacobian!( - jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, - z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, - external_source_settings, num_diss_params, t_params.electron, ion_dt, ir, - true) + jacobian_matrix, f, ppar, moments, fields.phi, collisions, composition, z, + vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, + vpa_advect, scratch_dummy, external_source_settings, num_diss_params, + t_params.electron, ion_dt, ir, true) begin_serial_region() @serial_region begin @@ -3352,7 +3380,7 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) # Jacobian matrix functions without being too messed up by floating-point # rounding errors. The result is that some entries in the Jacobian matrix # here are O(1.0e5), so it is important to use `rtol` here. - @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=1.0e-13, atol=1.0e-13) + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=10.0*adi_tol, atol=1.0e-13) end end @@ -3420,36 +3448,7 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) vperp, vperp_spectral, vperp_adv, vperp_diffusion, ir) end - if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1) - # Boundary conditions on incoming part of distribution function. Note - # that as density, upar, ppar do not change in this implicit step, - # f_electron_newvar, f_old, and residual should all be zero at exactly - # the same set of grid points, so it is reasonable to zero-out - # `residual` to impose the boundary condition. We impose this after - # subtracting f_old in case rounding errors, etc. mean that at some - # point f_old had a different boundary condition cut-off index. - begin_vperp_vpa_region() - v_unnorm = vpa.scratch - zero = 1.0e-14 - if z.irank == 0 - iz = 1 - v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) - @loop_vperp_vpa ivperp ivpa begin - if v_unnorm[ivpa] > -zero - residual_f[ivpa,ivperp,iz] = 0.0 - end - end - end - if z.irank == z.nrank - 1 - iz = z.n - v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true) - @loop_vperp_vpa ivperp ivpa begin - if v_unnorm[ivpa] < zero - residual_f[ivpa,ivperp,iz] = 0.0 - end - end - end - end + zero_z_boundary_condition_points(residual_f, z, vpa, moments, ir) return nothing end @@ -3457,15 +3456,30 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) original_residual_p = allocate_shared_float(size(ppar)...) perturbed_residual_f = allocate_shared_float(size(f)...) perturbed_residual_p = allocate_shared_float(size(ppar)...) + f_plus_delta_f = allocate_shared_float(size(f)...) + f_with_delta_p = allocate_shared_float(size(f)...) + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + f_plus_delta_f[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + delta_f[ivpa,ivperp,iz] + f_with_delta_p[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + p_plus_delta_p = allocate_shared_float(size(ppar)...) + begin_z_region() + @loop_z iz begin + p_plus_delta_p[iz] = ppar[iz] + delta_p[iz] + end @testset "δf only" begin residual_func!(original_residual_f, original_residual_p, f, ppar) - residual_func!(perturbed_residual_f, perturbed_residual_p, f.+delta_f, ppar) + residual_func!(perturbed_residual_f, perturbed_residual_p, f_plus_delta_f, ppar) begin_serial_region() @serial_region begin delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) + # Take this difference rather than using delta_f directly because we need + # the effect of the boundary condition having been applied to + # f_plus_delta_f. + delta_state[1:pdf_size] .= vec(f_plus_delta_f .- f) residual_update_with_Jacobian = jacobian_matrix * delta_state perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] perturbed_with_Jacobian_p = vec(original_residual_p) .+ residual_update_with_Jacobian[pdf_size+1:end] @@ -3483,11 +3497,15 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) @testset "δp only" begin residual_func!(original_residual_f, original_residual_p, f, ppar) - residual_func!(perturbed_residual_f, perturbed_residual_p, f, ppar.+delta_p) + residual_func!(perturbed_residual_f, perturbed_residual_p, f_with_delta_p, p_plus_delta_p) begin_serial_region() @serial_region begin delta_state = zeros(mk_float, total_size) + # Take this difference rather than using delta_f directly because we need + # the effect of the boundary condition having been applied to + # f_with_delta_p. + delta_state[1:pdf_size] .= vec(f_with_delta_p .- f) delta_state[pdf_size+1:end] .= vec(delta_p) residual_update_with_Jacobian = jacobian_matrix * delta_state perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] @@ -3506,12 +3524,15 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) @testset "δf and δp" begin residual_func!(original_residual_f, original_residual_p, f, ppar) - residual_func!(perturbed_residual_f, perturbed_residual_p, f.+delta_f, ppar.+delta_p) + residual_func!(perturbed_residual_f, perturbed_residual_p, f_plus_delta_f, p_plus_delta_p) begin_serial_region() @serial_region begin delta_state = zeros(mk_float, total_size) - delta_state[1:pdf_size] .= vec(delta_f) + # Take this difference rather than using delta_f directly because we need + # the effect of the boundary condition having been applied to + # f_plus_delta_f. + delta_state[1:pdf_size] .= vec(f_plus_delta_f .- f) delta_state[pdf_size+1:end] .= vec(delta_p) residual_update_with_Jacobian = jacobian_matrix * delta_state perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size] @@ -3534,6 +3555,408 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) return nothing end +function test_electron_wall_bc(test_input; atol=(7.0*epsilon)^2) + test_input = deepcopy(test_input) + test_input["output"]["run_name"] *= "_electron_wall_bc" + println(" - electron_wall_bc") + + # This test only affects the end-points in z, so only include those points to avoid an + # over-optimistic error estimate due the time update matrix for all other z-indices + # just being the identity. + test_input["z"]["nelement"] = 1 + test_input["z"]["ngrid"] = 2 + test_input["z"]["bc"] = "wall" + + # Interpolation done during the boundary condition needs to be reasonably accurate for + # the simplified form (without constraints) that is done in the 'Jacobian matrix' to + # match the full version, so increase vpa resolution. + test_input["vpa"]["nelement"] = 256 + test_input["vz"]["nelement"] = 256 + + @testset "electron_wall_bc" begin + # Suppress console output while running + pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa, + vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs, + composition, collisions, geometry, gyroavs, boundary_distributions, + external_source_settings, num_diss_params, nl_solver_params, advance, + advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list, + ascii_io, io_moments, io_dfns = get_mk_state(test_input) + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ppar = @view moments.electron.ppar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + phi = @view fields.phi[:,ir] + z_spectral = spectral_objects.z_spectral + vperp_spectral = spectral_objects.vperp_spectral + vpa_spectral = spectral_objects.vpa_spectral + z_advect = advection_structs.z_advect + vpa_advect = advection_structs.vpa_advect + me = composition.me_over_mi + + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + + v_size = vperp.n * vpa.n + + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar + third_moment = scratch_dummy.buffer_z_1 + dthird_moment_dz = scratch_dummy.buffer_z_2 + begin_z_region() + @loop_z iz begin + third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz] + end + derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + z_speed = @view z_advect[1].speed[:,:,:,ir] + + delta_p = allocate_shared_float(size(ppar)...) + p_amplitude = epsilon * maximum(ppar) + f = @view pdf.electron.norm[:,:,:,ir] + begin_serial_region() + @serial_region begin + # Make sure initial condition has some z-variation. As f is 'moment kinetic' this + # means f must have a non-Maxwellian part that varies in z. + f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n) + end + # Ensure initial electron distribution function obeys constraints + hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa) + # enforce the boundary condition(s) on the electron pdf + @views enforce_boundary_condition_on_electron_pdf!( + f, phi, vth, upar, z, vperp, vpa, vperp_spectral, vpa_spectral, + vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + composition.me_over_mi, ir; bc_constraints=false, update_vcut=false) + delta_f = allocate_shared_float(size(f)...) + f_amplitude = epsilon * maximum(f) + # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes + # low-order moments vanish exactly. + # For this test have no z-dependence in delta_f so that it does not vanish + # at the z-boundaries + begin_serial_region() + @serial_region begin + @. delta_p = p_amplitude + + delta_f .= f_amplitude .* + reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .* + f + end + + pdf_size = length(f) + p_size = length(ppar) + total_size = pdf_size + p_size + + dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] + begin_z_vperp_region() + update_electron_speed_vpa!(vpa_advect[1], dens, upar, ppar, moments, vpa.grid, + external_source_settings.electron, ir) + @loop_z_vperp iz ivperp begin + @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir] + end + #calculate the upwind derivative of the electron pdf w.r.t. wpa + @loop_z_vperp iz ivperp begin + @views derivative!(dpdf_dvpa[:,ivperp,iz], f[:,ivperp,iz], vpa, + vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral) + end + + jacobian_matrix = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix .= 0.0 + end + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + + # Initialise identity matrix. + jacobian_matrix[row,row] = 1.0 + end + begin_z_region() + @loop_z iz begin + # Rows corresponding to electron_ppar + row = pdf_size + iz + + # Initialise identity matrix. + jacobian_matrix[row,row] = 1.0 + end + + add_wall_boundary_condition_to_Jacobian!( + jacobian_matrix, phi, f, ppar, vth, upar, z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient, me, ir, :all; + ppar_offset=pdf_size) + + # Test 'ADI Jacobians' before other tests, because residual_func() may modify some + # variables (vth, etc.). + + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + + @testset "ADI Jacobians - implicit z" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + + # Initialise identity matrix. + jacobian_matrix_ADI_check[row,:] .= 0.0 + jacobian_matrix_ADI_check[row,row] = 1.0 + end + begin_z_region() + @loop_z iz begin + # Rows corresponding to electron_ppar + row = pdf_size + iz + + # Initialise identity matrix. + jacobian_matrix_ADI_check[row,:] .= 0.0 + jacobian_matrix_ADI_check[row,row] = 1.0 + end + + # There is no 'implicit z' contribution for wall bc + + # Add 'explicit' contribution + add_wall_boundary_condition_to_Jacobian!( + jacobian_matrix_ADI_check, phi, f, ppar, vth, upar, z, vperp, vpa, + vperp_spectral, vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient, me, ir, :explicit_v; + ppar_offset=pdf_size) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + + @testset "ADI Jacobians - implicit v" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + + # Initialise identity matrix. + jacobian_matrix_ADI_check[row,:] .= 0.0 + jacobian_matrix_ADI_check[row,row] = 1.0 + end + begin_z_region() + @loop_z iz begin + # Rows corresponding to electron_ppar + row = pdf_size + iz + + # Initialise identity matrix. + jacobian_matrix_ADI_check[row,:] .= 0.0 + jacobian_matrix_ADI_check[row,row] = 1.0 + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_z_region() + @loop_z iz begin + this_slice = collect((iz - 1)*v_size + 1:iz*v_size) + push!(this_slice, iz + pdf_size) + @views add_wall_boundary_condition_to_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], phi[iz], f[:,:,iz], + ppar[iz], vth[iz], upar[iz], z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient, me, ir, + :implicit_v, iz; ppar_offset=v_size) + end + + # There is no 'explicit vpa' contribution for wall bc + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + + function residual_func!(residual, this_f, this_p) + begin_z_region() + @loop_z iz begin + # update the electron thermal speed using the updated electron + # parallel pressure + vth[iz] = sqrt(abs(2.0 * this_p[iz] / + (dens[iz] * composition.me_over_mi))) + end + + # enforce the boundary condition(s) on the electron pdf + @views enforce_boundary_condition_on_electron_pdf!( + this_f, phi, vth, upar, z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + composition.me_over_mi, ir; bc_constraints=false, + update_vcut=false) + + # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the + # electron_pdf member of the first argument, so if we set the electron_pdf member + # of the first argument to zero, and pass dt=1, then it will evaluate the time + # derivative, which is the residual for a steady-state solution. + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + # Now + # residual = f_electron_old + dt*RHS(f_electron_newvar) + # so update to desired residual + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz] + end + + # Set residual to zero where pdf_electron is determined by boundary conditions. + if vpa.n > 1 + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin + @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc, + vpa_advect[1].speed[:,ivperp,iz,ir], + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + vpa, vpa_spectral) + end + end + if vperp.n > 1 + begin_z_vpa_region() + enforce_vperp_boundary_condition!(residual, vperp.bc, + vperp, vperp_spectral, vperp_adv, + vperp_diffusion, ir) + end + if z.bc == "wall" + zero_z_boundary_condition_points(residual, z, vpa, moments, ir) + else + error("Testing wall bc but z_bc != \"wall\".") + end + + return nothing + end + + original_residual = allocate_shared_float(size(f)...) + perturbed_residual = allocate_shared_float(size(f)...) + f_plus_delta_f = allocate_shared_float(size(f)...) + f_with_delta_p = allocate_shared_float(size(f)...) + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + f_plus_delta_f[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + delta_f[ivpa,ivperp,iz] + f_with_delta_p[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + end + p_plus_delta_p = allocate_shared_float(size(ppar)...) + begin_z_region() + @loop_z iz begin + p_plus_delta_p[iz] = ppar[iz] + delta_p[iz] + end + + @testset "δf only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f_plus_delta_f, ppar) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + # Take this difference rather than using delta_f directly because we need + # the effect of the boundary condition having been applied to + # f_plus_delta_f. + delta_state[1:pdf_size] .= vec(f_plus_delta_f .- f) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + zeros(p_size); atol=1.0e-15) + + # Check that something happened, to make sure that for example the + # residual function and Jacobian don't both just zero out the boundary + # points. + @test norm(vec(perturbed_residual) .- perturbed_with_Jacobian) > 1.0e-12 + + # If the boundary condition is correctly implemented in the Jacobian, then + # if f+delta_f obeys the boundary condition, then J*delta_state should + # give zeros in the boundary points. + @test elementwise_isapprox(perturbed_residual, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n); + rtol=0.0, atol=atol) + end + end + + @testset "δp only" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f_with_delta_p, p_plus_delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + # Take this difference rather than using delta_f directly because we need + # the effect of the boundary condition having been applied to + # f_with_delta_p. + delta_state[1:pdf_size] .= vec(f_with_delta_p .- f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + vec(delta_p); atol=1.0e-15) + + # Check that something happened, to make sure that for example the + # residual function and Jacobian don't both just zero out the boundary + # points. + @test norm(vec(perturbed_residual) .- perturbed_with_Jacobian) > 1.0e-12 + + # Use an absolute tolerance for this test because if we used a norm_factor + # like the other tests, it would be zero to machine precision at some + # points. + @test elementwise_isapprox(perturbed_residual, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n); + rtol=0.0, atol=atol) + end + end + + @testset "δf and δp" begin + residual_func!(original_residual, f, ppar) + residual_func!(perturbed_residual, f_plus_delta_f, p_plus_delta_p) + + begin_serial_region() + @serial_region begin + delta_state = zeros(mk_float, total_size) + # Take this difference rather than using delta_f directly because we need + # the effect of the boundary condition having been applied to + # f_plus_delta_f. + delta_state[1:pdf_size] .= vec(f_plus_delta_f .- f) + delta_state[pdf_size+1:end] .= vec(delta_p) + residual_update_with_Jacobian = jacobian_matrix * delta_state + perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size] + + # Check ppar did not get perturbed by the Jacobian + @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end], + vec(delta_p); atol=1.0e-15) + + # Check that something happened, to make sure that for example the + # residual function and Jacobian don't both just zero out the boundary + # points. + @test norm(vec(perturbed_residual) .- perturbed_with_Jacobian) > 1.0e-12 + + # Use an absolute tolerance for this test because if we used a norm_factor + # like the other tests, it would be zero to machine precision at some + # points. + @test elementwise_isapprox(perturbed_residual, + reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n); + rtol=0.0, atol=atol) + end + end + + cleanup_mk_state!(ascii_io, io_moments, io_dfns) + end + + return nothing +end + function runtests() if Sys.isapple() && "CI" ∈ keys(ENV) && global_size[] > 1 # These tests are too slow in the parallel tests job on macOS, so skip in that @@ -3556,6 +3979,7 @@ function runtests() test_electron_implicit_constraint_forcing(test_input) test_electron_energy_equation(test_input) test_ion_dt_forcing_of_electron_ppar(test_input) + test_electron_wall_bc(test_input) test_electron_kinetic_equation(test_input) end diff --git a/moment_kinetics/test/nonlinear_solver_tests.jl b/moment_kinetics/test/nonlinear_solver_tests.jl index 36c74eb21..81d4c87c7 100644 --- a/moment_kinetics/test/nonlinear_solver_tests.jl +++ b/moment_kinetics/test/nonlinear_solver_tests.jl @@ -58,7 +58,8 @@ function linear_test() zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), - zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_int, 0), zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_int, 0), zeros(mk_int, 0), zeros(mk_float, 0, 0), zeros(mk_float, 0, 0), advection_input("", 0.0, 0.0, 0.0), zeros(mk_float, 0), zeros(mk_float, 0), MPI.COMM_NULL, 1:n, 1:n, @@ -171,7 +172,8 @@ function nonlinear_test() zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0), - zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_int, 0), zeros(mk_float, 0), zeros(mk_float, 0), + zeros(mk_float, 0), zeros(mk_int, 0), zeros(mk_int, 0), zeros(mk_float, 0, 0), zeros(mk_float, 0, 0), advection_input("", 0.0, 0.0, 0.0), zeros(mk_float, 0), zeros(mk_float, 0), MPI.COMM_NULL, 1:n, 1:n,