diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324-wall-Jac.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324-wall-Jac.toml
new file mode 100644
index 000000000..9fba1454d
--- /dev/null
+++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324-wall-Jac.toml
@@ -0,0 +1,162 @@
+[r]
+ngrid = 1
+nelement = 1
+
+[evolve_moments]
+parallel_pressure = true
+density = true
+moments_conservation = true
+parallel_flow = true
+
+[reactions]
+electron_ionization_frequency = 0.0
+ionization_frequency = 0.5
+charge_exchange_frequency = 0.75
+
+[vz]
+ngrid = 6
+discretization = "gausslegendre_pseudospectral"
+nelement = 31
+L = 36.0
+element_spacing_option = "coarse_tails"
+bc = "zero"
+
+[ion_species_1]
+initial_temperature = 0.1
+initial_density = 1.0
+
+[krook_collisions]
+use_krook = true
+
+[vpa]
+ngrid = 6
+discretization = "gausslegendre_pseudospectral"
+nelement = 31
+L = 30.0
+element_spacing_option = "coarse_tails"
+bc = "zero"
+
+[z]
+ngrid = 5
+discretization = "gausslegendre_pseudospectral"
+nelement = 32
+nelement_local = 4
+bc = "wall"
+element_spacing_option = "sqrt"
+
+[vpa_IC_ion_species_1]
+initialization_option = "gaussian"
+density_amplitude = 1.0
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 0.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[z_IC_neutral_species_1]
+initialization_option = "gaussian"
+temperature_amplitude = 0.0
+density_amplitude = 0.001
+density_phase = 0.0
+upar_amplitude = -1.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[composition]
+T_wall = 0.1
+T_e = 0.2
+electron_physics = "kinetic_electrons"
+recycling_fraction = 0.5
+n_ion_species = 1
+n_neutral_species = 1
+
+[vz_IC_neutral_species_1]
+initialization_option = "gaussian"
+density_amplitude = 1.0
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 0.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[z_IC_ion_species_1]
+initialization_option = "gaussian"
+density_amplitude = 0.001
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 1.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[neutral_species_1]
+initial_temperature = 1.0
+initial_density = 1.0
+
+[timestepping]
+type = "KennedyCarpenterARK324"
+implicit_electron_advance = false
+implicit_electron_ppar = true
+implicit_ion_advance = false
+implicit_vpa_advection = false
+nstep = 1000000
+dt = 1.0e-5
+nwrite = 1000
+nwrite_dfns = 1000
+steady_state_residual = true
+converged_residual_value = 1.0e-3
+
+#write_after_fixed_step_count = true
+#nstep = 1
+#nwrite = 1
+#nwrite_dfns = 1
+
+[electron_timestepping]
+nstep = 5000000
+#nstep = 1
+dt = 1.0e-5
+#maximum_dt = 1.0e-8
+nwrite = 10000
+nwrite_dfns = 100000
+#type = "SSPRK4"
+type = "Fekete4(3)"
+rtol = 1.0e-3
+atol = 1.0e-14
+minimum_dt = 1.0e-9
+decrease_dt_iteration_threshold = 100
+increase_dt_iteration_threshold = 20
+cap_factor_ion_dt = 10.0
+initialization_residual_value = 2.5
+converged_residual_value = 1.0e-2
+include_wall_bc_in_preconditioner = true
+
+#debug_io = 1
+
+[nonlinear_solver]
+nonlinear_max_iterations = 100
+rtol = 1.0e-6 #1.0e-8
+atol = 1.0e-14 #1.0e-16
+linear_restart = 5
+preconditioner_update_interval = 100
+
+[ion_source_1]
+active = true
+z_profile = "gaussian"
+z_width = 0.125
+source_strength = 2.0
+source_T = 2.0
+
+[ion_numerical_dissipation]
+#vpa_dissipation_coefficient = 1.0e-1
+#vpa_dissipation_coefficient = 1.0e-2
+#vpa_dissipation_coefficient = 1.0e-3
+force_minimum_pdf_value = 0.0
+
+[electron_numerical_dissipation]
+vpa_dissipation_coefficient = 2.0
+force_minimum_pdf_value = 0.0
+
+[neutral_numerical_dissipation]
+#vz_dissipation_coefficient = 1.0e-1
+#vz_dissipation_coefficient = 1.0e-2
+#vz_dissipation_coefficient = 1.0e-3
+force_minimum_pdf_value = 0.0
diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324.toml
new file mode 100644
index 000000000..f7bba1bfe
--- /dev/null
+++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-KennedyCarpenterARK324.toml
@@ -0,0 +1,161 @@
+[r]
+ngrid = 1
+nelement = 1
+
+[evolve_moments]
+parallel_pressure = true
+density = true
+moments_conservation = true
+parallel_flow = true
+
+[reactions]
+electron_ionization_frequency = 0.0
+ionization_frequency = 0.5
+charge_exchange_frequency = 0.75
+
+[vz]
+ngrid = 6
+discretization = "gausslegendre_pseudospectral"
+nelement = 31
+L = 36.0
+element_spacing_option = "coarse_tails"
+bc = "zero"
+
+[ion_species_1]
+initial_temperature = 0.1
+initial_density = 1.0
+
+[krook_collisions]
+use_krook = true
+
+[vpa]
+ngrid = 6
+discretization = "gausslegendre_pseudospectral"
+nelement = 31
+L = 30.0
+element_spacing_option = "coarse_tails"
+bc = "zero"
+
+[z]
+ngrid = 5
+discretization = "gausslegendre_pseudospectral"
+nelement = 32
+nelement_local = 4
+bc = "wall"
+element_spacing_option = "sqrt"
+
+[vpa_IC_ion_species_1]
+initialization_option = "gaussian"
+density_amplitude = 1.0
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 0.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[z_IC_neutral_species_1]
+initialization_option = "gaussian"
+temperature_amplitude = 0.0
+density_amplitude = 0.001
+density_phase = 0.0
+upar_amplitude = -1.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[composition]
+T_wall = 0.1
+T_e = 0.2
+electron_physics = "kinetic_electrons"
+recycling_fraction = 0.5
+n_ion_species = 1
+n_neutral_species = 1
+
+[vz_IC_neutral_species_1]
+initialization_option = "gaussian"
+density_amplitude = 1.0
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 0.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[z_IC_ion_species_1]
+initialization_option = "gaussian"
+density_amplitude = 0.001
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 1.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[neutral_species_1]
+initial_temperature = 1.0
+initial_density = 1.0
+
+[timestepping]
+type = "KennedyCarpenterARK324"
+implicit_electron_advance = false
+implicit_electron_ppar = true
+implicit_ion_advance = false
+implicit_vpa_advection = false
+nstep = 1000000
+dt = 1.0e-5
+nwrite = 1000
+nwrite_dfns = 1000
+steady_state_residual = true
+converged_residual_value = 1.0e-3
+
+#write_after_fixed_step_count = true
+#nstep = 1
+#nwrite = 1
+#nwrite_dfns = 1
+
+[electron_timestepping]
+nstep = 5000000
+#nstep = 1
+dt = 1.0e-5
+#maximum_dt = 1.0e-8
+nwrite = 10000
+nwrite_dfns = 100000
+#type = "SSPRK4"
+type = "Fekete4(3)"
+rtol = 1.0e-3
+atol = 1.0e-14
+minimum_dt = 1.0e-9
+decrease_dt_iteration_threshold = 100
+increase_dt_iteration_threshold = 20
+cap_factor_ion_dt = 10.0
+initialization_residual_value = 2.5
+converged_residual_value = 1.0e-2
+
+#debug_io = 1
+
+[nonlinear_solver]
+nonlinear_max_iterations = 100
+rtol = 1.0e-6 #1.0e-8
+atol = 1.0e-14 #1.0e-16
+linear_restart = 5
+preconditioner_update_interval = 100
+
+[ion_source_1]
+active = true
+z_profile = "gaussian"
+z_width = 0.125
+source_strength = 2.0
+source_T = 2.0
+
+[ion_numerical_dissipation]
+#vpa_dissipation_coefficient = 1.0e-1
+#vpa_dissipation_coefficient = 1.0e-2
+#vpa_dissipation_coefficient = 1.0e-3
+force_minimum_pdf_value = 0.0
+
+[electron_numerical_dissipation]
+vpa_dissipation_coefficient = 2.0
+force_minimum_pdf_value = 0.0
+
+[neutral_numerical_dissipation]
+#vz_dissipation_coefficient = 1.0e-1
+#vz_dissipation_coefficient = 1.0e-2
+#vz_dissipation_coefficient = 1.0e-3
+force_minimum_pdf_value = 0.0
diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324-wall-Jac.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324-wall-Jac.toml
new file mode 100644
index 000000000..b4f1c6a59
--- /dev/null
+++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324-wall-Jac.toml
@@ -0,0 +1,163 @@
+[r]
+ngrid = 1
+nelement = 1
+
+[evolve_moments]
+parallel_pressure = true
+density = true
+moments_conservation = true
+parallel_flow = true
+
+[reactions]
+electron_ionization_frequency = 0.0
+ionization_frequency = 0.5
+charge_exchange_frequency = 0.75
+
+[vz]
+ngrid = 6
+discretization = "gausslegendre_pseudospectral"
+nelement = 31
+L = 36.0
+element_spacing_option = "coarse_tails"
+bc = "zero"
+
+[ion_species_1]
+initial_temperature = 0.1
+initial_density = 1.0
+
+[krook_collisions]
+use_krook = true
+
+[vpa]
+ngrid = 6
+discretization = "gausslegendre_pseudospectral"
+nelement = 31
+L = 30.0
+element_spacing_option = "coarse_tails"
+bc = "zero"
+
+[z]
+ngrid = 5
+discretization = "gausslegendre_pseudospectral"
+nelement = 32
+nelement_local = 4
+bc = "wall"
+element_spacing_option = "compressed_2"
+
+[vpa_IC_ion_species_1]
+initialization_option = "gaussian"
+density_amplitude = 1.0
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 0.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[z_IC_neutral_species_1]
+initialization_option = "gaussian"
+temperature_amplitude = 0.0
+density_amplitude = 0.001
+density_phase = 0.0
+upar_amplitude = -1.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[composition]
+T_wall = 0.1
+T_e = 0.2
+electron_physics = "kinetic_electrons"
+recycling_fraction = 0.5
+n_ion_species = 1
+n_neutral_species = 1
+
+[vz_IC_neutral_species_1]
+initialization_option = "gaussian"
+density_amplitude = 1.0
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 0.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[z_IC_ion_species_1]
+initialization_option = "gaussian"
+density_amplitude = 0.001
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 1.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[neutral_species_1]
+initial_temperature = 1.0
+initial_density = 1.0
+
+[timestepping]
+type = "KennedyCarpenterARK324"
+implicit_electron_advance = false
+implicit_electron_ppar = true
+implicit_ion_advance = false
+implicit_vpa_advection = false
+nstep = 1000000
+dt = 1.0e-5
+maximum_dt = 1.0e-5
+nwrite = 1000
+nwrite_dfns = 1000
+steady_state_residual = true
+converged_residual_value = 1.0e-3
+
+#write_after_fixed_step_count = true
+#nstep = 1
+#nwrite = 1
+#nwrite_dfns = 1
+
+[electron_timestepping]
+nstep = 5000000
+#nstep = 1
+dt = 1.0e-5
+#maximum_dt = 1.0e-8
+nwrite = 10000
+nwrite_dfns = 100000
+#type = "SSPRK4"
+type = "Fekete4(3)"
+rtol = 1.0e-3
+atol = 1.0e-14
+minimum_dt = 1.0e-9
+decrease_dt_iteration_threshold = 100
+increase_dt_iteration_threshold = 20
+cap_factor_ion_dt = 10.0
+initialization_residual_value = 2.5
+converged_residual_value = 1.0e-2
+include_wall_bc_in_preconditioner = true
+
+#debug_io = 1
+
+[nonlinear_solver]
+nonlinear_max_iterations = 100
+rtol = 1.0e-6 #1.0e-8
+atol = 1.0e-14 #1.0e-16
+linear_restart = 5
+preconditioner_update_interval = 100
+
+[ion_source_1]
+active = true
+z_profile = "gaussian"
+z_width = 0.125
+source_strength = 2.0
+source_T = 2.0
+
+[ion_numerical_dissipation]
+#vpa_dissipation_coefficient = 1.0e-1
+#vpa_dissipation_coefficient = 1.0e-2
+#vpa_dissipation_coefficient = 1.0e-3
+force_minimum_pdf_value = 0.0
+
+[electron_numerical_dissipation]
+#vpa_dissipation_coefficient = 2.0
+force_minimum_pdf_value = 0.0
+
+[neutral_numerical_dissipation]
+#vz_dissipation_coefficient = 1.0e-1
+#vz_dissipation_coefficient = 1.0e-2
+#vz_dissipation_coefficient = 1.0e-3
+force_minimum_pdf_value = 0.0
diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324.toml
new file mode 100644
index 000000000..9fbb70e16
--- /dev/null
+++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z-KennedyCarpenterARK324.toml
@@ -0,0 +1,161 @@
+[r]
+ngrid = 1
+nelement = 1
+
+[evolve_moments]
+parallel_pressure = true
+density = true
+moments_conservation = true
+parallel_flow = true
+
+[reactions]
+electron_ionization_frequency = 0.0
+ionization_frequency = 0.5
+charge_exchange_frequency = 0.75
+
+[vz]
+ngrid = 6
+discretization = "gausslegendre_pseudospectral"
+nelement = 31
+L = 36.0
+element_spacing_option = "coarse_tails"
+bc = "zero"
+
+[ion_species_1]
+initial_temperature = 0.1
+initial_density = 1.0
+
+[krook_collisions]
+use_krook = true
+
+[vpa]
+ngrid = 6
+discretization = "gausslegendre_pseudospectral"
+nelement = 31
+L = 30.0
+element_spacing_option = "coarse_tails"
+bc = "zero"
+
+[z]
+ngrid = 5
+discretization = "gausslegendre_pseudospectral"
+nelement = 32
+nelement_local = 4
+bc = "wall"
+element_spacing_option = "compressed_2"
+
+[vpa_IC_ion_species_1]
+initialization_option = "gaussian"
+density_amplitude = 1.0
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 0.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[z_IC_neutral_species_1]
+initialization_option = "gaussian"
+temperature_amplitude = 0.0
+density_amplitude = 0.001
+density_phase = 0.0
+upar_amplitude = -1.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[composition]
+T_wall = 0.1
+T_e = 0.2
+electron_physics = "kinetic_electrons"
+recycling_fraction = 0.5
+n_ion_species = 1
+n_neutral_species = 1
+
+[vz_IC_neutral_species_1]
+initialization_option = "gaussian"
+density_amplitude = 1.0
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 0.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[z_IC_ion_species_1]
+initialization_option = "gaussian"
+density_amplitude = 0.001
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 1.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[neutral_species_1]
+initial_temperature = 1.0
+initial_density = 1.0
+
+[timestepping]
+type = "KennedyCarpenterARK324"
+implicit_electron_advance = false
+implicit_electron_ppar = true
+implicit_ion_advance = false
+implicit_vpa_advection = false
+nstep = 1000000
+dt = 1.0e-5
+nwrite = 1000
+nwrite_dfns = 1000
+steady_state_residual = true
+converged_residual_value = 1.0e-3
+
+#write_after_fixed_step_count = true
+#nstep = 1
+#nwrite = 1
+#nwrite_dfns = 1
+
+[electron_timestepping]
+nstep = 5000000
+#nstep = 1
+dt = 1.0e-5
+#maximum_dt = 1.0e-8
+nwrite = 10000
+nwrite_dfns = 100000
+#type = "SSPRK4"
+type = "Fekete4(3)"
+rtol = 1.0e-3
+atol = 1.0e-14
+minimum_dt = 1.0e-9
+decrease_dt_iteration_threshold = 100
+increase_dt_iteration_threshold = 20
+cap_factor_ion_dt = 10.0
+initialization_residual_value = 2.5
+converged_residual_value = 1.0e-2
+
+#debug_io = 1
+
+[nonlinear_solver]
+nonlinear_max_iterations = 100
+rtol = 1.0e-6 #1.0e-8
+atol = 1.0e-14 #1.0e-16
+linear_restart = 5
+preconditioner_update_interval = 100
+
+[ion_source_1]
+active = true
+z_profile = "gaussian"
+z_width = 0.125
+source_strength = 2.0
+source_T = 2.0
+
+[ion_numerical_dissipation]
+#vpa_dissipation_coefficient = 1.0e-1
+#vpa_dissipation_coefficient = 1.0e-2
+#vpa_dissipation_coefficient = 1.0e-3
+force_minimum_pdf_value = 0.0
+
+[electron_numerical_dissipation]
+#vpa_dissipation_coefficient = 2.0
+force_minimum_pdf_value = 0.0
+
+[neutral_numerical_dissipation]
+#vz_dissipation_coefficient = 1.0e-1
+#vz_dissipation_coefficient = 1.0e-2
+#vz_dissipation_coefficient = 1.0e-3
+force_minimum_pdf_value = 0.0
diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324-wall-Jac.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324-wall-Jac.toml
new file mode 100644
index 000000000..1acf5c04d
--- /dev/null
+++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324-wall-Jac.toml
@@ -0,0 +1,162 @@
+[r]
+ngrid = 1
+nelement = 1
+
+[evolve_moments]
+parallel_pressure = true
+density = true
+moments_conservation = true
+parallel_flow = true
+
+[reactions]
+electron_ionization_frequency = 0.0
+ionization_frequency = 0.5
+charge_exchange_frequency = 0.75
+
+[vz]
+ngrid = 6
+discretization = "gausslegendre_pseudospectral"
+nelement = 31
+L = 36.0
+element_spacing_option = "coarse_tails"
+bc = "zero"
+
+[ion_species_1]
+initial_temperature = 0.1
+initial_density = 1.0
+
+[krook_collisions]
+use_krook = true
+
+[vpa]
+ngrid = 6
+discretization = "gausslegendre_pseudospectral"
+nelement = 31
+L = 30.0
+element_spacing_option = "coarse_tails"
+bc = "zero"
+
+[z]
+ngrid = 5
+discretization = "gausslegendre_pseudospectral"
+nelement = 32
+nelement_local = 4
+bc = "wall"
+element_spacing_option = "compressed_4"
+
+[vpa_IC_ion_species_1]
+initialization_option = "gaussian"
+density_amplitude = 1.0
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 0.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[z_IC_neutral_species_1]
+initialization_option = "gaussian"
+temperature_amplitude = 0.0
+density_amplitude = 0.001
+density_phase = 0.0
+upar_amplitude = -1.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[composition]
+T_wall = 0.1
+T_e = 0.2
+electron_physics = "kinetic_electrons"
+recycling_fraction = 0.5
+n_ion_species = 1
+n_neutral_species = 1
+
+[vz_IC_neutral_species_1]
+initialization_option = "gaussian"
+density_amplitude = 1.0
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 0.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[z_IC_ion_species_1]
+initialization_option = "gaussian"
+density_amplitude = 0.001
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 1.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[neutral_species_1]
+initial_temperature = 1.0
+initial_density = 1.0
+
+[timestepping]
+type = "KennedyCarpenterARK324"
+implicit_electron_advance = false
+implicit_electron_ppar = true
+implicit_ion_advance = false
+implicit_vpa_advection = false
+nstep = 1000000
+dt = 1.0e-5
+nwrite = 1000
+nwrite_dfns = 1000
+steady_state_residual = true
+converged_residual_value = 1.0e-3
+
+#write_after_fixed_step_count = true
+#nstep = 1
+#nwrite = 1
+#nwrite_dfns = 1
+
+[electron_timestepping]
+nstep = 5000000
+#nstep = 1
+dt = 1.0e-5
+#maximum_dt = 1.0e-8
+nwrite = 10000
+nwrite_dfns = 100000
+#type = "SSPRK4"
+type = "Fekete4(3)"
+rtol = 1.0e-3
+atol = 1.0e-14
+minimum_dt = 1.0e-9
+decrease_dt_iteration_threshold = 100
+increase_dt_iteration_threshold = 20
+cap_factor_ion_dt = 10.0
+initialization_residual_value = 2.5
+converged_residual_value = 1.0e-2
+include_wall_bc_in_preconditioner = true
+
+#debug_io = 1
+
+[nonlinear_solver]
+nonlinear_max_iterations = 100
+rtol = 1.0e-6 #1.0e-8
+atol = 1.0e-14 #1.0e-16
+linear_restart = 5
+preconditioner_update_interval = 100
+
+[ion_source_1]
+active = true
+z_profile = "gaussian"
+z_width = 0.125
+source_strength = 2.0
+source_T = 2.0
+
+[ion_numerical_dissipation]
+#vpa_dissipation_coefficient = 1.0e-1
+#vpa_dissipation_coefficient = 1.0e-2
+#vpa_dissipation_coefficient = 1.0e-3
+force_minimum_pdf_value = 0.0
+
+[electron_numerical_dissipation]
+#vpa_dissipation_coefficient = 2.0
+force_minimum_pdf_value = 0.0
+
+[neutral_numerical_dissipation]
+#vz_dissipation_coefficient = 1.0e-1
+#vz_dissipation_coefficient = 1.0e-2
+#vz_dissipation_coefficient = 1.0e-3
+force_minimum_pdf_value = 0.0
diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324.toml
new file mode 100644
index 000000000..ba5ae4f24
--- /dev/null
+++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-compressed-z4-KennedyCarpenterARK324.toml
@@ -0,0 +1,161 @@
+[r]
+ngrid = 1
+nelement = 1
+
+[evolve_moments]
+parallel_pressure = true
+density = true
+moments_conservation = true
+parallel_flow = true
+
+[reactions]
+electron_ionization_frequency = 0.0
+ionization_frequency = 0.5
+charge_exchange_frequency = 0.75
+
+[vz]
+ngrid = 6
+discretization = "gausslegendre_pseudospectral"
+nelement = 31
+L = 36.0
+element_spacing_option = "coarse_tails"
+bc = "zero"
+
+[ion_species_1]
+initial_temperature = 0.1
+initial_density = 1.0
+
+[krook_collisions]
+use_krook = true
+
+[vpa]
+ngrid = 6
+discretization = "gausslegendre_pseudospectral"
+nelement = 31
+L = 30.0
+element_spacing_option = "coarse_tails"
+bc = "zero"
+
+[z]
+ngrid = 5
+discretization = "gausslegendre_pseudospectral"
+nelement = 32
+nelement_local = 4
+bc = "wall"
+element_spacing_option = "compressed_4"
+
+[vpa_IC_ion_species_1]
+initialization_option = "gaussian"
+density_amplitude = 1.0
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 0.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[z_IC_neutral_species_1]
+initialization_option = "gaussian"
+temperature_amplitude = 0.0
+density_amplitude = 0.001
+density_phase = 0.0
+upar_amplitude = -1.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[composition]
+T_wall = 0.1
+T_e = 0.2
+electron_physics = "kinetic_electrons"
+recycling_fraction = 0.5
+n_ion_species = 1
+n_neutral_species = 1
+
+[vz_IC_neutral_species_1]
+initialization_option = "gaussian"
+density_amplitude = 1.0
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 0.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[z_IC_ion_species_1]
+initialization_option = "gaussian"
+density_amplitude = 0.001
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 1.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[neutral_species_1]
+initial_temperature = 1.0
+initial_density = 1.0
+
+[timestepping]
+type = "KennedyCarpenterARK324"
+implicit_electron_advance = false
+implicit_electron_ppar = true
+implicit_ion_advance = false
+implicit_vpa_advection = false
+nstep = 1000000
+dt = 1.0e-5
+nwrite = 1000
+nwrite_dfns = 1000
+steady_state_residual = true
+converged_residual_value = 1.0e-3
+
+#write_after_fixed_step_count = true
+#nstep = 1
+#nwrite = 1
+#nwrite_dfns = 1
+
+[electron_timestepping]
+nstep = 5000000
+#nstep = 1
+dt = 1.0e-5
+#maximum_dt = 1.0e-8
+nwrite = 10000
+nwrite_dfns = 100000
+#type = "SSPRK4"
+type = "Fekete4(3)"
+rtol = 1.0e-3
+atol = 1.0e-14
+minimum_dt = 1.0e-9
+decrease_dt_iteration_threshold = 100
+increase_dt_iteration_threshold = 20
+cap_factor_ion_dt = 10.0
+initialization_residual_value = 2.5
+converged_residual_value = 1.0e-2
+
+#debug_io = 1
+
+[nonlinear_solver]
+nonlinear_max_iterations = 100
+rtol = 1.0e-6 #1.0e-8
+atol = 1.0e-14 #1.0e-16
+linear_restart = 5
+preconditioner_update_interval = 100
+
+[ion_source_1]
+active = true
+z_profile = "gaussian"
+z_width = 0.125
+source_strength = 2.0
+source_T = 2.0
+
+[ion_numerical_dissipation]
+#vpa_dissipation_coefficient = 1.0e-1
+#vpa_dissipation_coefficient = 1.0e-2
+#vpa_dissipation_coefficient = 1.0e-3
+force_minimum_pdf_value = 0.0
+
+[electron_numerical_dissipation]
+#vpa_dissipation_coefficient = 2.0
+force_minimum_pdf_value = 0.0
+
+[neutral_numerical_dissipation]
+#vz_dissipation_coefficient = 1.0e-1
+#vz_dissipation_coefficient = 1.0e-2
+#vz_dissipation_coefficient = 1.0e-3
+force_minimum_pdf_value = 0.0
diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324-wall-Jac.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324-wall-Jac.toml
new file mode 100644
index 000000000..19bad4f94
--- /dev/null
+++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324-wall-Jac.toml
@@ -0,0 +1,163 @@
+[r]
+ngrid = 1
+nelement = 1
+
+[evolve_moments]
+parallel_pressure = true
+density = true
+moments_conservation = true
+parallel_flow = true
+
+[reactions]
+electron_ionization_frequency = 0.0
+ionization_frequency = 0.5
+charge_exchange_frequency = 0.75
+
+[vz]
+ngrid = 6
+discretization = "gausslegendre_pseudospectral"
+nelement = 31
+L = 36.0
+element_spacing_option = "coarse_tails"
+bc = "zero"
+
+[ion_species_1]
+initial_temperature = 0.1
+initial_density = 1.0
+
+[krook_collisions]
+use_krook = true
+
+[vpa]
+ngrid = 6
+discretization = "gausslegendre_pseudospectral"
+nelement = 31
+L = 30.0
+element_spacing_option = "coarse_tails"
+bc = "zero"
+
+[z]
+ngrid = 5
+discretization = "gausslegendre_pseudospectral"
+nelement = 32
+nelement_local = 4
+#nelement_local = 8
+#nelement_local = 16
+bc = "wall"
+
+[vpa_IC_ion_species_1]
+initialization_option = "gaussian"
+density_amplitude = 1.0
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 0.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[z_IC_neutral_species_1]
+initialization_option = "gaussian"
+temperature_amplitude = 0.0
+density_amplitude = 0.001
+density_phase = 0.0
+upar_amplitude = -1.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[composition]
+T_wall = 0.1
+T_e = 0.2
+electron_physics = "kinetic_electrons"
+recycling_fraction = 0.5
+n_ion_species = 1
+n_neutral_species = 1
+
+[vz_IC_neutral_species_1]
+initialization_option = "gaussian"
+density_amplitude = 1.0
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 0.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[z_IC_ion_species_1]
+initialization_option = "gaussian"
+density_amplitude = 0.001
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 1.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[neutral_species_1]
+initial_temperature = 1.0
+initial_density = 1.0
+
+[timestepping]
+type = "KennedyCarpenterARK324"
+implicit_electron_advance = false
+implicit_electron_ppar = true
+implicit_ion_advance = false
+implicit_vpa_advection = false
+nstep = 1000000
+dt = 1.0e-5
+nwrite = 1000
+nwrite_dfns = 1000
+steady_state_residual = true
+converged_residual_value = 1.0e-3
+
+#write_after_fixed_step_count = true
+#nstep = 1
+#nwrite = 1
+#nwrite_dfns = 1
+
+[electron_timestepping]
+nstep = 5000000
+#nstep = 1
+dt = 1.0e-5
+#maximum_dt = 1.0e-8
+nwrite = 10 #10000
+nwrite_dfns = 10 #100000
+#type = "SSPRK4"
+type = "Fekete4(3)"
+rtol = 1.0e-3
+atol = 1.0e-14
+minimum_dt = 1.0e-9
+decrease_dt_iteration_threshold = 100
+increase_dt_iteration_threshold = 20
+cap_factor_ion_dt = 10.0
+initialization_residual_value = 2.5
+converged_residual_value = 1.0e-2
+include_wall_bc_in_preconditioner = true
+
+#debug_io = 1
+
+[nonlinear_solver]
+nonlinear_max_iterations = 100
+rtol = 1.0e-6 #1.0e-8
+atol = 1.0e-14 #1.0e-16
+linear_restart = 5
+preconditioner_update_interval = 100
+
+[ion_source_1]
+active = true
+z_profile = "gaussian"
+z_width = 0.125
+source_strength = 2.0
+source_T = 2.0
+
+[ion_numerical_dissipation]
+#vpa_dissipation_coefficient = 1.0e-1
+#vpa_dissipation_coefficient = 1.0e-2
+#vpa_dissipation_coefficient = 1.0e-3
+force_minimum_pdf_value = 0.0
+
+[electron_numerical_dissipation]
+#vpa_dissipation_coefficient = 2.0
+force_minimum_pdf_value = 0.0
+
+[neutral_numerical_dissipation]
+#vz_dissipation_coefficient = 1.0e-1
+#vz_dissipation_coefficient = 1.0e-2
+#vz_dissipation_coefficient = 1.0e-3
+force_minimum_pdf_value = 0.0
diff --git a/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324.toml b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324.toml
new file mode 100644
index 000000000..052649878
--- /dev/null
+++ b/examples/kinetic-electrons/wall-bc_recyclefraction0.5_split3_kinetic-coarse_tails-uniform-z-KennedyCarpenterARK324.toml
@@ -0,0 +1,162 @@
+[r]
+ngrid = 1
+nelement = 1
+
+[evolve_moments]
+parallel_pressure = true
+density = true
+moments_conservation = true
+parallel_flow = true
+
+[reactions]
+electron_ionization_frequency = 0.0
+ionization_frequency = 0.5
+charge_exchange_frequency = 0.75
+
+[vz]
+ngrid = 6
+discretization = "gausslegendre_pseudospectral"
+nelement = 31
+L = 36.0
+element_spacing_option = "coarse_tails"
+bc = "zero"
+
+[ion_species_1]
+initial_temperature = 0.1
+initial_density = 1.0
+
+[krook_collisions]
+use_krook = true
+
+[vpa]
+ngrid = 6
+discretization = "gausslegendre_pseudospectral"
+nelement = 31
+L = 30.0
+element_spacing_option = "coarse_tails"
+bc = "zero"
+
+[z]
+ngrid = 5
+discretization = "gausslegendre_pseudospectral"
+nelement = 32
+nelement_local = 4
+#nelement_local = 8
+#nelement_local = 16
+bc = "wall"
+
+[vpa_IC_ion_species_1]
+initialization_option = "gaussian"
+density_amplitude = 1.0
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 0.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[z_IC_neutral_species_1]
+initialization_option = "gaussian"
+temperature_amplitude = 0.0
+density_amplitude = 0.001
+density_phase = 0.0
+upar_amplitude = -1.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[composition]
+T_wall = 0.1
+T_e = 0.2
+electron_physics = "kinetic_electrons"
+recycling_fraction = 0.5
+n_ion_species = 1
+n_neutral_species = 1
+
+[vz_IC_neutral_species_1]
+initialization_option = "gaussian"
+density_amplitude = 1.0
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 0.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[z_IC_ion_species_1]
+initialization_option = "gaussian"
+density_amplitude = 0.001
+temperature_amplitude = 0.0
+density_phase = 0.0
+upar_amplitude = 1.0
+temperature_phase = 0.0
+upar_phase = 0.0
+
+[neutral_species_1]
+initial_temperature = 1.0
+initial_density = 1.0
+
+[timestepping]
+type = "KennedyCarpenterARK324"
+implicit_electron_advance = false
+implicit_electron_ppar = true
+implicit_ion_advance = false
+implicit_vpa_advection = false
+nstep = 1000000
+dt = 1.0e-5
+nwrite = 1000
+nwrite_dfns = 1000
+steady_state_residual = true
+converged_residual_value = 1.0e-3
+
+#write_after_fixed_step_count = true
+#nstep = 1
+#nwrite = 1
+#nwrite_dfns = 1
+
+[electron_timestepping]
+nstep = 5000000
+#nstep = 1
+dt = 1.0e-5
+#maximum_dt = 1.0e-8
+nwrite = 10 #10000
+nwrite_dfns = 10 #100000
+#type = "SSPRK4"
+type = "Fekete4(3)"
+rtol = 1.0e-3
+atol = 1.0e-14
+minimum_dt = 1.0e-9
+decrease_dt_iteration_threshold = 100
+increase_dt_iteration_threshold = 20
+cap_factor_ion_dt = 10.0
+initialization_residual_value = 2.5
+converged_residual_value = 1.0e-2
+
+#debug_io = 1
+
+[nonlinear_solver]
+nonlinear_max_iterations = 100
+rtol = 1.0e-6 #1.0e-8
+atol = 1.0e-14 #1.0e-16
+linear_restart = 5
+preconditioner_update_interval = 100
+
+[ion_source_1]
+active = true
+z_profile = "gaussian"
+z_width = 0.125
+source_strength = 2.0
+source_T = 2.0
+
+[ion_numerical_dissipation]
+#vpa_dissipation_coefficient = 1.0e-1
+#vpa_dissipation_coefficient = 1.0e-2
+#vpa_dissipation_coefficient = 1.0e-3
+force_minimum_pdf_value = 0.0
+
+[electron_numerical_dissipation]
+#vpa_dissipation_coefficient = 2.0
+force_minimum_pdf_value = 0.0
+
+[neutral_numerical_dissipation]
+#vz_dissipation_coefficient = 1.0e-1
+#vz_dissipation_coefficient = 1.0e-2
+#vz_dissipation_coefficient = 1.0e-3
+force_minimum_pdf_value = 0.0
diff --git a/moment_kinetics/debug_test/kinetic_electron_inputs.jl b/moment_kinetics/debug_test/kinetic_electron_inputs.jl
index 4f834c319..b235efdc7 100644
--- a/moment_kinetics/debug_test/kinetic_electron_inputs.jl
+++ b/moment_kinetics/debug_test/kinetic_electron_inputs.jl
@@ -91,7 +91,12 @@ test_input = OptionsDict("composition" => OptionsDict("n_ion_species" => 1,
                          "neutral_numerical_dissipation" => OptionsDict("force_minimum_pdf_value" => 0.0,
                                                                         "vz_dissipation_coefficient" => 1e-2))
 
+test_input_adi = deepcopy(test_input)
+test_input_adi["output"]["run_name"] = "kinetic_electron_adi"
+test_input_adi["timestepping"]["implicit_electron_ppar"] = "adi"
+
 
 test_input_list = [
      test_input,
+     test_input_adi,
     ]
diff --git a/moment_kinetics/src/chebyshev.jl b/moment_kinetics/src/chebyshev.jl
index ad8139120..716ca6599 100644
--- a/moment_kinetics/src/chebyshev.jl
+++ b/moment_kinetics/src/chebyshev.jl
@@ -466,7 +466,7 @@ function chebyshev_spectral_derivative!(df,f)
 end
 
 function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, coord,
-                                     chebyshev::chebyshev_base_info)
+                                     chebyshev::chebyshev_base_info, derivative::Val{0})
     # Temporary buffer to store Chebyshev coefficients
     cheby_f = chebyshev.df
 
@@ -495,6 +495,71 @@ function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, c
     return nothing
 end
 
+# Evaluate first derivative of the interpolating function
+function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, coord,
+                                     chebyshev::chebyshev_base_info, derivative::Val{1})
+    # Temporary buffer to store Chebyshev coefficients
+    cheby_f = chebyshev.df
+
+    if length(cheby_f) == 1
+        # Chebyshev 'polynomial' is only a constant, so derivative must be zero?
+        result .= 0.0
+        return nothing
+    end
+
+    # Need to transform newgrid values to a scaled z-coordinate associated with the
+    # Chebyshev coefficients to get the interpolated function values. Transform is a
+    # shift and scale so that the element coordinate goes from -1 to 1
+    shift = 0.5 * (coord.grid[imin] + coord.grid[imax])
+    scale = 2.0 / (coord.grid[imax] - coord.grid[imin])
+
+    # Get Chebyshev coefficients
+    chebyshev_forward_transform!(cheby_f, chebyshev.fext, f, chebyshev.forward, coord.ngrid)
+
+    if length(cheby_f) == 2
+        # Handle this case specially because the generic algorithm below uses cheby_f[3]
+        # explicitly.
+        for i ∈ 1:length(newgrid)
+            x = newgrid[i]
+            z = scale * (x - shift)
+
+            # cheby_f[2] is multiplied by U_0, but U_0 = 1
+            result[i] = cheby_f[2] * scale
+        end
+        return nothing
+    end
+
+    # Need to evaluate first derivatives of the Chebyshev polynomials. Can do this using
+    # the differentiation formula
+    # https://en.wikipedia.org/wiki/Chebyshev_polynomials#Differentiation_and_integration
+    #   d T_n / dz = n * U_(n-1)
+    # where U_n are the Chebyshev polynomials of the second kind, which in turn can be
+    # evaluated using the recurrence relation
+    # https://en.wikipedia.org/wiki/Chebyshev_polynomials#Recurrence_definition
+    #   U_0 = 1
+    #   U_1 = 2*z
+    #   U_(n+1) = 2*z*U_n - U_(n-1)
+    for i ∈ 1:length(newgrid)
+        x = newgrid[i]
+        z = scale * (x - shift)
+        # Evaluate sum of Chebyshev polynomials at z using recurrence relation
+        U_nminus1 = 1.0
+        U_n = 2.0*z
+
+        # Note that the derivative of the zero'th Chebyshev polynomial (a constant) does
+        # not contribute, so cheby_f[1] is not used.
+        nplus1 = 2.0
+        result[i] = cheby_f[2] * U_nminus1 * scale + cheby_f[3] * nplus1 * U_n * scale
+        for coef ∈ @view(cheby_f[4:end])
+            nplus1 += 1.0
+            U_nminus1, U_n = U_n, 2.0 * z * U_n - U_nminus1
+            result[i] += coef * nplus1 * U_n * scale
+        end
+    end
+
+    return nothing
+end
+
 """
 returns wgts array containing the integration weights associated
 with all grid points for Clenshaw-Curtis quadrature
diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl
index 373d7c8f8..7f71fd12a 100644
--- a/moment_kinetics/src/coordinates.jl
+++ b/moment_kinetics/src/coordinates.jl
@@ -8,7 +8,7 @@ export set_element_boundaries
 
 using LinearAlgebra
 using ..type_definitions: mk_float, mk_int, OptionsDict
-using ..array_allocation: allocate_float, allocate_shared_float, allocate_int
+using ..array_allocation: allocate_float, allocate_shared_float, allocate_int, allocate_shared_int
 using ..calculus: derivative!
 using ..chebyshev: scaled_chebyshev_grid, scaled_chebyshev_radau_grid, setup_chebyshev_pseudospectral
 using ..communication
@@ -25,7 +25,7 @@ using OrderedCollections: OrderedDict
 """
 structure containing basic information related to coordinates
 """
-struct coordinate{T <: AbstractVector{mk_float},Tbparams}
+struct coordinate{T <: AbstractVector{mk_float}, Ti <: AbstractVector{mk_int}, Tbparams}
     # name is the name of the variable associated with this coordiante
     name::String
     # n_global is the total number of grid points associated with this coordinate
@@ -95,6 +95,11 @@ struct coordinate{T <: AbstractVector{mk_float},Tbparams}
     scratch8::Array{mk_float,1}
     # scratch9 is an array used for intermediate calculations requiring n entries
     scratch9::Array{mk_float,1}
+    # scratch10 is an array used for intermediate calculations requiring n entries
+    scratch10::Array{mk_float,1}
+    # scratch_int_nelement_plus_1 is an integer array used for intermediate calculations
+    # requiring nelement+1 entries
+    scratch_int_nelement_plus_1::Array{mk_int,1}
     # scratch_shared is a shared-memory array used for intermediate calculations requiring
     # n entries
     scratch_shared::T
@@ -104,6 +109,12 @@ struct coordinate{T <: AbstractVector{mk_float},Tbparams}
     # scratch_shared3 is a shared-memory array used for intermediate calculations requiring
     # n entries
     scratch_shared3::T
+    # scratch_shared_int is a shared-memory array used for intermediate calculations
+    # requiring n integer entries
+    scratch_shared_int::Ti
+    # scratch_shared_int is a shared-memory array used for intermediate calculations
+    # requiring n integer entries
+    scratch_shared_int2::Ti
     # scratch_2d and scratch2_2d are arrays used for intermediate calculations requiring
     # ngrid x nelement entries
     scratch_2d::Array{mk_float,2}
@@ -307,14 +318,21 @@ function define_coordinate(coord_input::NamedTuple; parallel_io::Bool=false,
     duniform_dgrid = allocate_float(coord_input.ngrid, coord_input.nelement_local)
     # scratch is an array used for intermediate calculations requiring n entries
     scratch = allocate_float(n_local)
+    # scratch_int_nelement_plus_1 is an array used for intermediate calculations requiring
+    # nelement+1 entries
+    scratch_int_nelement_plus_1 = allocate_int(coord_input.nelement_local + 1)
     if ignore_MPI
         scratch_shared = allocate_float(n_local)
         scratch_shared2 = allocate_float(n_local)
         scratch_shared3 = allocate_float(n_local)
+        scratch_shared_int = allocate_int(n_local)
+        scratch_shared_int2 = allocate_int(n_local)
     else
         scratch_shared = allocate_shared_float(n_local)
         scratch_shared2 = allocate_shared_float(n_local)
         scratch_shared3 = allocate_shared_float(n_local)
+        scratch_shared_int = allocate_shared_int(n_local)
+        scratch_shared_int2 = allocate_shared_int(n_local)
     end
     # Initialise scratch_shared* so that the debug checks do not complain when they get
     # printed by `println(io, all_inputs)` in mk_input().
@@ -322,6 +340,8 @@ function define_coordinate(coord_input::NamedTuple; parallel_io::Bool=false,
         scratch_shared .= NaN
         scratch_shared2 .= NaN
         scratch_shared3 .= NaN
+        scratch_shared_int .= typemin(mk_int)
+        scratch_shared_int2 .= typemin(mk_int)
     end
     if !ignore_MPI
         _block_synchronize()
@@ -387,9 +407,10 @@ function define_coordinate(coord_input::NamedTuple; parallel_io::Bool=false,
         coord_input.cheb_option, coord_input.bc, coord_input.boundary_parameters, wgts,
         uniform_grid, duniform_dgrid, scratch, copy(scratch), copy(scratch),
         copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch),
-        copy(scratch), scratch_shared, scratch_shared2, scratch_shared3, scratch_2d,
-        copy(scratch_2d), advection, send_buffer, receive_buffer, comm, local_io_range,
-        global_io_range, element_scale, element_shift,
+        copy(scratch), copy(scratch), scratch_int_nelement_plus_1, scratch_shared,
+        scratch_shared2, scratch_shared3, scratch_shared_int, scratch_shared_int2,
+        scratch_2d, copy(scratch_2d), advection, send_buffer, receive_buffer, comm,
+        local_io_range, global_io_range, element_scale, element_shift,
         coord_input.element_spacing_option, element_boundaries, radau_first_element,
         other_nodes, one_over_denominator)
 
diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 49b5e81fb..2bf8685a1 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -18,8 +18,8 @@ using ..calculus: derivative!, second_derivative!, integral,
 using ..communication
 using ..gauss_legendre: gausslegendre_info
 using ..input_structs
-using ..interpolation: interpolate_to_grid_1d!,
-                       interpolate_symmetric!
+using ..interpolation: interpolate_to_grid_1d!, fill_1d_interpolation_matrix!,
+                       interpolate_symmetric!, fill_interpolate_symmetric_matrix!
 using ..type_definitions: mk_float, mk_int
 using ..array_allocation: allocate_float
 using ..electron_fluid_equations: calculate_electron_moments!,
@@ -69,6 +69,8 @@ using ..velocity_moments: integrate_over_vspace, calculate_electron_moment_deriv
 # Only needed so we can reference it in a docstring
 import ..runge_kutta
 
+const integral_correction_sharpness = 4.0
+
 """
 update_electron_pdf is a function that uses the electron kinetic equation 
 to solve for the updated electron pdf
@@ -895,7 +897,7 @@ global_rank[] == 0 && println("recalculating precon")
                         nl_solver_params.preconditioners[ir]
 
                     fill_electron_kinetic_equation_Jacobian!(
-                        precon_matrix, f_electron_new, electron_ppar_new, moments,
+                        precon_matrix, f_electron_new, electron_ppar_new, moments, phi,
                         collisions, composition, z, vperp, vpa, z_spectral,
                         vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
                         external_source_settings, num_diss_params, t_params, ion_dt,
@@ -1097,10 +1099,10 @@ global_rank[] == 0 && println("recalculating precon")
                         # guess is zero,
                         fill_electron_kinetic_equation_Jacobian!(
                             explicit_J, f_electron_new, electron_ppar_new, moments,
-                            collisions, composition, z, vperp, vpa, z_spectral,
-                            vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
-                            external_source_settings, num_diss_params, t_params, ion_dt, ir,
-                            evolve_ppar, :explicit_z, false)
+                            phi, collisions, composition, z, vperp, vpa, z_spectral,
+                            vperp_spectral, vpa_spectral, z_advect, vpa_advect,
+                            scratch_dummy, external_source_settings, num_diss_params,
+                            t_params, ion_dt, ir, evolve_ppar, :explicit_z, false)
                     end
                     begin_z_region()
                     @loop_z iz begin
@@ -1110,7 +1112,7 @@ global_rank[] == 0 && println("recalculating precon")
                             A, f_electron_new[:,:,iz], electron_ppar_new[iz],
                             dpdf_dz[:,:,iz], dpdf_dvpa[:,:,iz], z_speed, moments,
                             zeroth_moment[iz], first_moment[iz], second_moment[iz],
-                            third_moment[iz], dthird_moment_dz[iz], collisions,
+                            third_moment[iz], dthird_moment_dz[iz], phi[iz], collisions,
                             composition, z, vperp, vpa, z_spectral, vperp_spectral,
                             vpa_spectral, z_advect, vpa_advect, scratch_dummy,
                             external_source_settings, num_diss_params, t_params, ion_dt,
@@ -1150,7 +1152,7 @@ global_rank[] == 0 && println("recalculating precon")
                     # Get sparse matrix for explicit, right-hand-side part of the
                     # solve.
                     fill_electron_kinetic_equation_Jacobian!(
-                        explicit_J, f_electron_new, electron_ppar_new, moments,
+                        explicit_J, f_electron_new, electron_ppar_new, moments, phi,
                         collisions, composition, z, vperp, vpa, z_spectral,
                         vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
                         external_source_settings, num_diss_params, t_params, ion_dt, ir,
@@ -1520,36 +1522,7 @@ global_rank[] == 0 && println("recalculating precon")
                                                       vperp, vperp_spectral, vperp_adv,
                                                       vperp_diffusion, ir)
                 end
-                if z.bc ∈ ("wall", "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
-                    # Boundary conditions on incoming part of distribution function. Note
-                    # that as density, upar, ppar do not change in this implicit step,
-                    # f_electron_newvar, f_old, and residual should all be zero at exactly
-                    # the same set of grid points, so it is reasonable to zero-out
-                    # `residual` to impose the boundary condition. We impose this after
-                    # subtracting f_old in case rounding errors, etc. mean that at some
-                    # point f_old had a different boundary condition cut-off index.
-                    begin_vperp_vpa_region()
-                    v_unnorm = vpa.scratch
-                    zero = 1.0e-14
-                    if z.irank == 0
-                        v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[1,ir],
-                                                    moments.electron.upar[1,ir], true, true)
-                        @loop_vperp_vpa ivperp ivpa begin
-                            if v_unnorm[ivpa] > -zero
-                                f_electron_residual[ivpa,ivperp,1] = 0.0
-                            end
-                        end
-                    end
-                    if z.irank == z.nrank - 1
-                        v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[end,ir],
-                                                    moments.electron.upar[end,ir], true, true)
-                        @loop_vperp_vpa ivperp ivpa begin
-                            if v_unnorm[ivpa] < zero
-                                f_electron_residual[ivpa,ivperp,end] = 0.0
-                            end
-                        end
-                    end
-                end
+                zero_z_boundary_condition_points(f_electron_residual, z, vpa, moments, ir)
 
                 return nothing
             end
@@ -1649,10 +1622,56 @@ global_rank[] == 0 && println("recalculating precon")
                 end
             end
 
+            new_lowerz_vcut_inds = r.scratch_shared_int
+            new_upperz_vcut_inds = r.scratch_shared_int2
             apply_electron_bc_and_constraints_no_r!(f_electron_new, phi, moments, z,
                                                     vperp, vpa, vperp_spectral,
                                                     vpa_spectral, vpa_advect,
-                                                    num_diss_params, composition, ir)
+                                                    num_diss_params, composition, ir;
+                                                    lowerz_vcut_inds=new_lowerz_vcut_inds,
+                                                    upperz_vcut_inds=new_upperz_vcut_inds)
+            # Check if either vcut_ind has changed - if either has, recalculate the
+            # preconditioner because the response at the grid point before the cutoff is
+            # sharp, so the preconditioner could be significantly wrong when it was
+            # calculated using the wrong vcut_ind.
+            lower_vcut_changed = @view z.scratch_shared_int[1:1]
+            upper_vcut_changed = @view z.scratch_shared_int[2:2]
+            @serial_region begin
+                if z.irank == 0
+                    precon_lowerz_vcut_inds = nl_solver_params.precon_lowerz_vcut_inds
+                    if all(new_lowerz_vcut_inds .== precon_lowerz_vcut_inds)
+                        lower_vcut_changed[] = 0
+                    else
+                        lower_vcut_changed[] = 1
+                        precon_lowerz_vcut_inds .= new_lowerz_vcut_inds
+                    end
+                end
+                MPI.Bcast!(lower_vcut_changed, comm_inter_block[]; root=0)
+                #req1 = MPI.Ibcast!(lower_vcut_changed, comm_inter_block[]; root=0)
+
+                if z.irank == z.nrank - 1
+                    precon_upperz_vcut_inds = nl_solver_params.precon_upperz_vcut_inds
+                    if all(new_upperz_vcut_inds .== precon_upperz_vcut_inds)
+                        upper_vcut_changed[] = 0
+                    else
+                        upper_vcut_changed[] = 1
+                        precon_upperz_vcut_inds .= new_upperz_vcut_inds
+                    end
+                end
+                MPI.Bcast!(upper_vcut_changed, comm_inter_block[]; root=n_blocks[]-1)
+                #req2 = MPI.Ibcast!(upper_vcut_changed, comm_inter_block[]; root=n_blocks[]-1)
+
+                # Eventually can use Ibcast!() to make the two broadcasts run
+                # simultaneously, but need the function to be merged into MPI.jl (see
+                # https://github.com/JuliaParallel/MPI.jl/pull/882).
+                #MPI.Waitall([req1, req2])
+            end
+            _block_synchronize()
+            if lower_vcut_changed[] == 1 || upper_vcut_changed[] == 1
+                # One or both of the indices changed for some `ir`, so force the
+                # preconditioner to be recalculated next time.
+                nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval
+            end
 
             if !evolve_ppar
                 # update the electron heat flux
@@ -1938,47 +1957,7 @@ to allow the outer r-loop to be parallelised.
                 enforce_vperp_boundary_condition!(f_electron_residual, vperp.bc, vperp, vperp_spectral,
                                                   vperp_adv, vperp_diffusion)
             end
-            if z.bc ∈ ("wall", "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
-                # Boundary conditions on incoming part of distribution function. Note that
-                # as density, upar, ppar do not change in this implicit step, f_new,
-                # f_old, and residual should all be zero at exactly the same set of grid
-                # points, so it is reasonable to zero-out `residual` to impose the
-                # boundary condition. We impose this after subtracting f_old in case
-                # rounding errors, etc. mean that at some point f_old had a different
-                # boundary condition cut-off index.
-                begin_vperp_vpa_region()
-                v_unnorm = vpa.scratch
-                zero = 1.0e-14
-                if z.irank == 0
-                    iz = 1
-                    v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir],
-                                                fvec_in.electron_upar[iz,ir], true, true)
-                    @loop_vperp_vpa ivperp ivpa begin
-                        if v_unnorm[ivpa] > -zero
-                            f_electron_residual[ivpa,ivperp,iz,ir] = 0.0
-                        end
-                    end
-                end
-                if z.irank == z.nrank - 1
-                    iz = z.n
-                    v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir],
-                                                fvec_in.electron_upar[iz,ir], true, true)
-                    @loop_vperp_vpa ivperp ivpa begin
-                        if v_unnorm[ivpa] < zero
-                            f_electron_residual[ivpa,ivperp,iz,ir] = 0.0
-                        end
-                    end
-                end
-            end
-            begin_z_region()
-            @loop_z iz begin
-                @views moment_constraints_on_residual!(f_electron_residual[:,:,iz],
-                                                       f_electron_new[:,:,iz],
-                                                       (evolve_density=true,
-                                                        evolve_upar=true,
-                                                        evolve_ppar=true),
-                                                       vpa)
-            end
+            zero_z_boundary_condition_points(f_electron_residual, z, vpa, moments, ir)
             return nothing
         end
 
@@ -2078,7 +2057,9 @@ end
 
 function apply_electron_bc_and_constraints!(this_scratch, phi, moments, r, z, vperp, vpa,
                                             vperp_spectral, vpa_spectral, vpa_advect,
-                                            num_diss_params, composition)
+                                            num_diss_params, composition;
+                                            lowerz_vcut_inds=nothing,
+                                            upperz_vcut_inds=nothing)
     latest_pdf = this_scratch.pdf_electron
 
     begin_r_z_vperp_vpa_region()
@@ -2093,7 +2074,8 @@ function apply_electron_bc_and_constraints!(this_scratch, phi, moments, r, z, vp
                    moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral,
                    vpa_spectral, vpa_advect, moments,
                    num_diss_params.electron.vpa_dissipation_coefficient > 0.0,
-                   composition.me_over_mi, ir)
+                   composition.me_over_mi, ir; lowerz_vcut_inds=lowerz_vcut_inds,
+                   upperz_vcut_inds=upperz_vcut_inds)
     end
 
     begin_r_z_region()
@@ -2117,7 +2099,8 @@ end
 function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vperp,
                                                  vpa, vperp_spectral, vpa_spectral,
                                                  vpa_advect, num_diss_params, composition,
-                                                 ir)
+                                                 ir; lowerz_vcut_inds=nothing,
+                                                 upperz_vcut_inds=nothing)
     begin_z_vperp_vpa_region()
     @loop_z_vperp_vpa iz ivperp ivpa begin
         f_electron[ivpa,ivperp,iz] = max(f_electron[ivpa,ivperp,iz], 0.0)
@@ -2128,7 +2111,8 @@ function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vp
                f_electron, phi, moments.electron.vth[:,ir], moments.electron.upar[:,ir],
                z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, moments,
                num_diss_params.electron.vpa_dissipation_coefficient > 0.0,
-               composition.me_over_mi, ir)
+               composition.me_over_mi, ir; lowerz_vcut_inds=lowerz_vcut_inds,
+               upperz_vcut_inds=upperz_vcut_inds)
 
     begin_z_region()
     A = moments.electron.constraints_A_coefficient
@@ -2148,19 +2132,19 @@ function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vp
     end
 end
 
-function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir)
-    u_over_vt = upar[1,ir] / vthe[1,ir]
+function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa)
+    u_over_vt = upar / vthe
 
     # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar
     # shift'
     sigma = -u_over_vt
 
-    vpa_unnorm = @. vpa.scratch2 = vthe[1,ir] * (vpa.grid - sigma)
+    vpa_unnorm = @. vpa.scratch2 = vthe * (vpa.grid - sigma)
 
     # Initial guess for cut-off velocity is result from previous RK stage (which
     # might be the previous timestep if this is the first stage). Recalculate this
     # value from phi.
-    vcut = sqrt(phi[1,ir] / me_over_mi)
+    vcut = sqrt(phi / me_over_mi)
 
     # -vcut is between minus_vcut_ind-1 and minus_vcut_ind
     minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut)
@@ -2222,20 +2206,20 @@ function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir)
            reversed_wpa_of_minus_vpa
 end
 
-function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir)
-    u_over_vt = upar[end,ir] / vthe[end,ir]
+function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa)
+    u_over_vt = upar / vthe
 
     # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar
     # shift'
     sigma = -u_over_vt
 
     # Delete the upar contribution here if ignoring the 'upar shift'
-    vpa_unnorm = @. vpa.scratch2 = vthe[end,ir] * (vpa.grid - sigma)
+    vpa_unnorm = @. vpa.scratch2 = vthe * (vpa.grid - sigma)
 
     # Initial guess for cut-off velocity is result from previous RK stage (which
     # might be the previous timestep if this is the first stage). Recalculate this
     # value from phi.
-    vcut = sqrt(phi[end,ir] / me_over_mi)
+    vcut = sqrt(phi / me_over_mi)
 
     # vcut is between plus_vcut_ind and plus_vcut_ind+1
     plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut)
@@ -2305,10 +2289,314 @@ function get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm)
            (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind])
 end
 
+function fill_integral_pieces!(
+        pdf_part, vthe, vpa, vpa_unnorm, density_integral_pieces, flow_integral_pieces,
+        energy_integral_pieces, cubic_integral_pieces, quartic_integral_pieces)
+
+    @. density_integral_pieces = pdf_part * vpa.wgts / sqrt(π)
+    @. flow_integral_pieces = density_integral_pieces * vpa_unnorm / vthe
+    @. energy_integral_pieces = flow_integral_pieces * vpa_unnorm / vthe
+    @. cubic_integral_pieces = energy_integral_pieces * vpa_unnorm / vthe
+    @. quartic_integral_pieces = cubic_integral_pieces * vpa_unnorm / vthe
+end
+
+function get_residual_and_coefficients_for_bc(a1, a1prime, a2, a2prime, b1, b1prime,
+                                              c1, c1prime, c2, c2prime, d1, d1prime,
+                                              e1, e1prime, e2, e2prime, u_over_vt,
+                                              bc_constraints)
+    if bc_constraints
+        alpha = a1 + 2.0 * a2
+        alphaprime = a1prime + 2.0 * a2prime
+        beta = c1 + 2.0 * c2
+        betaprime = c1prime + 2.0 * c2prime
+        gamma = u_over_vt^2 * alpha - 2.0 * u_over_vt * b1 + beta
+        gammaprime = u_over_vt^2 * alphaprime - 2.0 * u_over_vt * b1prime + betaprime
+        delta = u_over_vt^2 * beta - 2.0 * u_over_vt * d1 + e1 + 2.0 * e2
+        deltaprime = u_over_vt^2 * betaprime - 2.0 * u_over_vt * d1prime + e1prime + 2.0 * e2prime
+
+        A = (0.5 * beta - delta) / (beta * gamma - alpha * delta)
+        Aprime = (0.5 * betaprime - deltaprime
+                  - (0.5 * beta - delta) * (gamma * betaprime + beta * gammaprime - delta * alphaprime - alpha * deltaprime)
+                  / (beta * gamma - alpha * delta)
+                 ) / (beta * gamma - alpha * delta)
+        C = (1.0 - alpha * A) / beta
+        Cprime = -(A * alphaprime + alpha * Aprime) / beta - (1.0 - alpha * A) * betaprime / beta^2
+    else
+        A = 1.0
+        Aprime = 0.0
+        C = 0.0
+        Cprime = 0.0
+    end
+
+    epsilon = A * b1 + C * d1 - u_over_vt
+    epsilonprime = b1 * Aprime + A * b1prime + d1 * Cprime + C * d1prime
+
+    return epsilon, epsilonprime, A, C
+end
+
+function get_integrals_and_derivatives_lowerz(
+             vcut, minus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, u_over_vt,
+             density_integral_pieces_lowerz, flow_integral_pieces_lowerz,
+             energy_integral_pieces_lowerz, cubic_integral_pieces_lowerz,
+             quartic_integral_pieces_lowerz, bc_constraints)
+    # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and
+    # minus_vcut_ind where -vcut is.
+    vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm)
+
+    function get_for_one_moment(integral_pieces)
+        # Integral contributions from the cell containing vcut.
+        # Define these as follows to be consistent with the way the cutoff is
+        # applied around plus_vcut_ind below.
+        # Note that `integral_vcut_cell_part1` and `integral_vcut_cell_part2`
+        # include all the contributions from the grid points
+        # `minus_vcut_ind-1` and `minus_vcut_ind`, not just those from
+        # 'inside' the grid cell.
+        if vcut_fraction < 0.5
+            integral_vcut_cell_part2 = integral_pieces[minus_vcut_ind-1] * (0.5 - vcut_fraction) +
+                                       integral_pieces[minus_vcut_ind]
+            integral_vcut_cell_part1 = integral_pieces[minus_vcut_ind-1] * (0.5 + vcut_fraction)
+
+            # part1prime is d(part1)/d(vcut)
+            part1prime = -integral_pieces[minus_vcut_ind-1] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1])
+        else
+            integral_vcut_cell_part2 = integral_pieces[minus_vcut_ind] * (1.5 - vcut_fraction)
+            integral_vcut_cell_part1 = integral_pieces[minus_vcut_ind-1] +
+                                       integral_pieces[minus_vcut_ind] * (vcut_fraction - 0.5)
+
+            # part1prime is d(part1)/d(vcut)
+            part1prime = -integral_pieces[minus_vcut_ind] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1])
+        end
+
+        part1 = sum(@view integral_pieces[1:minus_vcut_ind-2]) + integral_vcut_cell_part1
+
+        # Integral contribution from the cell containing sigma
+        integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind])
+
+        part2 = sum(@view integral_pieces[minus_vcut_ind+1:sigma_ind-2])
+        part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind-1] + sigma_fraction * integral_sigma_cell
+        # part2prime is d(part2)/d(vcut)
+        part2prime = -part1prime
+
+        return part1, part1prime, part2, part2prime
+    end
+    this_a1, this_a1prime, this_a2, this_a2prime = get_for_one_moment(density_integral_pieces_lowerz)
+    this_b1, this_b1prime, this_b2, _ = get_for_one_moment(flow_integral_pieces_lowerz)
+    this_c1, this_c1prime, this_c2, this_c2prime = get_for_one_moment(energy_integral_pieces_lowerz)
+    this_d1, this_d1prime, this_d2, _ = get_for_one_moment(cubic_integral_pieces_lowerz)
+    this_e1, this_e1prime, this_e2, this_e2prime = get_for_one_moment(quartic_integral_pieces_lowerz)
+
+    return get_residual_and_coefficients_for_bc(
+               this_a1, this_a1prime, this_a2, this_a2prime, this_b1,
+               this_b1prime, this_c1, this_c1prime, this_c2, this_c2prime,
+               this_d1, this_d1prime, this_e1, this_e1prime, this_e2,
+               this_e2prime, u_over_vt, bc_constraints)...,
+           this_a2, this_b2, this_c2, this_d2
+end
+
+function get_lowerz_integral_correction_components(
+             pdf_slice, vthe, vpa, vpa_unnorm, u_over_vt, sigma_ind, sigma_fraction, vcut,
+             minus_vcut_ind, plus_vcut_ind, bc_constraints)
+
+    # Need to recalculate integral pieces with the updated distribution function
+    density_integral_pieces_lowerz = vpa.scratch3
+    flow_integral_pieces_lowerz = vpa.scratch4
+    energy_integral_pieces_lowerz = vpa.scratch5
+    cubic_integral_pieces_lowerz = vpa.scratch6
+    quartic_integral_pieces_lowerz = vpa.scratch7
+    fill_integral_pieces!(
+        pdf_slice, vthe, vpa, vpa_unnorm, density_integral_pieces_lowerz,
+        flow_integral_pieces_lowerz, energy_integral_pieces_lowerz,
+        cubic_integral_pieces_lowerz, quartic_integral_pieces_lowerz)
+
+    # Update the part2 integrals since we've applied the A and C factors
+    _, _, _, _, a2, b2, c2, d2 =
+        get_integrals_and_derivatives_lowerz(
+            vcut, minus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, u_over_vt,
+            density_integral_pieces_lowerz, flow_integral_pieces_lowerz,
+            energy_integral_pieces_lowerz, cubic_integral_pieces_lowerz,
+            quartic_integral_pieces_lowerz, bc_constraints)
+
+    function get_part3_for_one_moment_lower(integral_pieces)
+        # Integral contribution from the cell containing sigma
+        integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind])
+
+        part3 = sum(@view integral_pieces[sigma_ind+1:plus_vcut_ind+1])
+        part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell
+
+        return part3
+    end
+    a3 = get_part3_for_one_moment_lower(density_integral_pieces_lowerz)
+    b3 = get_part3_for_one_moment_lower(flow_integral_pieces_lowerz)
+    c3 = get_part3_for_one_moment_lower(energy_integral_pieces_lowerz)
+    d3 = get_part3_for_one_moment_lower(cubic_integral_pieces_lowerz)
+
+    # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is.
+    correction0_integral_pieces = @. vpa.scratch3 = pdf_slice * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe^2)
+    for ivpa ∈ 1:sigma_ind
+        # We only add the corrections to 'part3', so zero them out for negative v_∥.
+        # I think this is only actually significant for `sigma_ind-1` and
+        # `sigma_ind`. Even though `sigma_ind` is part of the distribution
+        # function that we are correcting, for v_∥>0, it affects the integral in
+        # the 'sigma_cell' between `sigma_ind-1` and `sigma_ind`, which would
+        # affect the numerically calculated integrals for f(v_∥<0), so if we
+        # 'corrected' its value, those integrals would change and the constraints
+        # would not be exactly satisfied. The difference should be small, as the
+        # correction at that point is multiplied by
+        # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0.
+        correction0_integral_pieces[ivpa] = 0.0
+    end
+    correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe
+    correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe
+    correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe
+    correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe
+    correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe
+    correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe
+
+    alpha = get_part3_for_one_moment_lower(correction0_integral_pieces)
+    beta = get_part3_for_one_moment_lower(correction1_integral_pieces)
+    gamma = get_part3_for_one_moment_lower(correction2_integral_pieces)
+    delta = get_part3_for_one_moment_lower(correction3_integral_pieces)
+    epsilon = get_part3_for_one_moment_lower(correction4_integral_pieces)
+    zeta = get_part3_for_one_moment_lower(correction5_integral_pieces)
+    eta = get_part3_for_one_moment_lower(correction6_integral_pieces)
+
+    return a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta
+end
+
+function get_integrals_and_derivatives_upperz(
+             vcut, plus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, u_over_vt,
+             density_integral_pieces_upperz, flow_integral_pieces_upperz,
+             energy_integral_pieces_upperz, cubic_integral_pieces_upperz,
+             quartic_integral_pieces_upperz, bc_constraints)
+    # vcut_fraction is the fraction of the distance between plus_vcut_ind and
+    # plus_vcut_ind+1 where vcut is.
+    vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm)
+
+    function get_for_one_moment(integral_pieces)
+        # Integral contribution from the cell containing vcut
+        # Define these as follows to be consistent with the way the cutoff is
+        # applied around plus_vcut_ind below.
+        # Note that `integral_vcut_cell_part1` and `integral_vcut_cell_part2`
+        # include all the contributions from the grid points `plus_vcut_ind`
+        # and `plus_vcut_ind+1`, not just those from 'inside' the grid cell.
+        if vcut_fraction > 0.5
+            integral_vcut_cell_part2 = integral_pieces[plus_vcut_ind] +
+                                       integral_pieces[plus_vcut_ind+1] * (vcut_fraction - 0.5)
+            integral_vcut_cell_part1 = integral_pieces[plus_vcut_ind+1] * (1.5 - vcut_fraction)
+
+            # part1prime is d(part1)/d(vcut)
+            part1prime = -integral_pieces[plus_vcut_ind+1] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind])
+        else
+            integral_vcut_cell_part2 = integral_pieces[plus_vcut_ind] * (0.5 + vcut_fraction)
+            integral_vcut_cell_part1 = integral_pieces[plus_vcut_ind] * (0.5 - vcut_fraction) +
+                                       integral_pieces[plus_vcut_ind+1]
+
+            # part1prime is d(part1)/d(vcut)
+            part1prime = -integral_pieces[plus_vcut_ind] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind])
+        end
+
+        part1 = sum(@view integral_pieces[plus_vcut_ind+2:end]) + integral_vcut_cell_part1
+
+        # Integral contribution from the cell containing sigma
+        integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1])
+
+        part2 = sum(@view integral_pieces[sigma_ind+2:plus_vcut_ind-1])
+        part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind+1] + sigma_fraction * integral_sigma_cell
+        # part2prime is d(part2)/d(vcut)
+        part2prime = -part1prime
+
+        return part1, part1prime, part2, part2prime
+    end
+    this_a1, this_a1prime, this_a2, this_a2prime = get_for_one_moment(density_integral_pieces_upperz)
+    this_b1, this_b1prime, this_b2, _ = get_for_one_moment(flow_integral_pieces_upperz)
+    this_c1, this_c1prime, this_c2, this_c2prime = get_for_one_moment(energy_integral_pieces_upperz)
+    this_d1, this_d1prime, this_d2, _ = get_for_one_moment(cubic_integral_pieces_upperz)
+    this_e1, this_e1prime, this_e2, this_e2prime = get_for_one_moment(quartic_integral_pieces_upperz)
+
+    return get_residual_and_coefficients_for_bc(
+               this_a1, this_a1prime, this_a2, this_a2prime, this_b1,
+               this_b1prime, this_c1, this_c1prime, this_c2, this_c2prime,
+               this_d1, this_d1prime, this_e1, this_e1prime, this_e2,
+               this_e2prime, u_over_vt, bc_constraints)...,
+           this_a2, this_b2, this_c2, this_d2
+end
+
+function get_upperz_integral_correction_components(
+             pdf_slice, vthe, vpa, vpa_unnorm, u_over_vt, sigma_ind, sigma_fraction, vcut,
+             minus_vcut_ind, plus_vcut_ind, bc_constraints)
+
+    # Need to recalculate integral pieces with the updated distribution function
+    density_integral_pieces_upperz = vpa.scratch3
+    flow_integral_pieces_upperz = vpa.scratch4
+    energy_integral_pieces_upperz = vpa.scratch5
+    cubic_integral_pieces_upperz = vpa.scratch6
+    quartic_integral_pieces_upperz = vpa.scratch7
+    fill_integral_pieces!(
+        pdf_slice, vthe, vpa, vpa_unnorm,
+        density_integral_pieces_upperz, flow_integral_pieces_upperz,
+        energy_integral_pieces_upperz, cubic_integral_pieces_upperz,
+        quartic_integral_pieces_upperz)
+
+    # Update the part2 integrals since we've applied the A and C factors
+    _, _, _, _, a2, b2, c2, d2 =
+        get_integrals_and_derivatives_upperz(
+            vcut, plus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, u_over_vt,
+            density_integral_pieces_upperz, flow_integral_pieces_upperz,
+            energy_integral_pieces_upperz, cubic_integral_pieces_upperz,
+            quartic_integral_pieces_upperz, bc_constraints)
+
+    function get_part3_for_one_moment_upper(integral_pieces)
+        # Integral contribution from the cell containing sigma
+        integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1])
+
+        part3 = sum(@view integral_pieces[minus_vcut_ind-1:sigma_ind-1])
+        part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell
+
+        return part3
+    end
+    a3 = get_part3_for_one_moment_upper(density_integral_pieces_upperz)
+    b3 = get_part3_for_one_moment_upper(flow_integral_pieces_upperz)
+    c3 = get_part3_for_one_moment_upper(energy_integral_pieces_upperz)
+    d3 = get_part3_for_one_moment_upper(cubic_integral_pieces_upperz)
+
+    # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is.
+    correction0_integral_pieces = @. vpa.scratch3 = pdf_slice * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe^2)
+    for ivpa ∈ sigma_ind:vpa.n
+        # We only add the corrections to 'part3', so zero them out for positive v_∥.
+        # I think this is only actually significant for `sigma_ind` and
+        # `sigma_ind+1`. Even though `sigma_ind` is part of the distribution
+        # function that we are correcting, for v_∥<0, it affects the integral in
+        # the 'sigma_cell' between `sigma_ind` and `sigma_ind+1`, which would
+        # affect the numerically calculated integrals for f(v_∥>0), so if we
+        # 'corrected' its value, those integrals would change and the constraints
+        # would not be exactly satisfied. The difference should be small, as the
+        # correction at that point is multiplied by
+        # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0.
+        correction0_integral_pieces[ivpa] = 0.0
+    end
+    correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe
+    correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe
+    correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe
+    correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe
+    correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe
+    correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe
+
+    alpha = get_part3_for_one_moment_upper(correction0_integral_pieces)
+    beta = get_part3_for_one_moment_upper(correction1_integral_pieces)
+    gamma = get_part3_for_one_moment_upper(correction2_integral_pieces)
+    delta = get_part3_for_one_moment_upper(correction3_integral_pieces)
+    epsilon = get_part3_for_one_moment_upper(correction4_integral_pieces)
+    zeta = get_part3_for_one_moment_upper(correction5_integral_pieces)
+    eta = get_part3_for_one_moment_upper(correction6_integral_pieces)
+
+    return a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta
+end
+
 @timeit global_timer enforce_boundary_condition_on_electron_pdf!(
                          pdf, phi, vthe, upar, z, vperp, vpa, vperp_spectral,
                          vpa_spectral, vpa_adv, moments, vpa_diffusion, me_over_mi, ir;
-                         bc_constraints=true, update_vcut=true) = begin
+                         bc_constraints=true, update_vcut=true, lowerz_vcut_inds=nothing,
+                         upperz_vcut_inds=nothing) = begin
 
     @boundscheck bc_constraints && !update_vcut && error("update_vcut is not used when bc_constraints=true, but update_vcut has non-default value")
 
@@ -2373,39 +2661,6 @@ end
 
     newton_max_its = 100
 
-    function get_residual_and_coefficients_for_bc(a1, a1prime, a2, a2prime, b1, b1prime,
-                                                  c1, c1prime, c2, c2prime, d1, d1prime,
-                                                  e1, e1prime, e2, e2prime, u_over_vt)
-        if bc_constraints
-            alpha = a1 + 2.0 * a2
-            alphaprime = a1prime + 2.0 * a2prime
-            beta = c1 + 2.0 * c2
-            betaprime = c1prime + 2.0 * c2prime
-            gamma = u_over_vt^2 * alpha - 2.0 * u_over_vt * b1 + beta
-            gammaprime = u_over_vt^2 * alphaprime - 2.0 * u_over_vt * b1prime + betaprime
-            delta = u_over_vt^2 * beta - 2.0 * u_over_vt * d1 + e1 + 2.0 * e2
-            deltaprime = u_over_vt^2 * betaprime - 2.0 * u_over_vt * d1prime + e1prime + 2.0 * e2prime
-
-            A = (0.5 * beta - delta) / (beta * gamma - alpha * delta)
-            Aprime = (0.5 * betaprime - deltaprime
-                      - (0.5 * beta - delta) * (gamma * betaprime + beta * gammaprime - delta * alphaprime - alpha * deltaprime)
-                      / (beta * gamma - alpha * delta)
-                     ) / (beta * gamma - alpha * delta)
-            C = (1.0 - alpha * A) / beta
-            Cprime = -(A * alphaprime + alpha * Aprime) / beta - (1.0 - alpha * A) * betaprime / beta^2
-        else
-            A = 1.0
-            Aprime = 0.0
-            C = 0.0
-            Cprime = 0.0
-        end
-
-        epsilon = A * b1 + C * d1 - u_over_vt
-        epsilonprime = b1 * Aprime + A * b1prime + d1 * Cprime + C * d1prime
-
-        return epsilon, epsilonprime, A, C
-    end
-
     @serial_region begin
         if z.irank == 0
             if z.bc != "wall"
@@ -2418,8 +2673,9 @@ end
 
             vpa_unnorm, u_over_vt, vcut, minus_vcut_ind, sigma, sigma_ind, sigma_fraction,
                 element_with_zero, element_with_zero_boundary, last_point_near_zero,
-                reversed_wpa_of_minus_vpa = get_cutoff_params_lower(upar, vthe, phi,
-                                                                    me_over_mi, vpa, ir)
+                reversed_wpa_of_minus_vpa =
+                    get_cutoff_params_lower(upar[1,ir], vthe[1,ir], phi[1,ir], me_over_mi,
+                                            vpa)
 
             # interpolate the pdf onto this grid
             # 'near zero' means in the range where
@@ -2441,75 +2697,31 @@ end
             pdf[last_point_near_zero+1:end,1,1] .= reversed_pdf_far_from_zero
 
             # Per-grid-point contributions to moment integrals
-            # Note that we need to include the normalisation factor of 1/sqrt(pi) that
+            # Note that we need to include the normalisation factor of 1/sqrt(π) that
             # would be factored in by integrate_over_vspace(). This will need to
             # change/adapt when we support 2V as well as 1V.
-            density_integral_pieces_lowerz = @views @. vpa.scratch3 = pdf[:,1,1] * vpa.wgts / sqrt(pi)
-            flow_integral_pieces_lowerz = @. vpa.scratch4 = density_integral_pieces_lowerz * vpa_unnorm / vthe[1]
-            energy_integral_pieces_lowerz = @. vpa.scratch5 = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1]
-            cubic_integral_pieces_lowerz = @. vpa.scratch6 = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1]
-            quartic_integral_pieces_lowerz = @. vpa.scratch7 = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1]
-
-            function get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind)
-                # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and
-                # minus_vcut_ind where -vcut is.
-                local vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm)
-
-                function get_for_one_moment(integral_pieces)
-                    # Integral contributions from the cell containing vcut.
-                    # Define these as follows to be consistent with the way the cutoff is
-                    # applied around plus_vcut_ind below.
-                    # Note that `integral_vcut_cell_part1` and `integral_vcut_cell_part2`
-                    # include all the contributions from the grid points
-                    # `minus_vcut_ind-1` and `minus_vcut_ind`, not just those from
-                    # 'inside' the grid cell.
-                    if vcut_fraction < 0.5
-                        integral_vcut_cell_part2 = integral_pieces[minus_vcut_ind-1] * (0.5 - vcut_fraction) +
-                                                   integral_pieces[minus_vcut_ind]
-                        integral_vcut_cell_part1 = integral_pieces[minus_vcut_ind-1] * (0.5 + vcut_fraction)
-
-                        # part1prime is d(part1)/d(vcut)
-                        part1prime = -integral_pieces[minus_vcut_ind-1] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1])
-                    else
-                        integral_vcut_cell_part2 = integral_pieces[minus_vcut_ind] * (1.5 - vcut_fraction)
-                        integral_vcut_cell_part1 = integral_pieces[minus_vcut_ind-1] +
-                                                   integral_pieces[minus_vcut_ind] * (vcut_fraction - 0.5)
-
-                        # part1prime is d(part1)/d(vcut)
-                        part1prime = -integral_pieces[minus_vcut_ind] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1])
-                    end
-
-                    part1 = sum(@view integral_pieces[1:minus_vcut_ind-2]) + integral_vcut_cell_part1
-
-                    # Integral contribution from the cell containing sigma
-                    integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind])
-
-                    part2 = sum(@view integral_pieces[minus_vcut_ind+1:sigma_ind-2])
-                    part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind-1] + sigma_fraction * integral_sigma_cell
-                    # part2prime is d(part2)/d(vcut)
-                    part2prime = -part1prime
-
-                    return part1, part1prime, part2, part2prime
-                end
-                this_a1, this_a1prime, this_a2, this_a2prime = get_for_one_moment(density_integral_pieces_lowerz)
-                this_b1, this_b1prime, this_b2, _ = get_for_one_moment(flow_integral_pieces_lowerz)
-                this_c1, this_c1prime, this_c2, this_c2prime = get_for_one_moment(energy_integral_pieces_lowerz)
-                this_d1, this_d1prime, this_d2, _ = get_for_one_moment(cubic_integral_pieces_lowerz)
-                this_e1, this_e1prime, this_e2, this_e2prime = get_for_one_moment(quartic_integral_pieces_lowerz)
-
-                return get_residual_and_coefficients_for_bc(
-                           this_a1, this_a1prime, this_a2, this_a2prime, this_b1,
-                           this_b1prime, this_c1, this_c1prime, this_c2, this_c2prime,
-                           this_d1, this_d1prime, this_e1, this_e1prime, this_e2,
-                           this_e2prime, u_over_vt)...,
-                       this_a2, this_b2, this_c2, this_d2
-            end
+            density_integral_pieces_lowerz = vpa.scratch3
+            flow_integral_pieces_lowerz = vpa.scratch4
+            energy_integral_pieces_lowerz = vpa.scratch5
+            cubic_integral_pieces_lowerz = vpa.scratch6
+            quartic_integral_pieces_lowerz = vpa.scratch7
+            fill_integral_pieces!(
+                @view(pdf[:,1,1]), vthe[1], vpa, vpa_unnorm,
+                density_integral_pieces_lowerz, flow_integral_pieces_lowerz,
+                energy_integral_pieces_lowerz, cubic_integral_pieces_lowerz,
+                quartic_integral_pieces_lowerz)
 
             counter = 1
             A = 1.0
             C = 0.0
             # Always do at least one update of vcut
-            epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind)
+            epsilon, epsilonprime, A, C, a2, b2, c2, d2 =
+                get_integrals_and_derivatives_lowerz(
+                    vcut, minus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm,
+                    u_over_vt, density_integral_pieces_lowerz,
+                    flow_integral_pieces_lowerz, energy_integral_pieces_lowerz,
+                    cubic_integral_pieces_lowerz, quartic_integral_pieces_lowerz,
+                    bc_constraints)
             if bc_constraints
                 while true
                     # Newton iteration update. Note that primes denote derivatives with
@@ -2531,7 +2743,13 @@ end
                     vcut = vcut + delta_v
                     minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut)
 
-                    epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind)
+                    epsilon, epsilonprime, A, C, a2, b2, c2, d2 =
+                        get_integrals_and_derivatives_lowerz(
+                            vcut, minus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm,
+                            u_over_vt, density_integral_pieces_lowerz,
+                            flow_integral_pieces_lowerz, energy_integral_pieces_lowerz,
+                            cubic_integral_pieces_lowerz, quartic_integral_pieces_lowerz,
+                            bc_constraints)
 
                     if abs(epsilon) < newton_tol
                         break
@@ -2558,7 +2776,13 @@ end
                         break
                     end
 
-                    epsilon, epsilonprime, _, _, _, _, _, _ = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind)
+                    epsilon, epsilonprime, _, _, _, _, _, _ =
+                        get_integrals_and_derivatives_lowerz(
+                            vcut, minus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm,
+                            u_over_vt, density_integral_pieces_lowerz,
+                            flow_integral_pieces_lowerz, energy_integral_pieces_lowerz,
+                            cubic_integral_pieces_lowerz, quartic_integral_pieces_lowerz,
+                            bc_constraints)
                 end
             end
 
@@ -2572,8 +2796,14 @@ end
             # plus_vcut_ind+1 where vcut is.
             vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm)
             if vcut_fraction > 0.5
+                if lowerz_vcut_inds !== nothing
+                    lowerz_vcut_inds[ir] = plus_vcut_ind+1
+                end
                 pdf[plus_vcut_ind+1,1,1] *= vcut_fraction - 0.5
             else
+                if lowerz_vcut_inds !== nothing
+                    lowerz_vcut_inds[ir] = plus_vcut_ind
+                end
                 pdf[plus_vcut_ind+1,1,1] = 0.0
                 pdf[plus_vcut_ind,1,1] *= vcut_fraction + 0.5
             end
@@ -2591,60 +2821,11 @@ end
             # boundary condition, but would not be numerically true because of the
             # interpolation.
 
-            # Need to recalculate these with the updated distribution function
-            @views @. density_integral_pieces_lowerz = pdf[:,1,1] * vpa.wgts / sqrt(pi)
-            @. flow_integral_pieces_lowerz = density_integral_pieces_lowerz * vpa_unnorm / vthe[1]
-            @. energy_integral_pieces_lowerz = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1]
-            @. cubic_integral_pieces_lowerz = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1]
-            @. quartic_integral_pieces_lowerz = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1]
-
-            # Update the part2 integrals since we've applied the A and C factors
-            _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind)
-
-            function get_part3_for_one_moment_lower(integral_pieces)
-                # Integral contribution from the cell containing sigma
-                integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind])
-
-                part3 = sum(@view integral_pieces[sigma_ind+1:plus_vcut_ind+1])
-                part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell
-
-                return part3
-            end
-            a3 = get_part3_for_one_moment_lower(density_integral_pieces_lowerz)
-            b3 = get_part3_for_one_moment_lower(flow_integral_pieces_lowerz)
-            c3 = get_part3_for_one_moment_lower(energy_integral_pieces_lowerz)
-            d3 = get_part3_for_one_moment_lower(cubic_integral_pieces_lowerz)
-
-            # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is.
-            sharpness = 4.0
-            correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[1]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[1]^2)
-            for ivpa ∈ 1:sigma_ind
-                # We only add the corrections to 'part3', so zero them out for negative v_∥.
-                # I think this is only actually significant for `sigma_ind-1` and
-                # `sigma_ind`. Even though `sigma_ind` is part of the distribution
-                # function that we are correcting, for v_∥>0, it affects the integral in
-                # the 'sigma_cell' between `sigma_ind-1` and `sigma_ind`, which would
-                # affect the numerically calculated integrals for f(v_∥<0), so if we
-                # 'corrected' its value, those integrals would change and the constraints
-                # would not be exactly satisfied. The difference should be small, as the
-                # correction at that point is multiplied by
-                # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0.
-                correction0_integral_pieces[ivpa] = 0.0
-            end
-            correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[1]
-            correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[1]
-            correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[1]
-            correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[1]
-            correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[1]
-            correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[1]
-
-            alpha = get_part3_for_one_moment_lower(correction0_integral_pieces)
-            beta = get_part3_for_one_moment_lower(correction1_integral_pieces)
-            gamma = get_part3_for_one_moment_lower(correction2_integral_pieces)
-            delta = get_part3_for_one_moment_lower(correction3_integral_pieces)
-            epsilon = get_part3_for_one_moment_lower(correction4_integral_pieces)
-            zeta = get_part3_for_one_moment_lower(correction5_integral_pieces)
-            eta = get_part3_for_one_moment_lower(correction6_integral_pieces)
+            a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta =
+                get_lowerz_integral_correction_components(
+                    @view(pdf[:,1,1]), vthe[1], vpa, vpa_unnorm, u_over_vt,
+                    sigma_ind, sigma_fraction, vcut, minus_vcut_ind, plus_vcut_ind,
+                    bc_constraints)
 
             # Update the v_∥>0 part of f to correct the moments as
             # f(0<v_∥<vcut) = (1 + (A + B*v/vth + C*v^2/vth^2 + D*v^3/vth^3) * v^2/vth^2 / (1 + v^2/vth^2)) * fhat(0<v_∥<vcut)
@@ -2667,7 +2848,7 @@ end
                                     + B * v_over_vth
                                     + C * v_over_vth^2
                                     + D * v_over_vth^3) *
-                                   sharpness * v_over_vth^2 / (1.0 + sharpness * v_over_vth^2) *
+                                   integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) *
                                    pdf[ivpa,1,1]
             end
         end
@@ -2691,8 +2872,9 @@ end
 
             vpa_unnorm, u_over_vt, vcut, plus_vcut_ind, sigma, sigma_ind, sigma_fraction,
                 element_with_zero, element_with_zero_boundary, first_point_near_zero,
-                reversed_wpa_of_minus_vpa = get_cutoff_params_upper(upar, vthe, phi,
-                                                                    me_over_mi, vpa, ir)
+                reversed_wpa_of_minus_vpa =
+                    get_cutoff_params_upper(upar[end,ir], vthe[end,ir], phi[end,ir],
+                                            me_over_mi, vpa)
 
             # interpolate the pdf onto this grid
             # 'near zero' means in the range where
@@ -2714,72 +2896,28 @@ end
             pdf[1:first_point_near_zero-1,1,end] .= reversed_pdf
 
             # Per-grid-point contributions to moment integrals
-            # Note that we need to include the normalisation factor of 1/sqrt(pi) that
+            # Note that we need to include the normalisation factor of 1/sqrt(π) that
             # would be factored in by integrate_over_vspace(). This will need to
             # change/adapt when we support 2V as well as 1V.
-            density_integral_pieces_upperz = @views @. vpa.scratch3 = pdf[:,1,end] * vpa.wgts / sqrt(pi)
-            flow_integral_pieces_upperz = @. vpa.scratch4 = density_integral_pieces_upperz * vpa_unnorm / vthe[end]
-            energy_integral_pieces_upperz = @. vpa.scratch5 = flow_integral_pieces_upperz * vpa_unnorm / vthe[end]
-            cubic_integral_pieces_upperz = @. vpa.scratch6 = energy_integral_pieces_upperz * vpa_unnorm / vthe[end]
-            quartic_integral_pieces_upperz = @. vpa.scratch7 = cubic_integral_pieces_upperz * vpa_unnorm / vthe[end]
-
-            function get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
-                # vcut_fraction is the fraction of the distance between plus_vcut_ind and
-                # plus_vcut_ind+1 where vcut is.
-                local vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm)
-
-                function get_for_one_moment(integral_pieces)
-                    # Integral contribution from the cell containing vcut
-                    # Define these as follows to be consistent with the way the cutoff is
-                    # applied around plus_vcut_ind below.
-                    # Note that `integral_vcut_cell_part1` and `integral_vcut_cell_part2`
-                    # include all the contributions from the grid points `plus_vcut_ind`
-                    # and `plus_vcut_ind+1`, not just those from 'inside' the grid cell.
-                    if vcut_fraction > 0.5
-                        integral_vcut_cell_part2 = integral_pieces[plus_vcut_ind] +
-                                                   integral_pieces[plus_vcut_ind+1] * (vcut_fraction - 0.5)
-                        integral_vcut_cell_part1 = integral_pieces[plus_vcut_ind+1] * (1.5 - vcut_fraction)
-
-                        # part1prime is d(part1)/d(vcut)
-                        part1prime = -integral_pieces[plus_vcut_ind+1] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind])
-                    else
-                        integral_vcut_cell_part2 = integral_pieces[plus_vcut_ind] * (0.5 + vcut_fraction)
-                        integral_vcut_cell_part1 = integral_pieces[plus_vcut_ind] * (0.5 - vcut_fraction) +
-                                                   integral_pieces[plus_vcut_ind+1]
-
-                        # part1prime is d(part1)/d(vcut)
-                        part1prime = -integral_pieces[plus_vcut_ind] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind])
-                    end
-
-                    part1 = sum(@view integral_pieces[plus_vcut_ind+2:end]) + integral_vcut_cell_part1
-
-                    # Integral contribution from the cell containing sigma
-                    integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1])
-
-                    part2 = sum(@view integral_pieces[sigma_ind+2:plus_vcut_ind-1])
-                    part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind+1] + sigma_fraction * integral_sigma_cell
-                    # part2prime is d(part2)/d(vcut)
-                    part2prime = -part1prime
-
-                    return part1, part1prime, part2, part2prime
-                end
-                this_a1, this_a1prime, this_a2, this_a2prime = get_for_one_moment(density_integral_pieces_upperz)
-                this_b1, this_b1prime, this_b2, _ = get_for_one_moment(flow_integral_pieces_upperz)
-                this_c1, this_c1prime, this_c2, this_c2prime = get_for_one_moment(energy_integral_pieces_upperz)
-                this_d1, this_d1prime, this_d2, _ = get_for_one_moment(cubic_integral_pieces_upperz)
-                this_e1, this_e1prime, this_e2, this_e2prime = get_for_one_moment(quartic_integral_pieces_upperz)
-
-                return get_residual_and_coefficients_for_bc(
-                           this_a1, this_a1prime, this_a2, this_a2prime, this_b1,
-                           this_b1prime, this_c1, this_c1prime, this_c2, this_c2prime,
-                           this_d1, this_d1prime, this_e1, this_e1prime, this_e2,
-                           this_e2prime, u_over_vt)...,
-                       this_a2, this_b2, this_c2, this_d2
-            end
+            density_integral_pieces_upperz = vpa.scratch3
+            flow_integral_pieces_upperz = vpa.scratch4
+            energy_integral_pieces_upperz = vpa.scratch5
+            cubic_integral_pieces_upperz = vpa.scratch6
+            quartic_integral_pieces_upperz = vpa.scratch7
+            fill_integral_pieces!(
+                @view(pdf[:,1,end]), vthe[end], vpa, vpa_unnorm,
+                density_integral_pieces_upperz, flow_integral_pieces_upperz,
+                energy_integral_pieces_upperz, cubic_integral_pieces_upperz,
+                quartic_integral_pieces_upperz)
 
             counter = 1
             # Always do at least one update of vcut
-            epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
+            epsilon, epsilonprime, A, C, a2, b2, c2, d2 =
+                get_integrals_and_derivatives_upperz(
+                    vcut, plus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm, u_over_vt,
+                    density_integral_pieces_upperz, flow_integral_pieces_upperz,
+                    energy_integral_pieces_upperz, cubic_integral_pieces_upperz,
+                    quartic_integral_pieces_upperz, bc_constraints)
             if bc_constraints
                 while true
                     # Newton iteration update. Note that primes denote derivatives with
@@ -2801,7 +2939,13 @@ end
                     vcut = vcut + delta_v
                     plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut)
 
-                    epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
+                    epsilon, epsilonprime, A, C, a2, b2, c2, d2 =
+                        get_integrals_and_derivatives_upperz(
+                            vcut, plus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm,
+                            u_over_vt, density_integral_pieces_upperz,
+                            flow_integral_pieces_upperz, energy_integral_pieces_upperz,
+                            cubic_integral_pieces_upperz, quartic_integral_pieces_upperz,
+                            bc_constraints)
 
                     if abs(epsilon) < newton_tol
                         break
@@ -2828,7 +2972,13 @@ end
                         break
                     end
 
-                    epsilon, epsilonprime, _, _, _, _, _, _ = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
+                    epsilon, epsilonprime, _, _, _, _, _, _ =
+                        get_integrals_and_derivatives_upperz(
+                            vcut, plus_vcut_ind, sigma_ind, sigma_fraction, vpa_unnorm,
+                            u_over_vt, density_integral_pieces_upperz,
+                            flow_integral_pieces_upperz, energy_integral_pieces_upperz,
+                            cubic_integral_pieces_upperz, quartic_integral_pieces_upperz,
+                            bc_constraints)
                 end
             end
 
@@ -2842,8 +2992,14 @@ end
             # minus_vcut_ind where -vcut is.
             vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm)
             if vcut_fraction < 0.5
+                if upperz_vcut_inds !== nothing
+                    upperz_vcut_inds[ir] = minus_vcut_ind-1
+                end
                 pdf[minus_vcut_ind-1,1,end] *= 0.5 - vcut_fraction
             else
+                if upperz_vcut_inds !== nothing
+                    upperz_vcut_inds[ir] = minus_vcut_ind
+                end
                 pdf[minus_vcut_ind-1,1,end] = 0.0
                 pdf[minus_vcut_ind,1,end] *= 1.5 - vcut_fraction
             end
@@ -2861,60 +3017,11 @@ end
             # boundary condition, but would not be numerically true because of the
             # interpolation.
 
-            # Need to recalculate these with the updated distribution function
-            @views @. density_integral_pieces_upperz = pdf[:,1,end] * vpa.wgts / sqrt(pi)
-            @. flow_integral_pieces_upperz = density_integral_pieces_upperz * vpa_unnorm / vthe[end]
-            @. energy_integral_pieces_upperz = flow_integral_pieces_upperz * vpa_unnorm / vthe[end]
-            @. cubic_integral_pieces_upperz = energy_integral_pieces_upperz * vpa_unnorm / vthe[end]
-            @. quartic_integral_pieces_upperz = cubic_integral_pieces_upperz * vpa_unnorm / vthe[end]
-
-            # Update the part2 integrals since we've applied the A and C factors
-            _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
-
-            function get_part3_for_one_moment_upper(integral_pieces)
-                # Integral contribution from the cell containing sigma
-                integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1])
-
-                part3 = sum(@view integral_pieces[minus_vcut_ind-1:sigma_ind-1])
-                part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell
-
-                return part3
-            end
-            a3 = get_part3_for_one_moment_upper(density_integral_pieces_upperz)
-            b3 = get_part3_for_one_moment_upper(flow_integral_pieces_upperz)
-            c3 = get_part3_for_one_moment_upper(energy_integral_pieces_upperz)
-            d3 = get_part3_for_one_moment_upper(cubic_integral_pieces_upperz)
-
-            # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is.
-            sharpness = 4.0
-            correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[end]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[end]^2)
-            for ivpa ∈ sigma_ind:vpa.n
-                # We only add the corrections to 'part3', so zero them out for positive v_∥.
-                # I think this is only actually significant for `sigma_ind` and
-                # `sigma_ind+1`. Even though `sigma_ind` is part of the distribution
-                # function that we are correcting, for v_∥<0, it affects the integral in
-                # the 'sigma_cell' between `sigma_ind` and `sigma_ind+1`, which would
-                # affect the numerically calculated integrals for f(v_∥>0), so if we
-                # 'corrected' its value, those integrals would change and the constraints
-                # would not be exactly satisfied. The difference should be small, as the
-                # correction at that point is multiplied by
-                # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0.
-                correction0_integral_pieces[ivpa] = 0.0
-            end
-            correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[end]
-            correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[end]
-            correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[end]
-            correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[end]
-            correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[end]
-            correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[end]
-
-            alpha = get_part3_for_one_moment_upper(correction0_integral_pieces)
-            beta = get_part3_for_one_moment_upper(correction1_integral_pieces)
-            gamma = get_part3_for_one_moment_upper(correction2_integral_pieces)
-            delta = get_part3_for_one_moment_upper(correction3_integral_pieces)
-            epsilon = get_part3_for_one_moment_upper(correction4_integral_pieces)
-            zeta = get_part3_for_one_moment_upper(correction5_integral_pieces)
-            eta = get_part3_for_one_moment_upper(correction6_integral_pieces)
+            a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta =
+                get_upperz_integral_correction_components(
+                    @view(pdf[:,1,end]), vthe[end], vpa, vpa_unnorm, u_over_vt,
+                    sigma_ind, sigma_fraction, vcut, minus_vcut_ind, plus_vcut_ind,
+                    bc_constraints)
 
             # Update the v_∥>0 part of f to correct the moments as
             # f(0<v_∥<vcut) = (1 + (A + B*v/vth + C*v^2/vth^2 + D*v^3/vth^3) * v^2/vth^2 / (1 + v^2/vth^2)) * fhat(0<v_∥<vcut)
@@ -2933,12 +3040,12 @@ end
             for ivpa ∈ minus_vcut_ind-1:sigma_ind-1
                 v_over_vth = vpa_unnorm[ivpa]/vthe[end]
                 pdf[ivpa,1,end] = pdf[ivpa,1,end] +
-                                   (A
-                                    + B * v_over_vth
-                                    + C * v_over_vth^2
-                                    + D * v_over_vth^3) *
-                                   sharpness * v_over_vth^2 / (1.0 + sharpness * v_over_vth^2) *
-                                   pdf[ivpa,1,end]
+                                  (A
+                                   + B * v_over_vth
+                                   + C * v_over_vth^2
+                                   + D * v_over_vth^3) *
+                                  integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) *
+                                  pdf[ivpa,1,end]
             end
         end
     end
@@ -2946,6 +3053,1089 @@ end
     return nothing
 end
 
+# In several places it is useful to zero out (in residuals, etc.) the points that would be
+# set by the z boundary condition.
+function zero_z_boundary_condition_points(residual, z, vpa, moments, ir)
+    if z.bc ∈ ("wall", "constant",) && (z.irank == 0 || z.irank == z.nrank - 1)
+        # Boundary conditions on incoming part of distribution function. Note
+        # that as density, upar, ppar do not change in this implicit step,
+        # f_electron_newvar, f_old, and residual should all be zero at exactly
+        # the same set of grid points, so it is reasonable to zero-out
+        # `residual` to impose the boundary condition. We impose this after
+        # subtracting f_old in case rounding errors, etc. mean that at some
+        # point f_old had a different boundary condition cut-off index.
+        begin_vperp_vpa_region()
+        v_unnorm = vpa.scratch
+        zero = 1.0e-14
+        if z.irank == 0
+            v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[1,ir],
+                                        moments.electron.upar[1,ir], true, true)
+            @loop_vperp_vpa ivperp ivpa begin
+                if v_unnorm[ivpa] > -zero
+                    residual[ivpa,ivperp,1] = 0.0
+                end
+            end
+        end
+        if z.irank == z.nrank - 1
+            v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[end,ir],
+                                        moments.electron.upar[end,ir], true, true)
+            @loop_vperp_vpa ivperp ivpa begin
+                if v_unnorm[ivpa] < zero
+                    residual[ivpa,ivperp,end] = 0.0
+                end
+            end
+        end
+    end
+    return nothing
+end
+
+"""
+    add_wall_boundary_condition_to_Jacobian!(jacobian, phi, pdf, ppar, vthe, upar, z,
+                                             vperp, vpa, vperp_spectral, vpa_spectral,
+                                             vpa_adv, moments, vpa_diffusion, me_over_mi,
+                                             ir)
+
+All the contributions that we add in this function have to be added with a -'ve sign so
+that they combine with the 1 on the diagonal of the preconditioner matrix to make rows
+corresponding to the boundary points which define constraint equations imposing the
+boundary condition on those entries of δg (when the right-hand-side is set to zero).
+"""
+@timeit global_timer add_wall_boundary_condition_to_Jacobian!(
+                         jacobian, phi, pdf, ppar, vthe, upar, z, vperp, vpa,
+                         vperp_spectral, vpa_spectral, vpa_adv, moments, vpa_diffusion,
+                         me_over_mi, ir, include, iz=nothing;
+                         pdf_offset=0, ppar_offset=0) = begin
+    if z.bc != "wall"
+        return nothing
+    end
+
+    if include ∈ (:all, :explicit_v)
+        include_lower = (z.irank == 0)
+        include_upper = (z.irank == z.nrank - 1)
+        zend = z.n
+        phi_lower = phi[1]
+        phi_upper = phi[end]
+        pdf_lower = @view pdf[:,:,1]
+        pdf_upper = @view pdf[:,:,end]
+        ppar_lower = ppar[1]
+        ppar_upper = ppar[end]
+        vthe_lower = vthe[1]
+        vthe_upper = vthe[end]
+        upar_lower = upar[1]
+        upar_upper = upar[end]
+        shared_mem_parallel = true
+    elseif include === :implicit_v
+        include_lower = (z.irank == 0) && iz == 1
+        include_upper = (z.irank == z.nrank - 1) && iz == z.n
+        zend = 1
+        phi_lower = phi_upper = phi
+        pdf_lower = pdf_upper = pdf
+        ppar_lower = ppar_upper = ppar
+        vthe_lower = vthe_upper = vthe
+        upar_lower = upar_upper = upar
+
+        # When using :implicit_v, this function is called inside a loop that is already
+        # parallelised over z, so we cannot change the parallel region type.
+        shared_mem_parallel = false
+    else
+        return nothing
+    end
+
+    if include_lower
+        shared_mem_parallel && begin_vperp_region()
+        @loop_vperp ivperp begin
+            # Skip velocity space boundary points.
+            if vperp.n > 1 && ivperp == vperp.n
+                continue
+            end
+
+            # Get matrix entries for the response of the sheath-edge boundary condition.
+            # Ignore constraints, as these are non-linear and also should be small
+            # corrections which should not matter much for a preconditioner.
+
+            jac_range = pdf_offset+(ivperp-1)*vpa.n+1 : pdf_offset+ivperp*vpa.n
+            jacobian_zbegin = @view jacobian[jac_range,jac_range]
+            jacobian_zbegin_ppar = @view jacobian[jac_range,ppar_offset+1]
+
+            vpa_unnorm, u_over_vt, vcut, minus_vcut_ind, sigma, sigma_ind, sigma_fraction,
+                element_with_zero, element_with_zero_boundary, last_point_near_zero,
+                reversed_wpa_of_minus_vpa =
+                    get_cutoff_params_lower(upar_lower, vthe_lower, phi_lower, me_over_mi,
+                                            vpa)
+
+            plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut)
+            # plus_vcut_fraction is the fraction of the distance between plus_vcut_ind and
+            # plus_vcut_ind+1 where vcut is.
+            plus_vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm)
+            if plus_vcut_fraction > 0.5
+                last_nonzero_ind = plus_vcut_ind + 1
+            else
+                last_nonzero_ind = plus_vcut_ind
+            end
+
+            # Interpolate to the 'near zero' points
+            @views fill_interpolate_symmetric_matrix!(
+                       jacobian_zbegin[sigma_ind:last_point_near_zero,element_with_zero_boundary:sigma_ind-1],
+                       vpa_unnorm[sigma_ind:last_point_near_zero],
+                       vpa_unnorm[element_with_zero_boundary:sigma_ind-1])
+
+            # Interpolate to the 'far from zero' points
+            @views fill_1d_interpolation_matrix!(
+                       jacobian_zbegin[last_nonzero_ind:-1:last_point_near_zero+1,:],
+                       reversed_wpa_of_minus_vpa[vpa.n-last_nonzero_ind+1:vpa.n-last_point_near_zero],
+                       vpa, vpa_spectral)
+
+            # Reverse the sign of the elements we just filled
+            jacobian_zbegin[sigma_ind:last_nonzero_ind,1:sigma_ind-1] .*= -1.0
+
+            if plus_vcut_fraction > 0.5
+                jacobian_zbegin[last_nonzero_ind,1:sigma_ind-1] .*= plus_vcut_fraction - 0.5
+            else
+                jacobian_zbegin[last_nonzero_ind,1:sigma_ind-1] .*= plus_vcut_fraction + 0.5
+            end
+
+            # Fill in elements giving response to changes in electron_ppar
+            # A change to ppar results in a shift in the location of w_∥(v_∥=0).
+            # The interpolated values of g_e(w_∥) that are filled by the boundary
+            # condition are (dropping _∥ subscripts for the remaining comments in this
+            # if-clause)
+            #   g(w|v>0) = g(2*u/vth - w)
+            # So
+            #   δg(w|v>0) = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δvth
+            #             = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δ(sqrt(2*ppar/n)
+            #             = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δppar * vth / (2*ppar)
+            #             = -u/vth/ppar * dg/dw(2*u/vth - w) * δppar
+            #
+            # As jacobian_zbegin_ppar only depends on variations of a single value
+            # `ppar[1]`, it might be more maintainable and computationally cheaper to
+            # calculate jacobian_zbegin_ppar with a finite difference method (applying the
+            # boundary condition function with a perturbed pressure `ppar[1] + ϵ` for some
+            # small ϵ) rather than calculating it using the method below. If we used the
+            # finite difference approach, we would have to be careful that the ϵ step does
+            # not change the cutoff index (otherwise we would pick up non-smooth changes)
+            # - one possibility might to be to switch to `-ϵ` instead of `+ϵ` if this
+            # happens.
+
+            dpdfdv_near_zero = @view vpa.scratch[sigma_ind:last_point_near_zero]
+            @views interpolate_symmetric!(dpdfdv_near_zero,
+                                          vpa_unnorm[sigma_ind:last_point_near_zero],
+                                          pdf_lower[element_with_zero_boundary:sigma_ind-1,ivperp],
+                                          vpa_unnorm[element_with_zero_boundary:sigma_ind-1],
+                                          Val(1))
+            # The above call to interpolate_symmetric calculates dg/dv rather than dg/dw,
+            # so need to multiply by an extra factor of vthe.
+            #   δg(w|v>0) = -u/ppar * dg/dv(2*u/vth - w) * δppar
+            @. jacobian_zbegin_ppar[sigma_ind:last_point_near_zero] -=
+                   -upar_lower / ppar_lower * dpdfdv_near_zero
+
+            dpdfdw_far_from_zero = @view vpa.scratch[last_point_near_zero+1:last_nonzero_ind]
+            @views interpolate_to_grid_1d!(dpdfdw_far_from_zero,
+                                           reversed_wpa_of_minus_vpa[vpa.n-last_nonzero_ind+1:vpa.n-last_point_near_zero],
+                                           pdf_lower[:,ivperp], vpa, vpa_spectral, Val(1))
+            reverse!(dpdfdw_far_from_zero)
+            # Note that because we calculated the derivative of the interpolating
+            # function, and then reversed the results, we need to multiply the derivative
+            # by -1.
+            @. jacobian_zbegin_ppar[last_point_near_zero+1:last_nonzero_ind] -=
+                   upar_lower / vthe_lower / ppar_lower * dpdfdw_far_from_zero
+
+            # Whatever the variation due to interpolation is at the last nonzero grid
+            # point, it will be reduced by the cutoff.
+            if plus_vcut_fraction > 0.5
+                jacobian_zbegin_ppar[last_nonzero_ind] *= plus_vcut_fraction - 0.5
+            else
+                jacobian_zbegin_ppar[last_nonzero_ind] *= plus_vcut_fraction + 0.5
+            end
+
+            # The change in electron_ppar also changes the position of
+            #     wcut = (vcut - upar)/vthe
+            # as vcut does not change (within the Krylov iteration where this
+            # preconditioner matrix is used), but vthe does because of the change in
+            # electron_ppar. We actually use plus_vcut_fraction calculated from
+            # vpa_unnorm, so it is most convenient to consider:
+            #     v = vthe * w + upar
+            #     δv = δvthe * w
+            #        = δvthe * (v - upar)/vthe
+            #        = δppar * vthe / (2*ppar) * (v - upar)/vthe
+            #        = δppar * (v - upar) / 2 / ppar
+            # with vl and vu the values of v at the grid points below and above vcut
+            #     plus_vcut_fraction = (vcut - vl) / (vu - vl)
+            #     δplus_vcut_fraction = -(vcut - vl) / (vu - vl)^2 * (δvu - δvl) - δvl / (vu - vl)
+            #     δplus_vcut_fraction = [-(vcut - vl) / (vu - vl)^2 * (vu - vl) - (vl - upar) / (vu - vl)] * δppar / 2 / ppar
+            #     δplus_vcut_fraction = [-(vcut - vl) / (vu - vl) - (vl - upar) / (vu - vl)] * δppar / 2 / ppar
+            #     δplus_vcut_fraction = -(vcut - upar) / (vu - vl) / 2 / ppar * δppar
+            interpolated_pdf_at_last_nonzero_ind = @view vpa.scratch[1:1]
+            reversed_last_nonzero_ind = vpa.n-last_nonzero_ind+1
+            @views interpolate_to_grid_1d!(interpolated_pdf_at_last_nonzero_ind,
+                                           reversed_wpa_of_minus_vpa[reversed_last_nonzero_ind:reversed_last_nonzero_ind],
+                                           pdf_lower[:,ivperp], vpa, vpa_spectral)
+
+            dplus_vcut_fraction_dp = -(vcut - upar_lower) / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) / 2.0 / ppar_lower
+            jacobian_zbegin_ppar[last_nonzero_ind] -= interpolated_pdf_at_last_nonzero_ind[] * dplus_vcut_fraction_dp
+
+            # Calculate some numerical integrals of dpdfdw that we will need later
+            function get_part3_for_one_moment_lower(integral_pieces)
+                # Integral contribution from the cell containing sigma
+                # The contribution from integral_pieces[sigma_ind-1] should be dropped,
+                # because that point is on the input grid, and this correction is due to
+                # the changes in interpolated values on the output grid due to δp_e∥. The
+                # value of g_e at sigma_ind-1 isn't interpolated so does not change in
+                # this way.
+                integral_sigma_cell = 0.5 * integral_pieces[sigma_ind]
+
+                part3 = sum(@view integral_pieces[sigma_ind+1:plus_vcut_ind+1])
+                part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell
+
+                return part3
+            end
+            # The contents of jacobian_zbegin_ppar at this point are the coefficients
+            # needed to get the new distribution function due to a change δp, which we can
+            # now use to calculate the response of various integrals to the same change.
+            # Note that jacobian_zbegin_ppar already contains `2.0*dsigma_dp`.
+            @. vpa.scratch = -jacobian_zbegin_ppar * vpa.wgts / sqrt(π)
+            da3_dp = get_part3_for_one_moment_lower(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_lower
+            db3_dp = get_part3_for_one_moment_lower(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_lower
+            dc3_dp = get_part3_for_one_moment_lower(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_lower
+            dd3_dp = get_part3_for_one_moment_lower(vpa.scratch)
+            @. vpa.scratch = -jacobian_zbegin_ppar * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe_lower^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_lower^2)
+            vpa.scratch[sigma_ind-1:sigma_ind] .= 0.0
+            dalpha_dp_interp = get_part3_for_one_moment_lower(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_lower
+            dbeta_dp_interp = get_part3_for_one_moment_lower(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_lower
+            dgamma_dp_interp = get_part3_for_one_moment_lower(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_lower
+            ddelta_dp_interp = get_part3_for_one_moment_lower(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_lower
+            depsilon_dp_interp = get_part3_for_one_moment_lower(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_lower
+            dzeta_dp_interp = get_part3_for_one_moment_lower(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_lower
+            deta_dp_interp = get_part3_for_one_moment_lower(vpa.scratch)
+
+            pdf_lowerz = vpa.scratch10
+            @views @. pdf_lowerz[1:sigma_ind-1] = pdf_lower[1:sigma_ind-1,ivperp]
+
+            # interpolate the pdf_lowerz onto this grid
+            # 'near zero' means in the range where
+            # abs(v_∥)≤abs(lower boundary of element including v_∥=0)
+            # 'far from zero' means larger values of v_∥.
+
+            # Interpolate to the 'near zero' points
+            @views interpolate_symmetric!(pdf_lowerz[sigma_ind:last_point_near_zero],
+                                          vpa_unnorm[sigma_ind:last_point_near_zero],
+                                          pdf_lowerz[element_with_zero_boundary:sigma_ind-1],
+                                          vpa_unnorm[element_with_zero_boundary:sigma_ind-1])
+
+            # Interpolate to the 'far from zero' points
+            reversed_pdf_far_from_zero = @view pdf_lowerz[last_point_near_zero+1:end]
+            interpolate_to_grid_1d!(reversed_pdf_far_from_zero,
+                                    reversed_wpa_of_minus_vpa[1:vpa.n-last_point_near_zero],
+                                    pdf_lowerz, vpa, vpa_spectral)
+            reverse!(reversed_pdf_far_from_zero)
+
+            minus_vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm)
+            if minus_vcut_fraction < 0.5
+                lower_cutoff_ind = minus_vcut_ind-1
+            else
+                lower_cutoff_ind = minus_vcut_ind
+            end
+            if plus_vcut_fraction > 0.5
+                upper_cutoff_ind = plus_vcut_ind+1
+                upper_cutoff_factor = plus_vcut_fraction - 0.5
+            else
+                upper_cutoff_ind = plus_vcut_ind
+                upper_cutoff_factor = plus_vcut_fraction + 0.5
+            end
+            pdf_lowerz[upper_cutoff_ind] *= upper_cutoff_factor
+            pdf_lowerz[upper_cutoff_ind+1:end] .= 0.0
+
+            a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta =
+                get_lowerz_integral_correction_components(
+                    pdf_lowerz, vthe_lower, vpa, vpa_unnorm, u_over_vt, sigma_ind,
+                    sigma_fraction, vcut, minus_vcut_ind, plus_vcut_ind, false)
+
+            output_range = sigma_ind+1:upper_cutoff_ind
+
+            v_over_vth = @views @. vpa.scratch[output_range] = vpa_unnorm[output_range] / vthe_lower
+
+            correction_matrix = [alpha beta    gamma   delta   ;
+                                 beta  gamma   delta   epsilon ;
+                                 gamma delta   epsilon zeta    ;
+                                 delta epsilon zeta    eta
+                                ]
+
+            # jacobian_zbegin_ppar state at this point would generate (when multiplied by
+            # some δp) a δf due to the effectively-shifted grid. The unperturbed f
+            # required corrections to make its integrals correct. Need to apply the same
+            # corrections to δf.
+            # This term seems to have a very small contribution, and could probably be
+            # skipped.
+            A, B, C, D = correction_matrix \ [a2-a3, -b2-b3, c2-c3, -d2-d3]
+            @. jacobian_zbegin_ppar[output_range] *=
+                   1.0 +
+                   (A
+                    + B * v_over_vth
+                    + C * v_over_vth^2
+                    + D * v_over_vth^3) *
+                   integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2)
+
+            # Calculate the changes in the integrals a2, b2, c2, d2, a3, b3, c3, and d3 in
+            # response to changes in electron_ppar. The changes are calculated for the
+            # combinations (a2-a3), (-b2-b3), (c2-c3), and (-d2-d3) to take advantage of
+            # some cancellations.
+
+            # Need to calculate the variation of various intermediate quantities with
+            # δp.
+            #
+            #   vth = sqrt(2*p/n)
+            #   δvth = vth / (2 * p) * δp
+            #
+            #   sigma = -u / vth
+            #   δsigma = u / vth^2 δvth
+            #          = u / (2 * vth * p) * δp
+            #
+            # We could write sigma_fraction as
+            #   sigma_fraction = (sigma - vpa[sigma_ind-1]) / (vpa[sigma_ind] - vpa[sigma_ind-1])
+            # so that
+            #   δsigma_fraction = δsigma / (vpa[sigma_ind] - vpa[sigma_ind-1])
+            #                   = u / (2 * vth * p) / (vpa[sigma_ind] - vpa[sigma_ind-1]) * δp
+            #
+            #   minus_vcut_fraction = ((-vcut - u)/vth - vpa[minus_vcut_ind-1]) / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1])
+            #   δminus_vcut_fraction = (vcut + u) / vth^2 / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1]) * δvth
+            #                        = (vcut + u) / (2 * vth * p) / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1]) * δp
+            #
+            #   plus_vcut_fraction = ((vcut - u)/vth - vpa[plus_vcut_ind-1]) / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind])
+            #   δplus_vcut_fraction = -(vcut - u) / vth^2 / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind]) * δvth
+            #                       = -(vcut - u) / (2 * vth * p) / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind]) * δp
+            density_integral_pieces_lowerz = vpa.scratch3
+            flow_integral_pieces_lowerz = vpa.scratch4
+            energy_integral_pieces_lowerz = vpa.scratch5
+            cubic_integral_pieces_lowerz = vpa.scratch6
+            quartic_integral_pieces_lowerz = vpa.scratch7
+            fill_integral_pieces!(
+                pdf_lowerz, vthe_lower, vpa, vpa_unnorm, density_integral_pieces_lowerz,
+                flow_integral_pieces_lowerz, energy_integral_pieces_lowerz,
+                cubic_integral_pieces_lowerz, quartic_integral_pieces_lowerz)
+            vpa_grid = vpa.grid
+
+            dsigma_dp = upar_lower / (2.0 * vthe_lower * ppar_lower)
+
+            dsigma_fraction_dp = dsigma_dp / (vpa_grid[sigma_ind] - vpa_grid[sigma_ind-1])
+
+            dminus_vcut_fraction_dp = (vcut + upar_lower) / (2.0 * vthe_lower * ppar_lower) / (vpa_grid[minus_vcut_ind] - vpa_grid[minus_vcut_ind-1])
+
+            dplus_vcut_fraction_dp = -(vcut - upar_lower) / (2.0 * vthe_lower * ppar_lower) / (vpa_grid[plus_vcut_ind+1] - vpa_grid[plus_vcut_ind])
+
+            density_integral_sigma_cell = 0.5 * (density_integral_pieces_lowerz[sigma_ind-1] +
+                                                 density_integral_pieces_lowerz[sigma_ind])
+            da2_minus_a3_dp = (
+                # Contribution from integral limits at sigma
+                2.0 * density_integral_sigma_cell * dsigma_fraction_dp
+                # Contribution from integral limits at -wcut-. The contribution from wcut+
+                # is included in da3_dp
+                - density_integral_pieces_lowerz[lower_cutoff_ind] * dminus_vcut_fraction_dp
+                # No contribution from explicit σ factors in w-integral for a2 or a3 as
+                # integrand does not contain σ.
+                # Change in a3 integral due to different interpolated ̂g values
+                - da3_dp
+               )
+
+            dminus_b2_minus_b3_dp = (
+                # Contribution from integral limits at sigma cancel exactly
+                # Contribution from integral limits at -wcut-. The contribution from wcut+
+                # is included in db3_dp
+                + flow_integral_pieces_lowerz[lower_cutoff_ind] * dminus_vcut_fraction_dp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + (a2 + a3) * dsigma_dp
+                # Change in b3 integral due to different interpolated ̂g values
+                - db3_dp
+               )
+
+            energy_integral_sigma_cell = 0.5 * (energy_integral_pieces_lowerz[sigma_ind-1] +
+                                                energy_integral_pieces_lowerz[sigma_ind])
+            dc2_minus_c3_dp = (
+                # Contribution from integral limits at sigma
+                2.0 * energy_integral_sigma_cell * dsigma_fraction_dp
+                # Contribution from integral limits at -wcut-. The contribution from wcut+
+                # is included in dc3_dp
+                - energy_integral_pieces_lowerz[lower_cutoff_ind] * dminus_vcut_fraction_dp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + 2.0 * (-b2 + b3) * dsigma_dp
+                # Change in c3 integral due to different interpolated ̂g values
+                - dc3_dp
+               )
+
+            dminus_d2_minus_d3_dp = (
+                # Contribution from integral limits at sigma cancel exactly
+                # Contribution from integral limits at -wcut-. The contribution from wcut+
+                # is included in dd3_dp
+                + cubic_integral_pieces_lowerz[lower_cutoff_ind] * dminus_vcut_fraction_dp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + 3.0 * (c2 + c3) * dsigma_dp
+                # Change in d3 integral due to different interpolated ̂g values
+                - dd3_dp
+               )
+
+            correction0_integral_pieces = @. vpa.scratch3 = pdf_lowerz * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe_lower^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_lower^2)
+            for ivpa ∈ 1:sigma_ind
+                correction0_integral_pieces[ivpa] = 0.0
+            end
+
+            correctionminus1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces / vpa_unnorm * vthe_lower
+            integralminus1 = sum(@view(correctionminus1_integral_pieces[sigma_ind+1:upper_cutoff_ind]))
+
+            correction0_integral_type2_pieces = @. vpa.scratch4 = pdf_lowerz * vpa.wgts / sqrt(π) * 2.0 * integral_correction_sharpness^2 * vpa_unnorm^3 / vthe_lower^3 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_lower^2)^2
+            integral_type2 = sum(@view(correction0_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind]))
+            dalpha_dp = (
+                # The grid points either side of sigma are zero-ed out for these
+                # corrections, so this boundary does not contribute.
+                # Contribution from integral limit at wcut+ is included in dalpha_dp_interp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + (-2.0 * integralminus1 + integral_type2) * dsigma_dp
+                # Change in alpha integral due to different interpolated ̂g values
+                + dalpha_dp_interp
+               )
+
+            correction1_integral_pieces = @. vpa.scratch5 = correction0_integral_pieces * vpa_unnorm / vthe_lower
+            correction1_integral_type2_pieces = @. vpa.scratch6 = correction0_integral_type2_pieces * vpa_unnorm / vthe_lower
+            integral_type2 = sum(@view(correction1_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind]))
+            dbeta_dp = (
+                # The grid points either side of sigma are zero-ed out for these
+                # corrections, so this boundary does not contribute.
+                # Contribution from integral limit at wcut+ is included in dbeta_dp_interp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + (-2.0 * alpha + integral_type2) * dsigma_dp
+                # Change in beta integral due to different interpolated ̂g values
+                + dbeta_dp_interp
+               )
+
+            # Here we overwrite the buffers that were used for correction1_integral_pieces
+            # and correction1_integral_type2_pieces, but this is OK as we never need those
+            # arrays again.
+            correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe_lower
+            correction2_integral_type2_pieces = @. vpa.scratch6 = correction1_integral_type2_pieces * vpa_unnorm / vthe_lower
+            integral_type2 = sum(@view(correction2_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind]))
+            dgamma_dp = (
+                # The grid points either side of sigma are zero-ed out for these
+                # corrections, so this boundary does not contribute.
+                # Contribution from integral limit at wcut+ is included in dgamma_dp_interp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + (-2.0 * beta + integral_type2) * dsigma_dp
+                # Change in gamma integral due to different interpolated ̂g values
+                + dgamma_dp_interp
+               )
+
+            # Here we overwrite the buffers that were used for correction2_integral_pieces
+            # and correction2_integral_type2_pieces, but this is OK as we never need those
+            # arrays again.
+            correction3_integral_pieces = @. vpa.scratch5 = correction2_integral_pieces * vpa_unnorm / vthe_lower
+            correction3_integral_type2_pieces = @. vpa.scratch6 = correction2_integral_type2_pieces * vpa_unnorm / vthe_lower
+            integral_type2 = sum(@view(correction3_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind]))
+            ddelta_dp = (
+                # The grid points either side of sigma are zero-ed out for these
+                # corrections, so this boundary does not contribute.
+                # Contribution from integral limit at wcut+ is included in ddelta_dp_interp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + (-2.0 * gamma + integral_type2) * dsigma_dp
+                # Change in delta integral due to different interpolated ̂g values
+                + ddelta_dp_interp
+               )
+
+            # Here we overwrite the buffers that were used for correction3_integral_pieces
+            # and correction3_integral_type2_pieces, but this is OK as we never need those
+            # arrays again.
+            correction4_integral_pieces = @. vpa.scratch5 = correction3_integral_pieces * vpa_unnorm / vthe_lower
+            correction4_integral_type2_pieces = @. vpa.scratch6 = correction3_integral_type2_pieces * vpa_unnorm / vthe_lower
+            integral_type2 = sum(@view(correction4_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind]))
+            depsilon_dp = (
+                # The grid points either side of sigma are zero-ed out for these
+                # corrections, so this boundary does not contribute.
+                # Contribution from integral limit at wcut+ is included in depsilon_dp_interp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + (-2.0 * delta + integral_type2) * dsigma_dp
+                # Change in epsilon integral due to different interpolated ̂g values
+                + depsilon_dp_interp
+               )
+
+            # Here we overwrite the buffers that were used for correction4_integral_pieces
+            # and correction4_integral_type2_pieces, but this is OK as we never need those
+            # arrays again.
+            correction5_integral_pieces = @. vpa.scratch5 = correction4_integral_pieces * vpa_unnorm / vthe_lower
+            correction5_integral_type2_pieces = @. vpa.scratch6 = correction4_integral_type2_pieces * vpa_unnorm / vthe_lower
+            integral_type2 = sum(@view(correction5_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind]))
+            dzeta_dp = (
+                # The grid points either side of sigma are zero-ed out for these
+                # corrections, so this boundary does not contribute.
+                # Contribution from integral limit at wcut+ is included in dzeta_dp_interp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + (-2.0 * epsilon + integral_type2) * dsigma_dp
+                # Change in zeta integral due to different interpolated ̂g values
+                + dzeta_dp_interp
+               )
+
+            # Here we overwrite the buffers that were used for correction4_integral_pieces
+            # and correction4_integral_type2_pieces, but this is OK as we never need those
+            # arrays again.
+            correction6_integral_pieces = @. vpa.scratch5 = correction5_integral_pieces * vpa_unnorm / vthe_lower
+            correction6_integral_type2_pieces = @. vpa.scratch6 = correction5_integral_type2_pieces * vpa_unnorm / vthe_lower
+            integral_type2 = sum(@view(correction6_integral_type2_pieces[sigma_ind+1:upper_cutoff_ind]))
+            deta_dp = (
+                # The grid points either side of sigma are zero-ed out for these
+                # corrections, so this boundary does not contribute.
+                # Contribution from integral limit at wcut+ is included in deta_dp_interp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + (-2.0 * zeta + integral_type2) * dsigma_dp
+                # Change in eta integral due to different interpolated ̂g values
+                + deta_dp_interp
+               )
+
+            dA_dp, dB_dp, dC_dp, dD_dp = correction_matrix \ (
+                                             [da2_minus_a3_dp, dminus_b2_minus_b3_dp, dc2_minus_c3_dp, dminus_d2_minus_d3_dp]
+                                             - [dalpha_dp dbeta_dp    dgamma_dp   ddelta_dp   ;
+                                                dbeta_dp  dgamma_dp   ddelta_dp   depsilon_dp ;
+                                                dgamma_dp ddelta_dp   depsilon_dp dzeta_dp    ;
+                                                ddelta_dp depsilon_dp dzeta_dp    deta_dp
+                                               ]
+                                               * [A, B, C, D]
+                                            )
+
+            @views @. jacobian_zbegin_ppar[output_range] -=
+                          (dA_dp
+                           + dB_dp * v_over_vth
+                           + dC_dp * v_over_vth^2
+                           + dD_dp * v_over_vth^3) *
+                          integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) *
+                          pdf_lowerz[output_range]
+
+            # Add variation due to variation of ̃v coordinate with δp.
+            # These contributions seem to make almost no difference, and could probably be
+            # skipped.
+            dv_over_vth_dp = -dsigma_dp
+            @views @. jacobian_zbegin_ppar[output_range] -= (
+                          (B * dv_over_vth_dp
+                           + 2.0 * C * v_over_vth * dv_over_vth_dp
+                           + 3.0 * D * v_over_vth^2 * dv_over_vth_dp) *
+                          integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) *
+                          pdf_lowerz[output_range]
+                          +
+                          (A
+                           + B * v_over_vth
+                           + C * v_over_vth^2
+                           + D * v_over_vth^3) *
+                          (2.0 * integral_correction_sharpness * v_over_vth * dv_over_vth_dp / (1.0 + integral_correction_sharpness * v_over_vth^2)
+                          - 2.0 * integral_correction_sharpness^2 * v_over_vth^3 * dv_over_vth_dp / (1.0 + integral_correction_sharpness * v_over_vth^2)^2) *
+                          pdf_lowerz[output_range]
+                         )
+        end
+    end
+
+    if include_upper
+        shared_mem_parallel && begin_vperp_region()
+        @loop_vperp ivperp begin
+            # Skip vperp boundary points.
+            if vperp.n > 1 && ivperp == vperp.n
+                continue
+            end
+
+            # Get matrix entries for the response of the sheath-edge boundary condition.
+            # Ignore constraints, as these are non-linear and also should be small
+            # corrections which should not matter much for a preconditioner.
+
+            jac_range = pdf_offset+(zend-1)*vperp.n*vpa.n+(ivperp-1)*vpa.n+1 : pdf_offset+(zend-1)*vperp.n*vpa.n+ivperp*vpa.n
+            jacobian_zend = @view jacobian[jac_range,jac_range]
+            jacobian_zend_ppar = @view jacobian[jac_range,ppar_offset+zend]
+
+            vpa_unnorm, u_over_vt, vcut, plus_vcut_ind, sigma, sigma_ind, sigma_fraction,
+                element_with_zero, element_with_zero_boundary, first_point_near_zero,
+                reversed_wpa_of_minus_vpa =
+                    get_cutoff_params_upper(upar_upper, vthe_upper, phi_upper, me_over_mi,
+                                            vpa)
+
+            minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut)
+            # minus_vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and
+            # minus_vcut_ind where -vcut is.
+            minus_vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm)
+
+            if minus_vcut_fraction < 0.5
+                first_nonzero_ind = minus_vcut_ind - 1
+            else
+                first_nonzero_ind = minus_vcut_ind
+            end
+
+            # Interpolate to the 'near zero' points
+            @views fill_interpolate_symmetric_matrix!(
+                       jacobian_zend[first_point_near_zero:sigma_ind,sigma_ind+1:element_with_zero_boundary],
+                       vpa_unnorm[first_point_near_zero:sigma_ind],
+                       vpa_unnorm[sigma_ind+1:element_with_zero_boundary])
+
+            # Interpolate to the 'far from zero' points
+            @views fill_1d_interpolation_matrix!(
+                       jacobian_zend[first_point_near_zero-1:-1:first_nonzero_ind,:],
+                       reversed_wpa_of_minus_vpa[vpa.n-first_point_near_zero+2:vpa.n-first_nonzero_ind+1],
+                       vpa, vpa_spectral)
+
+            # Reverse the sign of the elements we just filled
+            jacobian_zend[first_nonzero_ind:sigma_ind,sigma_ind+1:end] .*= -1.0
+
+            if minus_vcut_fraction < 0.5
+                jacobian_zend[first_nonzero_ind,sigma_ind+1:end] .*= 0.5 - minus_vcut_fraction
+            else
+                jacobian_zend[first_nonzero_ind,sigma_ind+1:end] .*= 1.5 - minus_vcut_fraction
+            end
+
+            # Fill in elements giving response to changes in electron_ppar
+            # A change to ppar results in a shift in the location of w_∥(v_∥=0).
+            # The interpolated values of g_e(w_∥) that are filled by the boundary
+            # condition are (dropping _∥ subscripts for the remaining comments in this
+            # if-clause)
+            #   g(w|v<0) = g(2*u/vth - w)
+            # So
+            #   δg(w|v<0) = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δvth
+            #             = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δ(sqrt(2*ppar/n)
+            #             = dg/dw(2*u/vth - w) * (-2*u/vth^2) * δppar * vth / (2*ppar)
+            #             = -u/vth/ppar * dg/dw(2*u/vth - w) * δppar
+            #
+            # As jacobian_zend_ppar only depends on variations of a single value
+            # `ppar[end]` it might be more maintainable and computationally cheaper to
+            # calculate jacobian_zend_ppar with a finite difference method (applying the
+            # boundary condition function with a perturbed pressure `ppar[end] + ϵ` for
+            # some small ϵ) rather than calculating it using the method below. If we used
+            # the finite difference approach, we would have to be careful that the ϵ step
+            # does not change the cutoff index (otherwise we would pick up non-smooth
+            # changes) - one possibility might to be to switch to `-ϵ` instead of `+ϵ` if
+            # this happens.
+
+            dpdfdv_near_zero = @view vpa.scratch[first_point_near_zero:sigma_ind]
+            @views interpolate_symmetric!(dpdfdv_near_zero,
+                                          vpa_unnorm[first_point_near_zero:sigma_ind],
+                                          pdf_upper[sigma_ind+1:element_with_zero_boundary,ivperp],
+                                          vpa_unnorm[sigma_ind+1:element_with_zero_boundary],
+                                          Val(1))
+            # The above call to interpolate_symmetric calculates dg/dv rather than dg/dw,
+            # so need to multiply by an extra factor of vthe.
+            #   δg(w|v<0) = -u/ppar * dg/dv(2*u/vth - w) * δppar
+            @. jacobian_zend_ppar[first_point_near_zero:sigma_ind] -=
+                   -upar_upper / ppar_upper * dpdfdv_near_zero
+
+            dpdfdw_far_from_zero = @view vpa.scratch[first_nonzero_ind:first_point_near_zero-1]
+            @views interpolate_to_grid_1d!(dpdfdw_far_from_zero,
+                                           reversed_wpa_of_minus_vpa[vpa.n-first_point_near_zero+2:vpa.n-first_nonzero_ind+1],
+                                           pdf_upper[:,ivperp], vpa, vpa_spectral, Val(1))
+            reverse!(dpdfdw_far_from_zero)
+            # Note that because we calculated the derivative of the interpolating
+            # function, and then reversed the results, we need to multiply the derivative
+            # by -1.
+            @. jacobian_zend_ppar[first_nonzero_ind:first_point_near_zero-1] -=
+                   upar_upper / vthe_upper / ppar_upper * dpdfdw_far_from_zero
+
+            # Whatever the variation due to interpolation is at the last nonzero grid
+            # point, it will be reduced by the cutoff.
+            if minus_vcut_fraction < 0.5
+                jacobian_zend_ppar[first_nonzero_ind] *= 0.5 - minus_vcut_fraction
+            else
+                jacobian_zend_ppar[first_nonzero_ind] *= 1.5 - minus_vcut_fraction
+            end
+
+            # The change in electron_ppar also changes the position of
+            #     wcut = (-vcut - upar)/vthe
+            # as vcut does not change (within the Krylov iteration where this
+            # preconditioner matrix is used), but vthe does because of the change in
+            # electron_ppar. We actually use minus_vcut_fraction calculated from
+            # vpa_unnorm, so it is most convenient to consider:
+            #     v = vthe * w + upar
+            #     δv = δvthe * w
+            #        = δvthe * (v - upar)/vthe
+            #        = δppar * vthe / (2*ppar) * (v - upar)/vthe
+            #        = δppar * (v - upar) / 2 / ppar
+            # with vl and vu the values of v at the grid points below and above vcut
+            #     minus_vcut_fraction = (-vcut - vl) / (vu - vl)
+            #     δminus_vcut_fraction = -(-vcut - vl) / (vu - vl)^2 * (δvu - δvl) - δvl / (vu - vl)
+            #     δminus_vcut_fraction = [-(-vcut - vl) / (vu - vl)^2 * (vu - vl) - (vl - upar) / (vu - vl)] * δppar / 2 / ppar
+            #     δminus_vcut_fraction = [-(-vcut - vl) / (vu - vl) - (vl - upar) / (vu - vl)] * δppar / 2 / ppar
+            #     δminus_vcut_fraction = -(-vcut - upar) / (vu - vl) / 2 / ppar * δppar
+            #     δminus_vcut_fraction = (vcut + upar) / (vu - vl) / 2 / ppar * δppar
+            interpolated_pdf_at_first_nonzero_ind = @view vpa.scratch[1:1]
+            reversed_first_nonzero_ind = vpa.n-first_nonzero_ind+1
+            @views interpolate_to_grid_1d!(interpolated_pdf_at_first_nonzero_ind,
+                                           reversed_wpa_of_minus_vpa[reversed_first_nonzero_ind:reversed_first_nonzero_ind],
+                                           pdf_upper[:,ivperp], vpa, vpa_spectral)
+
+            dminus_vcut_fraction_dp = (vcut + upar_upper) / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) / 2.0 / ppar_upper
+            # Note that pdf_upper[first_nonzero_ind,ivperp] depends on -minus_vcut_fraction, so
+            # need a -'ve sign in the following line.
+            jacobian_zend_ppar[first_nonzero_ind] -= -interpolated_pdf_at_first_nonzero_ind[] * dminus_vcut_fraction_dp
+
+            # Calculate some numerical integrals of dpdfdw that we will need later
+            function get_part3_for_one_moment_upper(integral_pieces)
+                # Integral contribution from the cell containing sigma
+                # The contribution from integral_pieces[sigma_ind+1] should be dropped,
+                # because that point is on the input grid, and this correction is due to
+                # the changes in interpolated values on the output grid due to δp_e∥. The
+                # value of g_e at sigma_ind+1 isn't interpolated so does not change in
+                # this way.
+                integral_sigma_cell = 0.5 * integral_pieces[sigma_ind]
+
+                part3 = sum(@view integral_pieces[minus_vcut_ind-1:sigma_ind-1])
+                part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell
+
+                return part3
+            end
+            # The contents of jacobian_zend_ppar at this point are the coefficients needed
+            # to get the new distribution function due to a change δp, which we can now
+            # use to calculate the response of various integrals to the same change.  Note
+            # that jacobian_zend_ppar already contains `2.0*dsigma_dp`.
+            @. vpa.scratch = -jacobian_zend_ppar * vpa.wgts / sqrt(π)
+            da3_dp = get_part3_for_one_moment_upper(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_upper
+            db3_dp = get_part3_for_one_moment_upper(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_upper
+            dc3_dp = get_part3_for_one_moment_upper(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_upper
+            dd3_dp = get_part3_for_one_moment_upper(vpa.scratch)
+            @. vpa.scratch = -jacobian_zend_ppar * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe_upper^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_upper^2)
+            vpa.scratch[sigma_ind:sigma_ind+1] .= 0.0
+            dalpha_dp_interp = get_part3_for_one_moment_upper(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_upper
+            dbeta_dp_interp = get_part3_for_one_moment_upper(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_upper
+            dgamma_dp_interp = get_part3_for_one_moment_upper(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_upper
+            ddelta_dp_interp = get_part3_for_one_moment_upper(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_upper
+            depsilon_dp_interp = get_part3_for_one_moment_upper(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_upper
+            dzeta_dp_interp = get_part3_for_one_moment_upper(vpa.scratch)
+            @. vpa.scratch *= vpa_unnorm / vthe_upper
+            deta_dp_interp = get_part3_for_one_moment_upper(vpa.scratch)
+
+            pdf_upperz = vpa.scratch10
+            @views @. pdf_upperz[sigma_ind:end] = pdf_upper[sigma_ind:end,ivperp]
+
+            # interpolate the pdf_upperz onto this grid
+            # 'near zero' means in the range where
+            # abs(v_∥)≤abs(upper boundary of element including v_∥=0)
+            # 'far from zero' means more negative values of v_∥.
+
+            # Interpolate to the 'near zero' points
+            @views interpolate_symmetric!(pdf_upperz[first_point_near_zero:sigma_ind],
+                                          vpa_unnorm[first_point_near_zero:sigma_ind],
+                                          pdf_upperz[sigma_ind+1:element_with_zero_boundary],
+                                          vpa_unnorm[sigma_ind+1:element_with_zero_boundary])
+
+            # Interpolate to the 'far from zero' points
+            reversed_pdf_far_from_zero = @view pdf_upperz[1:first_point_near_zero-1]
+            interpolate_to_grid_1d!(reversed_pdf_far_from_zero,
+                                    reversed_wpa_of_minus_vpa[vpa.n-first_point_near_zero+2:end],
+                                    pdf_upperz, vpa, vpa_spectral)
+            reverse!(reversed_pdf_far_from_zero)
+
+            plus_vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm)
+            if plus_vcut_fraction > 0.5
+                upper_cutoff_ind = plus_vcut_ind+1
+            else
+                upper_cutoff_ind = plus_vcut_ind
+            end
+            if minus_vcut_fraction < 0.5
+                lower_cutoff_ind = minus_vcut_ind-1
+                lower_cutoff_factor = 0.5 - minus_vcut_fraction
+            else
+                lower_cutoff_ind = minus_vcut_ind
+                lower_cutoff_factor = 1.5 - minus_vcut_fraction
+            end
+            pdf_upperz[lower_cutoff_ind] *= lower_cutoff_factor
+            pdf_upperz[1:lower_cutoff_ind-1] .= 0.0
+
+            a2, b2, c2, d2, a3, b3, c3, d3, alpha, beta, gamma, delta, epsilon, zeta, eta =
+                get_upperz_integral_correction_components(
+                    pdf_upperz, vthe_upper, vpa, vpa_unnorm, u_over_vt,
+                    sigma_ind, sigma_fraction, vcut, minus_vcut_ind, plus_vcut_ind, false)
+
+            output_range = lower_cutoff_ind:sigma_ind
+
+            v_over_vth = @views @. vpa.scratch[output_range] = vpa_unnorm[output_range] / vthe_upper
+
+            correction_matrix = [alpha beta    gamma   delta   ;
+                                 beta  gamma   delta   epsilon ;
+                                 gamma delta   epsilon zeta    ;
+                                 delta epsilon zeta    eta
+                                ]
+
+            # jacobian_zbegin_ppar state at this point would generate (when multiplied by
+            # some δp) a δf due to the effectively-shifted grid. The unperturbed f
+            # required corrections to make its integrals correct. Need to apply the same
+            # corrections to δf.
+            # This term seems to have a very small contribution, and could probably be
+            # skipped.
+            A, B, C, D = correction_matrix \ [a2-a3, -b2-b3, c2-c3, -d2-d3]
+            @. jacobian_zend_ppar[output_range] *=
+                   1.0 +
+                   (A
+                    + B * v_over_vth
+                    + C * v_over_vth^2
+                    + D * v_over_vth^3) *
+                   integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2)
+
+            # Calculate the changes in the integrals a2, b2, c2, d2, a3, b3, c3, and d3 in
+            # response to changes in electron_ppar. The changes are calculated for the
+            # combinations (a2-a3), (-b2-b3), (c2-c3), and (-d2-d3) to take advantage of
+            # some cancellations.
+
+            # Need to calculate the variation of various intermediate quantities with
+            # δp.
+            #
+            #   vth = sqrt(2*p/n)
+            #   δvth = vth / (2 * p) * δp
+            #
+            #   sigma = -u / vth
+            #   δsigma = u / vth^2 δvth
+            #          = u / (2 * vth * p) * δp
+            #
+            # We could write sigma_fraction as
+            #   sigma_fraction = (sigma - vpa[sigma_ind]) / (vpa[sigma_ind+1] - vpa[sigma_ind])
+            # so that
+            #   δsigma_fraction = δsigma / (vpa[sigma_ind+1] - vpa[sigma_ind])
+            #                   = u / (2 * vth * p) / (vpa[sigma_ind+1] - vpa[sigma_ind]) * δp
+            #
+            #   minus_vcut_fraction = ((-vcut - u)/vth - vpa[minus_vcut_ind-1]) / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1])
+            #   δminus_vcut_fraction = (vcut + u) / vth^2 / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1]) * δvth
+            #                        = (vcut + u) / (2 * vth * p) / (vpa[minus_vcut_ind] - vpa[minus_vcut_ind-1]) * δp
+            #
+            #   plus_vcut_fraction = ((vcut - u)/vth - vpa[plus_vcut_ind-1]) / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind])
+            #   δplus_vcut_fraction = -(vcut - u) / vth^2 / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind]) * δvth
+            #                       = -(vcut - u) / (2 * vth * p) / (vpa[plus_vcut_ind+1] - vpa[plus_vcut_ind]) * δp
+
+            density_integral_pieces_upperz = vpa.scratch3
+            flow_integral_pieces_upperz = vpa.scratch4
+            energy_integral_pieces_upperz = vpa.scratch5
+            cubic_integral_pieces_upperz = vpa.scratch6
+            quartic_integral_pieces_upperz = vpa.scratch7
+            fill_integral_pieces!(
+                pdf_upperz, vthe_upper, vpa, vpa_unnorm, density_integral_pieces_upperz,
+                flow_integral_pieces_upperz, energy_integral_pieces_upperz,
+                cubic_integral_pieces_upperz, quartic_integral_pieces_upperz)
+            vpa_grid = vpa.grid
+
+            dsigma_dp = upar_upper / (2.0 * vthe_upper * ppar_upper)
+
+            dsigma_fraction_dp = dsigma_dp / (vpa_grid[sigma_ind+1] - vpa_grid[sigma_ind])
+
+            dminus_vcut_fraction_dp = (vcut + upar_upper) / (2.0 * vthe_upper * ppar_upper) / (vpa_grid[minus_vcut_ind] - vpa_grid[minus_vcut_ind-1])
+
+            dplus_vcut_fraction_dp = -(vcut - upar_upper) / (2.0 * vthe_upper * ppar_upper) / (vpa_grid[plus_vcut_ind+1] - vpa_grid[plus_vcut_ind])
+
+
+            density_integral_sigma_cell = 0.5 * (density_integral_pieces_upperz[sigma_ind] +
+                                                 density_integral_pieces_upperz[sigma_ind+1])
+            da2_minus_a3_dp = (
+                # Contribution from integral limits at sigma
+                -2.0 * density_integral_sigma_cell * dsigma_fraction_dp
+                # Contribution from integral limits at wcut+. The contribution from -wcut-
+                # is included in da3_dp
+                + density_integral_pieces_upperz[upper_cutoff_ind] * dplus_vcut_fraction_dp
+                # No contribution from w-integral for a2 or a3 as integrand does not
+                # depend on ppar.
+                # Change in a3 integral due to different interpolated ̂g values
+                - da3_dp
+               )
+
+            dminus_b2_minus_b3_dp = (
+                # Contribution from integral limits at sigma cancel exactly
+                # Contribution from integral limits at wcut+. The contribution from -wcut-
+                # is included in db3_dp
+                - flow_integral_pieces_upperz[upper_cutoff_ind] * dplus_vcut_fraction_dp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + (a2 + a3) * dsigma_dp
+                # Change in b3 integral due to different interpolated ̂g values
+                - db3_dp
+               )
+
+            energy_integral_sigma_cell = 0.5 * (energy_integral_pieces_upperz[sigma_ind] +
+                                                energy_integral_pieces_upperz[sigma_ind+1])
+            dc2_minus_c3_dp = (
+                # Contribution from integral limits at sigma
+                -2.0 * energy_integral_sigma_cell * dsigma_fraction_dp
+                # Contribution from integral limits at wcut+. The contribution from -wcut-
+                # is included in dc3_dp
+                + energy_integral_pieces_upperz[upper_cutoff_ind] * dplus_vcut_fraction_dp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + 2.0 * (-b2 + b3) * dsigma_dp
+                # Change in c3 integral due to different interpolated ̂g values
+                - dc3_dp
+               )
+
+            dminus_d2_minus_d3_dp = (
+                # Contribution from integral limits at sigma cancel exactly
+                # Contribution from integral limits at wcut+. The contribution from -wcut-
+                # is included in dc3_dp
+                - cubic_integral_pieces_upperz[upper_cutoff_ind] * dplus_vcut_fraction_dp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + 3.0 * (c2 + c3) * dsigma_dp
+                # Change in d3 integral due to different interpolated ̂g values
+                - dd3_dp
+               )
+
+            correction0_integral_pieces = @. vpa.scratch3 = pdf_upperz * vpa.wgts / sqrt(π) * integral_correction_sharpness * vpa_unnorm^2 / vthe_upper^2 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_upper^2)
+            for ivpa ∈ sigma_ind:vpa.n
+                correction0_integral_pieces[ivpa] = 0.0
+            end
+
+            correctionminus1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces / vpa_unnorm * vthe_upper
+            integralminus1 = sum(@view(correctionminus1_integral_pieces[lower_cutoff_ind:sigma_ind-1]))
+
+            correction0_integral_type2_pieces = @. vpa.scratch4 = pdf_upperz * vpa.wgts / sqrt(π) * 2.0 * integral_correction_sharpness^2 * vpa_unnorm^3 / vthe_upper^3 / (1.0 + integral_correction_sharpness * vpa_unnorm^2 / vthe_upper^2)^2
+            integral_type2 = sum(@view(correction0_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1]))
+            dalpha_dp = (
+                # The grid points either side of sigma are zero-ed out for these
+                # corrections, so this boundary does not contribute.
+                # Contribution from integral limit at -wcut- is included in dalpha_dp_interp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                 (-2.0 * integralminus1 + integral_type2) * dsigma_dp
+                # Change in alpha integral due to different interpolated ̂g values
+                + dalpha_dp_interp
+               )
+
+            correction1_integral_pieces = @. vpa.scratch5 = correction0_integral_pieces * vpa_unnorm / vthe_upper
+            correction1_integral_type2_pieces = @. vpa.scratch6 = correction0_integral_type2_pieces * vpa_unnorm / vthe_upper
+            integral_type2 = sum(@view(correction1_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1]))
+            dbeta_dp = (
+                # The grid points either side of sigma are zero-ed out for these
+                # corrections, so this boundary does not contribute.
+                # Contribution from integral limit at -wcut- is included in dbeta_dp_interp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + (-2.0 * alpha + integral_type2) * dsigma_dp
+                # Change in beta integral due to different interpolated ̂g values
+                + dbeta_dp_interp
+               )
+
+            # Here we overwrite the buffers that were used for correction1_integral_pieces
+            # and correction1_integral_type2_pieces, but this is OK as we never need those
+            # arrays again.
+            correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe_upper
+            correction2_integral_type2_pieces = @. vpa.scratch6 = correction1_integral_type2_pieces * vpa_unnorm / vthe_upper
+            integral_type2 = sum(@view(correction2_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1]))
+            dgamma_dp = (
+                # The grid points either side of sigma are zero-ed out for these
+                # corrections, so this boundary does not contribute.
+                # Contribution from integral limit at -wcut- is included in dgamma_dp_interp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + (-2.0 * beta + integral_type2) * dsigma_dp
+                # Change in gamma integral due to different interpolated ̂g values
+                + dgamma_dp_interp
+               )
+
+            # Here we overwrite the buffers that were used for correction2_integral_pieces
+            # and correction2_integral_type2_pieces, but this is OK as we never need those
+            # arrays again.
+            correction3_integral_pieces = @. vpa.scratch5 = correction2_integral_pieces * vpa_unnorm / vthe_upper
+            correction3_integral_type2_pieces = @. vpa.scratch6 = correction2_integral_type2_pieces * vpa_unnorm / vthe_upper
+            integral_type2 = sum(@view(correction3_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1]))
+            ddelta_dp = (
+                # The grid points either side of sigma are zero-ed out for these
+                # corrections, so this boundary does not contribute.
+                # Contribution from integral limit at -wcut- is included in ddelta_dp_interp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + (-2.0 * gamma + integral_type2) * dsigma_dp
+                # Change in delta integral due to different interpolated ̂g values
+                + ddelta_dp_interp
+               )
+
+            # Here we overwrite the buffers that were used for correction3_integral_pieces
+            # and correction3_integral_type2_pieces, but this is OK as we never need those
+            # arrays again.
+            correction4_integral_pieces = @. vpa.scratch5 = correction3_integral_pieces * vpa_unnorm / vthe_upper
+            correction4_integral_type2_pieces = @. vpa.scratch6 = correction3_integral_type2_pieces * vpa_unnorm / vthe_upper
+            integral_type2 = sum(@view(correction4_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1]))
+            depsilon_dp = (
+                # The grid points either side of sigma are zero-ed out for these
+                # corrections, so this boundary does not contribute.
+                # Contribution from integral limit at -wcut- is included in depsilon_dp_interp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + (-2.0 * delta + integral_type2) * dsigma_dp
+                # Change in epsilon integral due to different interpolated ̂g values
+                + depsilon_dp_interp
+               )
+
+            # Here we overwrite the buffers that were used for correction4_integral_pieces
+            # and correction4_integral_type2_pieces, but this is OK as we never need those
+            # arrays again.
+            correction5_integral_pieces = @. vpa.scratch5 = correction4_integral_pieces * vpa_unnorm / vthe_upper
+            correction5_integral_type2_pieces = @. vpa.scratch6 = correction4_integral_type2_pieces * vpa_unnorm / vthe_upper
+            integral_type2 = sum(@view(correction5_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1]))
+            dzeta_dp = (
+                # The grid points either side of sigma are zero-ed out for these
+                # corrections, so this boundary does not contribute.
+                # Contribution from integral limit at -wcut- is included in dzeta_dp_interp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + (-2.0 * epsilon + integral_type2) * dsigma_dp
+                # Change in zeta integral due to different interpolated ̂g values
+                + dzeta_dp_interp
+               )
+
+            # Here we overwrite the buffers that were used for correction4_integral_pieces
+            # and correction4_integral_type2_pieces, but this is OK as we never need those
+            # arrays again.
+            correction6_integral_pieces = @. vpa.scratch5 = correction5_integral_pieces * vpa_unnorm / vthe_upper
+            correction6_integral_type2_pieces = @. vpa.scratch6 = correction5_integral_type2_pieces * vpa_unnorm / vthe_upper
+            integral_type2 = sum(@view(correction6_integral_type2_pieces[lower_cutoff_ind:sigma_ind-1]))
+            deta_dp = (
+                # The grid points either side of sigma are zero-ed out for these
+                # corrections, so this boundary does not contribute.
+                # Contribution from integral limit at -wcut- is included in deta_dp_interp
+                # Contribution from w-integral due to variation of integrand with ppar.
+                + (-2.0 * zeta + integral_type2) * dsigma_dp
+                # Change in eta integral due to different interpolated ̂g values
+                + deta_dp_interp
+               )
+
+            dA_dp, dB_dp, dC_dp, dD_dp = correction_matrix \ (
+                                             [da2_minus_a3_dp, dminus_b2_minus_b3_dp, dc2_minus_c3_dp, dminus_d2_minus_d3_dp]
+                                             - [dalpha_dp dbeta_dp    dgamma_dp   ddelta_dp   ;
+                                                dbeta_dp  dgamma_dp   ddelta_dp   depsilon_dp ;
+                                                dgamma_dp ddelta_dp   depsilon_dp dzeta_dp    ;
+                                                ddelta_dp depsilon_dp dzeta_dp    deta_dp
+                                               ]
+                                               * [A, B, C, D]
+                                              )
+
+            output_range = lower_cutoff_ind:sigma_ind-1
+            v_over_vth = @views @. vpa.scratch[output_range] = vpa_unnorm[output_range] / vthe_upper
+            @views @. jacobian_zend_ppar[output_range] -=
+                          (dA_dp
+                           + dB_dp * v_over_vth
+                           + dC_dp * v_over_vth^2
+                           + dD_dp * v_over_vth^3) *
+                          integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) *
+                          pdf_upperz[output_range]
+
+            # Add variation due to variation of ̃v coordinate with δp.
+            # These contributions seem to make almost no difference, and could probably be
+            # skipped.
+            dv_over_vth_dp = -dsigma_dp
+            @views @. jacobian_zend_ppar[output_range] -= (
+                          (B * dv_over_vth_dp
+                           + 2.0 * C * v_over_vth * dv_over_vth_dp
+                           + 3.0 * D * v_over_vth^2 * dv_over_vth_dp) *
+                          integral_correction_sharpness * v_over_vth^2 / (1.0 + integral_correction_sharpness * v_over_vth^2) *
+                          pdf_upperz[output_range]
+                          +
+                          (A
+                           + B * v_over_vth
+                           + C * v_over_vth^2
+                           + D * v_over_vth^3) *
+                          (2.0 * integral_correction_sharpness * v_over_vth * dv_over_vth_dp / (1.0 + integral_correction_sharpness * v_over_vth^2)
+                          - 2.0 * integral_correction_sharpness^2 * v_over_vth^3 * dv_over_vth_dp / (1.0 + integral_correction_sharpness * v_over_vth^2)^2) *
+                          pdf_upperz[output_range]
+                         )
+        end
+    end
+
+    return nothing
+end
+
 """
     electron_adaptive_timestep_update!(scratch, t, t_params, moments, phi, z_advect,
                                        vpa_advect, composition, r, z, vperp, vpa,
@@ -3374,11 +4564,12 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati
 `evolve_ppar=true`) the electron energy equation.
 """
 @timeit global_timer fill_electron_kinetic_equation_Jacobian!(
-                         jacobian_matrix, f, ppar, moments, collisions, composition, z,
-                         vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect,
-                         vpa_advect, scratch_dummy, external_source_settings,
-                         num_diss_params, t_params, ion_dt, ir, evolve_ppar,
-                         include=:all, include_qpar_integral_terms=true) = begin
+                         jacobian_matrix, f, ppar, moments, phi_global, collisions,
+                         composition, z, vperp, vpa, z_spectral, vperp_spectral,
+                         vpa_spectral, z_advect, vpa_advect, scratch_dummy,
+                         external_source_settings, num_diss_params, t_params, ion_dt, ir,
+                         evolve_ppar, include=:all,
+                         include_qpar_integral_terms=true) = begin
     dt = t_params.dt[]
 
     buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1]
@@ -3396,6 +4587,7 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati
     dppar_dz = @view moments.electron.dppar_dz[:,ir]
     dvth_dz = @view moments.electron.dvth_dz[:,ir]
     dqpar_dz = @view moments.electron.dqpar_dz[:,ir]
+    phi = @view phi_global[:,ir]
 
     upar_ion = @view moments.ion.upar[:,ir,1]
 
@@ -3517,19 +4709,24 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati
         add_ion_dt_forcing_of_electron_ppar_to_Jacobian!(
             jacobian_matrix, z, dt, ion_dt, ir, include; ppar_offset=pdf_size)
     end
+    if t_params.include_wall_bc_in_preconditioner
+        add_wall_boundary_condition_to_Jacobian!(
+            jacobian_matrix, phi, f, ppar, vth, upar, z, vperp, vpa, vperp_spectral,
+            vpa_spectral, vpa_advect, moments,
+            num_diss_params.electron.vpa_dissipation_coefficient, me, ir, include;
+            ppar_offset=pdf_size)
+    end
 
     return nothing
 end
 
 """
-    fill_electron_kinetic_equation_v_only_Jacobian!(jacobian_matrix, f, ppar, moments,
-                                                    collisions, composition, z, vperp,
-                                                    vpa, z_spectral, vperp_specral,
-                                                    vpa_spectral, z_advect, vpa_advect,
-                                                    scratch_dummy,
-                                                    external_source_settings,
-                                                    num_diss_params, t_params, ion_dt, ir,
-                                                    iz, evolve_ppar, include=:all)
+    fill_electron_kinetic_equation_v_only_Jacobian!()
+        jacobian_matrix, f, ppar, dpdf_dz, dpdf_dvpa, z_speed, moments, zeroth_moment,
+        first_moment, second_moment, third_moment, dthird_moment_dz, phi, collisions,
+        composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect,
+        vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params,
+        ion_dt, ir, iz, evolve_ppar)
 
 Fill a pre-allocated matrix with the Jacobian matrix for a velocity-space solve part of
 the ADI method for electron kinetic equation and (if `evolve_ppar=true`) the electron
@@ -3538,7 +4735,7 @@ energy equation.
 @timeit global_timer fill_electron_kinetic_equation_v_only_Jacobian!(
                          jacobian_matrix, f, ppar, dpdf_dz, dpdf_dvpa, z_speed, moments,
                          zeroth_moment, first_moment, second_moment, third_moment,
-                         dthird_moment_dz, collisions, composition, z, vperp, vpa,
+                         dthird_moment_dz, phi, collisions, composition, z, vperp, vpa,
                          z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect,
                          scratch_dummy, external_source_settings, num_diss_params,
                          t_params, ion_dt, ir, iz, evolve_ppar) = begin
@@ -3557,7 +4754,6 @@ energy equation.
 
     upar_ion = moments.ion.upar[iz,ir,1]
 
-    pdf_size = z.n * vperp.n * vpa.n
     v_size = vperp.n * vpa.n
 
     # Initialise jacobian_matrix to the identity
@@ -3601,6 +4797,14 @@ energy equation.
             jacobian_matrix, z, dt, ion_dt, ir, iz)
     end
 
+    if t_params.include_wall_bc_in_preconditioner
+        add_wall_boundary_condition_to_Jacobian!(
+            jacobian_matrix, phi, f, ppar, vth, upar, z, vperp, vpa, vperp_spectral,
+            vpa_spectral, vpa_advect, moments,
+            num_diss_params.electron.vpa_dissipation_coefficient, me, ir, :implicit_v, iz;
+            ppar_offset=v_size)
+    end
+
     return nothing
 end
 
diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl
index 1b616116d..8bfd386ea 100644
--- a/moment_kinetics/src/gauss_legendre.jl
+++ b/moment_kinetics/src/gauss_legendre.jl
@@ -29,8 +29,9 @@ using SparseMatricesCSR
 using ..type_definitions: mk_float, mk_int
 using ..array_allocation: allocate_float
 import ..calculus: elementwise_derivative!, mass_matrix_solve!
-import ..interpolation: single_element_interpolate!
-using ..lagrange_polynomials: lagrange_poly_optimised
+import ..interpolation: single_element_interpolate!,
+                        fill_single_element_interpolation_matrix!
+using ..lagrange_polynomials: lagrange_poly_optimised, lagrange_poly_derivative_optimised
 using ..moment_kinetics_structs: weak_discretization_info
 
 
@@ -386,7 +387,8 @@ end
 Function to perform interpolation on a single element.
 """
 function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, coord,
-                                     gausslegendre::gausslegendre_base_info)
+                                     gausslegendre::gausslegendre_base_info,
+                                     derivative::Val{0})
     n_new = length(newgrid)
 
     i = 1
@@ -408,6 +410,50 @@ function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, c
     return nothing
 end
 
+"""
+Function to carry out a 1D (global) mass matrix solve.
+"""
+# Evaluate first derivative of the interpolating function
+function single_element_interpolate!(result, newgrid, f, imin, imax, ielement, coord,
+                                     gausslegendre::gausslegendre_base_info,
+                                     derivative::Val{1})
+    n_new = length(newgrid)
+
+    i = 1
+    other_nodes = @view coord.other_nodes[:,i,ielement]
+    one_over_denominator = coord.one_over_denominator[i,ielement]
+    this_f = f[i]
+    for j ∈ 1:n_new
+        result[j] = this_f * lagrange_poly_derivative_optimised(other_nodes, one_over_denominator, newgrid[j])
+    end
+    for i ∈ 2:coord.ngrid
+        other_nodes = @view coord.other_nodes[:,i,ielement]
+        one_over_denominator = coord.one_over_denominator[i,ielement]
+        this_f = f[i]
+        for j ∈ 1:n_new
+            result[j] += this_f * lagrange_poly_derivative_optimised(other_nodes, one_over_denominator, newgrid[j])
+        end
+    end
+
+    return nothing
+end
+
+function fill_single_element_interpolation_matrix!(
+             matrix_slice, newgrid, jelement, coord,
+             gausslegendre::gausslegendre_base_info)
+    n_new = length(newgrid)
+
+    for j ∈ 1:coord.ngrid
+        other_nodes = @view coord.other_nodes[:,j,jelement]
+        one_over_denominator = coord.one_over_denominator[j,jelement]
+        for i ∈ 1:n_new
+            matrix_slice[i,j] = lagrange_poly_optimised(other_nodes, one_over_denominator, newgrid[i])
+        end
+    end
+
+    return nothing
+end
+
 """
 Function to carry out a 1D (global) mass matrix solve.
 """
diff --git a/moment_kinetics/src/hermite_spline_interpolation.jl b/moment_kinetics/src/hermite_spline_interpolation.jl
index 255c5da91..03453fbfc 100644
--- a/moment_kinetics/src/hermite_spline_interpolation.jl
+++ b/moment_kinetics/src/hermite_spline_interpolation.jl
@@ -21,9 +21,16 @@ coord : coordinate
 not_spectral : finite_difference_info
     A finite_difference_info argument here indicates that the coordinate is not
     spectral-element discretized, i.e. it is on a uniform ('finite difference') grid.
+derivative : Val(n)
+    The value of `n` the integer in the `Val{n}` indicates the order of the derivative to
+    be calculated of the interpolating function (only a few values of `n` are supported).
+    Defaults to Val(0), which means just calculating the interpolating function itself.
 """
+function interpolate_to_grid_1d! end
+
 function interpolate_to_grid_1d!(result, new_grid, f, coord,
-                                 not_spectral::finite_difference_info)
+                                 not_spectral::finite_difference_info,
+                                 derivative::Val{0}=Val(0))
     x = coord.grid
     n_new = length(new_grid)
 
@@ -101,5 +108,10 @@ function interpolate_to_grid_1d!(result, new_grid, f, coord,
     return nothing
 end
 
+function interpolate_to_grid_1d!(result, new_grid, f, coord,
+                                 not_spectral::finite_difference_info,
+                                 derivative::Val{1})
+    error("First derivative interpolation not implemented for finite-difference yet.")
+end
 
 end # hermite_spline_interpolation
diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl
index df191e544..ca9856868 100644
--- a/moment_kinetics/src/initial_conditions.jl
+++ b/moment_kinetics/src/initial_conditions.jl
@@ -761,6 +761,8 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field
                                    nl_solver_params.electron_advance.global_precon_iterations,
                                    nl_solver_params.electron_advance.solves_since_precon_update,
                                    nl_solver_params.electron_advance.precon_dt,
+                                   nl_solver_params.electron_advance.precon_lowerz_vcut_inds,
+                                   nl_solver_params.electron_advance.precon_upperz_vcut_inds,
                                    nl_solver_params.electron_advance.serial_solve,
                                    nl_solver_params.electron_advance.max_nonlinear_iterations_this_step,
                                    nl_solver_params.electron_advance.max_linear_iterations_this_step,
diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl
index 094773a00..a367b2a68 100644
--- a/moment_kinetics/src/input_structs.jl
+++ b/moment_kinetics/src/input_structs.jl
@@ -89,6 +89,7 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero
     cap_factor_ion_dt::mk_float
     max_pseudotimesteps::mk_int
     max_pseudotime::mk_float
+    include_wall_bc_in_preconditioner::Bool
     write_after_fixed_step_count::Bool
     error_sum_zero::Terrorsum
     split_operators::Bool
diff --git a/moment_kinetics/src/interpolation.jl b/moment_kinetics/src/interpolation.jl
index 69041a463..c87d6b104 100644
--- a/moment_kinetics/src/interpolation.jl
+++ b/moment_kinetics/src/interpolation.jl
@@ -5,7 +5,8 @@ Note these are not guaranteed to be highly optimized!
 """
 module interpolation
 
-export interpolate_to_grid_z, interpolate_to_grid_1d!, interpolate_symmetric!
+export interpolate_to_grid_z, interpolate_to_grid_1d!, interpolate_symmetric!,
+       fill_interpolate_symmetric_matrix!
 
 using ..array_allocation: allocate_float
 using ..moment_kinetics_structs: null_spatial_dimension_info, null_velocity_dimension_info
@@ -44,10 +45,15 @@ coord : coordinate
 spectral : discretization_info
     struct containing information for discretization, whose type determines which method
     is used.
+derivative : Val(n)
+    The value of `n` the integer in the `Val{n}` indicates the order of the derivative to
+    be calculated of the interpolating function (only a few values of `n` are supported).
+    Defaults to Val(0), which means just calculating the interpolating function itself.
 """
 function interpolate_to_grid_1d! end
 
-function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral)
+function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral,
+                                 derivative::Val=Val(0))
     # define local variable nelement for convenience
     nelement = coord.nelement_local
 
@@ -59,7 +65,7 @@ function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral)
     # Assumes points in newgrid are sorted.
     # May not be the most efficient algorithm.
     # Find start/end points for each element, storing their indices in kstart
-    kstart = Vector{mk_int}(undef, nelement+1)
+    kstart = coord.scratch_int_nelement_plus_1
 
     # First element includes both boundary points, while all others have only one (to
     # avoid duplication), so calculate the first element outside the loop.
@@ -82,10 +88,21 @@ function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral)
     end
 
     # check to see if any of the newgrid points are to the left of the first grid point
-    for j ∈ 1:kstart[1]-1
-        # if the new grid location is outside the bounds of the original grid,
-        # extrapolate f with Gaussian-like decay beyond the domain
-        result[j] = f[1] * exp(-(coord.grid[1] - newgrid[j])^2)
+    if isa(derivative, Val{0})
+        for j ∈ 1:kstart[1]-1
+            # if the new grid location is outside the bounds of the original grid,
+            # extrapolate f with Gaussian-like decay beyond the domain
+            result[j] = f[1] * exp(-(coord.grid[1] - newgrid[j])^2)
+        end
+    elseif isa(derivative, Val{1})
+        for j ∈ 1:kstart[1]-1
+            # if the new grid location is outside the bounds of the original grid,
+            # extrapolate f with Gaussian-like decay beyond the domain, then take
+            # derivative.
+            result[j] = 2.0 * (coord.grid[1] - newgrid[j]) * f[1] * exp(-(coord.grid[1] - newgrid[j])^2)
+        end
+    else
+        error("Unrecognised derivative=$derivative")
     end
     @inbounds for j ∈ 1:nelement
         # Search from kstart[j] to try to speed up the sort, but means result of
@@ -100,7 +117,7 @@ function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral)
         kmax = kstart[2] - 1
         @views single_element_interpolate!(result[kmin:kmax], newgrid[kmin:kmax],
                                            f[imin:imax], imin, imax, 1, coord,
-                                           first_element_spectral)
+                                           first_element_spectral, derivative)
     end
     @inbounds for j ∈ 2:nelement
         kmin = kstart[j]
@@ -110,19 +127,115 @@ function interpolate_to_grid_1d!(result, newgrid, f, coord, spectral)
             imax = coord.imax[j]
             @views single_element_interpolate!(result[kmin:kmax], newgrid[kmin:kmax],
                                                f[imin:imax], imin, imax, j, coord,
-                                               spectral.lobatto)
+                                               spectral.lobatto, derivative)
         end
     end
 
-    for k ∈ kstart[nelement+1]:n_new
-        result[k] = f[end] * exp(-(newgrid[k] - coord.grid[end])^2)
+    if isa(derivative, Val{0})
+        for k ∈ kstart[nelement+1]:n_new
+            result[k] = f[end] * exp(-(newgrid[k] - coord.grid[end])^2)
+        end
+    elseif isa(derivative, Val{1})
+        for k ∈ kstart[nelement+1]:n_new
+            result[k] = -2.0 * (newgrid[k] - coord.grid[end]) * f[end] * exp(-(newgrid[k] - coord.grid[end])^2)
+        end
+    else
+        error("Unrecognised derivative=$derivative")
+    end
+
+    return nothing
+end
+
+"""
+    fill_single_element_interpolation_matrix!(
+        matrix_slice, newgrid, jelement, coord, spectral)
+
+Set `matrix_slice` equal to the interpolation matrix that interpolates values from the
+element `jelement` of the vector being multiplied onto the grid points given by `newgrid`
+(which must be contained within the physical space covered by element `jelement`).
+
+`coord` is the `coordinate` object for the dimension in which the interpolation is done,
+and `spectral` the discretization object corresponding to `jelement`.
+"""
+function fill_single_element_interpolation_matrix! end
+
+function fill_1d_interpolation_matrix!(matrix, newgrid, coord, spectral)
+    # define local variable nelement for convenience
+    nelement = coord.nelement_local
+
+    n_new = size(newgrid)[1]
+    # Find which points belong to which element.
+    # istart[j] contains the index of the first point in newgrid that is within element
+    # j, and istart[nelement+1] is n_new if the last point is within coord.grid, or the
+    # index of the first element outside coord.grid otherwise.
+    # Assumes points in newgrid are sorted.
+    # May not be the most efficient algorithm.
+    # Find start/end points for each element, storing their indices in istart
+    istart = coord.scratch_int_nelement_plus_1
+
+    # First element includes both boundary points, while all others have only one (to
+    # avoid duplication), so calculate the first element outside the loop.
+    if coord.radau_first_element && coord.irank == 0
+        first_element_spectral = spectral.radau
+        # For a grid with a Radau first element, the lower boundary of the first element
+        # is at coord=0, and the coordinate range is 0<coord<∞ so we will never need to to
+        # extrapolate to negative values, and points between coord=0 and coord.grid[1] are
+        # really within the first element, so the index for the first point greater than 0
+        # (the left boundary of the coordinate grid) in `newgrid` is always 1.
+        istart[1] = 1
+
+        # In a coordinate with a Radau first element, no point should be less than zero.
+        # If bounds checking is enabled, check that first `newgrid` point is ≥0.
+        @boundscheck newgrid[1] ≥ 0.0
+    else
+        first_element_spectral = spectral.lobatto
+        # set the starting index by finding the start of coord.grid
+        istart[1] = searchsortedfirst(newgrid, coord.grid[1])
+    end
+
+    # check to see if any of the newgrid points are to the left of the first grid point
+    for i ∈ 1:istart[1]-1
+        # if the new grid location is outside the bounds of the original grid,
+        # extrapolate f with Gaussian-like decay beyond the domain
+        matrix[i,1] = exp(-(coord.grid[1] - newgrid[j])^2)
+    end
+    @inbounds for j ∈ 1:nelement
+        # Search from istart[j] to try to speed up the sort, but means result of
+        # searchsortedfirst() is offset by istart[j]-1 from the beginning of newgrid.
+        istart[j+1] = istart[j] - 1 + @views searchsortedfirst(newgrid[istart[j]:end], coord.grid[coord.imax[j]])
+    end
+
+    if istart[1] < istart[2]
+        jmin = coord.imin[1]
+        jmax = coord.imax[1]
+        imin = istart[1]
+        imax = istart[2] - 1
+        @views fill_single_element_interpolation_matrix!(
+                   matrix[imin:imax,jmin:jmax], newgrid[imin:imax], 1, coord,
+                   first_element_spectral)
+    end
+    @inbounds for j ∈ 2:nelement
+        imin = istart[j]
+        imax = istart[j+1] - 1
+        if imin <= imax
+            jmin = coord.imin[j] - 1
+            jmax = coord.imax[j]
+            @views fill_single_element_interpolation_matrix!(
+                       matrix[imin:imax,jmin:jmax], newgrid[imin:imax], j, coord,
+                       spectral.lobatto)
+        end
+    end
+
+    for i ∈ istart[nelement+1]:n_new
+        matrix[i,end] = exp(-(newgrid[i] - coord.grid[end])^2)
     end
 
     return nothing
 end
 
 function interpolate_to_grid_1d!(result, new_grid, f, coord,
-                                 spectral::null_spatial_dimension_info)
+                                 spectral::null_spatial_dimension_info,
+                                 derivative::Val{0})
     # There is only one point in the 'old grid' represented by coord (as indicated by the
     # type of the `spectral` argument), and we are interpolating in a spatial dimension.
     # Assume that the variable should be taken to be constant in this dimension to
@@ -133,7 +246,8 @@ function interpolate_to_grid_1d!(result, new_grid, f, coord,
 end
 
 function interpolate_to_grid_1d!(result, new_grid, f, coord,
-                                 spectral::null_velocity_dimension_info)
+                                 spectral::null_velocity_dimension_info,
+                                 derivative::Val{0})
     # There is only one point in the 'old grid' represented by coord (as indicated by the
     # type of the `spectral` argument), and we are interpolating in a velocity space
     # dimension. Assume that the profile 'should be' a Maxwellian over the new grid, with
@@ -159,6 +273,10 @@ coord : coordinate
 spectral : Bool or chebyshev_info
     struct containing information for discretization, whose type determines which method
     is used.
+derivative : Val(n)
+    The value of `n` the integer in the `Val{n}` indicates the order of the derivative to
+    be calculated of the interpolating function (only a few values of `n` are supported).
+    Defaults to Val(0), which means just calculating the interpolating function itself.
 
 Returns
 -------
@@ -276,14 +394,19 @@ function interpolate_to_grid_vpa(newgrid, f::AbstractVector{mk_float}, vpa, spec
 end
 
 """
-    interpolate_symmetric!(result, newgrid, f, oldgrid)
+    interpolate_symmetric!(result, newgrid, f, oldgrid, derivative=Val(0))
 
 Interpolate f from oldgrid to newgrid, imposing that `f(x)` is symmetric around `x=0`, so
 the interpolation is done by fitting a polynomial in `x^2` to the values of `f` given on
 `oldgrid`, and evaluating on `newgrid`. Since interpolation is done in a polynomial of
 `x^2`, the signs of the points on `newgrid` and `oldgrid` do not matter, and are ignored.
+
+`Val(n)` can be passed as `derivative` to calculate the derivative of order `n` of the
+interpolating function (only a few values of `n` are supported).
 """
-function interpolate_symmetric!(result, newgrid, f, oldgrid)
+function interpolate_symmetric! end
+
+function interpolate_symmetric!(result, newgrid, f, oldgrid, derivative::Val{0}=Val(0))
     nnew = length(newgrid)
     nold = length(oldgrid)
 
@@ -314,4 +437,67 @@ function interpolate_symmetric!(result, newgrid, f, oldgrid)
     return nothing
 end
 
+function interpolate_symmetric!(result, newgrid, f, oldgrid, derivative::Val{1})
+    nnew = length(newgrid)
+    nold = length(oldgrid)
+
+    if nnew == 0
+        return nothing
+    end
+
+    # Check all points in newgrid are covered by oldgrid (i.e. between zero and the
+    # maximum of oldgrid)
+    @boundscheck maximum(abs.(newgrid)) ≤ maximum(abs.(oldgrid)) || error("newgrid bigger ($(maximum(abs.(newgrid)))) than oldgrid ($(maximum(abs.(oldgrid)))).")
+    @boundscheck size(result) == size(newgrid) || error("Size of result ($(size(result))) is not the same as size of newgrid ($(size(newgrid))).")
+    @boundscheck size(f) == size(oldgrid) || error("Size of f ($(size(f))) is not the same as size of oldgrid ($(size(oldgrid))).")
+
+    if nold == 1
+        # Interpolating 'polynomial' is just a constant
+        result .= 0.0
+    elseif nold == 2
+        # Interpolating polynomial is linear, so result is a constant
+        # df/dx = f[1] / (oldgrid[1] - oldgrid[2]) + f[2] / (oldgrid[2] - oldgrid[1])
+        return (f[1] - f[2]) / (oldgrid[1] - oldgrid[2])
+    else
+        result .= 0.0
+        for j ∈ 1:nold
+            one_over_denominator = 1.0 / prod((oldgrid[j]^2 - oldgrid[k]^2) for k ∈ 1:nold if k ≠ j)
+            this_f = f[j]
+            for i ∈ 1:nnew
+                temp = 0.0
+                for k ∈ 1:nold
+                    if k == j
+                        continue
+                    end
+                    temp += prod((newgrid[i]^2 - oldgrid[l]^2) for l ∈ 1:nold if l ∉ (j,k))
+                end
+                result[i] += this_f * temp * 2.0 * newgrid[i] * one_over_denominator
+            end
+        end
+    end
+
+    return nothing
+end
+
+function fill_interpolate_symmetric_matrix!(matrix_slice, newgrid, oldgrid)
+    nnew = length(newgrid)
+    nold = length(oldgrid)
+
+    @boundscheck size(matrix_slice) == (nnew, nold) || error("Dimensions of matrix_slice ($(size(matrix_slice))) are not the same as (nnew($nnew),nold($nold)).")
+
+    if nold == 1
+        # Interpolating 'polynomial' is just a constant
+        matrix_slice .= 1.0
+    else
+        for j ∈ 1:nold
+            one_over_denominator = 1.0 / prod((oldgrid[j]^2 - oldgrid[k]^2) for k ∈ 1:nold if k ≠ j)
+            for i ∈ 1:nnew
+                matrix_slice[i,j] = prod((newgrid[i]^2 - oldgrid[k]^2) for k ∈ 1:nold if k ≠ j) * one_over_denominator
+            end
+        end
+    end
+
+    return nothing
+end
+
 end # interpolation
diff --git a/moment_kinetics/src/lagrange_polynomials.jl b/moment_kinetics/src/lagrange_polynomials.jl
index d82a895f0..7dc8ab13d 100644
--- a/moment_kinetics/src/lagrange_polynomials.jl
+++ b/moment_kinetics/src/lagrange_polynomials.jl
@@ -8,7 +8,7 @@ their being scattered (and possibly duplicated) in other modules.
 """
 module lagrange_polynomials
 
-export lagrange_poly, lagrange_poly_optimised
+export lagrange_poly, lagrange_poly_optimised, lagrange_poly_derivative_optimised
 
 """
 Lagrange polynomial
@@ -50,4 +50,31 @@ function lagrange_poly_optimised(other_nodes, one_over_denominator, x)
     return prod(x - n for n ∈ other_nodes) * one_over_denominator
 end
 
+"""
+    lagrange_poly_derivative_optimised(other_nodes, one_over_denominator, x)
+
+Optimised calculation of the first derivative of a Lagrange polynomial, making use of
+pre-calculated quantities.
+
+`other_nodes` is a vector of the grid points in this element where this Lagrange
+polynomial is zero (the other nodes than the one where it is 1).
+
+`one_over_denominator` is `1/prod(x0 - n for n ∈ other_nodes)` where `x0` is the grid
+point where this Lagrange polynomial is 1.
+
+`x` is the point to evaluate the Lagrange polynomial at.
+"""
+function lagrange_poly_derivative_optimised(other_nodes, one_over_denominator, x)
+    result = 0.0
+    k = length(other_nodes)
+    # Is there a more efficient way of doing this? Not a big deal for now because this
+    # function will only be used to calculate a preconditioner matrix, which is done
+    # rarely.
+    for i ∈ 1:k
+        result += prod(x - other_nodes[j] for j ∈ 1:k if j ≠ i)
+    end
+    result *= one_over_denominator
+    return result
+end
+
 end
diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl
index c6ff98c85..aaf124309 100644
--- a/moment_kinetics/src/moment_kinetics_input.jl
+++ b/moment_kinetics/src/moment_kinetics_input.jl
@@ -213,6 +213,7 @@ function mk_input(input_dict=OptionsDict(); save_inputs_to_txt=false, ignore_MPI
         cap_factor_ion_dt=10.0,
         max_pseudotimesteps=1000,
         max_pseudotime=1.0e-2,
+        include_wall_bc_in_preconditioner=false,
         no_restart=false,
         debug_io=false,
        )
diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl
index 7b2707d69..aeeb43436 100644
--- a/moment_kinetics/src/nonlinear_solvers.jl
+++ b/moment_kinetics/src/nonlinear_solvers.jl
@@ -66,6 +66,8 @@ struct nl_solver_info{TH,TV,Tcsg,Tlig,Tprecon,Tpretype}
     global_precon_iterations::Base.RefValue{mk_int}
     solves_since_precon_update::Base.RefValue{mk_int}
     precon_dt::Base.RefValue{mk_float}
+    precon_lowerz_vcut_inds::Vector{mk_int}
+    precon_upperz_vcut_inds::Vector{mk_int}
     serial_solve::Bool
     max_nonlinear_iterations_this_step::Base.RefValue{mk_int}
     max_linear_iterations_this_step::Base.RefValue{mk_int}
@@ -113,6 +115,7 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
 
     linear_restart = nl_solver_input.linear_restart
 
+    n_vcut_inds = 0
     if serial_solve
         H = allocate_float(linear_restart + 1, linear_restart)
         c = allocate_float(linear_restart + 1)
@@ -143,6 +146,8 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
         end
 
         V = (V_ppar, V_pdf)
+
+        n_vcut_inds = prod(outer_coord_sizes)
     else
         H = allocate_shared_float(linear_restart + 1, linear_restart)
         c = allocate_shared_float(linear_restart + 1)
@@ -271,7 +276,8 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
                           linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0),
                           Ref(0), Ref(0), Ref(0),
                           Ref(nl_solver_input.preconditioner_update_interval),
-                          Ref(mk_float(0.0)), serial_solve, Ref(0), Ref(0),
+                          Ref(mk_float(0.0)), zeros(mk_int, n_vcut_inds),
+                          zeros(mk_int, n_vcut_inds), serial_solve, Ref(0), Ref(0),
                           preconditioner_type,
                           nl_solver_input.preconditioner_update_interval, preconditioners)
 end
diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index 617bf5770..374262198 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -431,6 +431,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
         cap_factor_ion_dt = mk_float(t_input["cap_factor_ion_dt"])
         max_pseudotimesteps = t_input["max_pseudotimesteps"]
         max_pseudotime = t_input["max_pseudotime"]
+        include_wall_bc_in_preconditioner = t_input["include_wall_bc_in_preconditioner"]
         electron_t_params = nothing
     elseif electron === false
         debug_io = nothing
@@ -441,6 +442,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
         cap_factor_ion_dt = Inf
         max_pseudotimesteps = -1
         max_pseudotime = Inf
+        include_wall_bc_in_preconditioner = false
         electron_t_params = nothing
     else
         debug_io = nothing
@@ -476,6 +478,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
         cap_factor_ion_dt = Inf
         max_pseudotimesteps = -1
         max_pseudotime = Inf
+        include_wall_bc_in_preconditioner = false
         electron_t_params = electron
     end
     return time_info(n_variables, t_input["nstep"], end_time, t, dt, previous_dt,
@@ -503,9 +506,9 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
                      mk_float(t_input["constraint_forcing_rate"]),
                      decrease_dt_iteration_threshold, increase_dt_iteration_threshold,
                      mk_float(cap_factor_ion_dt), mk_int(max_pseudotimesteps),
-                     mk_float(max_pseudotime), t_input["write_after_fixed_step_count"],
-                     error_sum_zero, t_input["split_operators"],
-                     t_input["steady_state_residual"],
+                     mk_float(max_pseudotime), include_wall_bc_in_preconditioner,
+                     t_input["write_after_fixed_step_count"], error_sum_zero,
+                     t_input["split_operators"], t_input["steady_state_residual"],
                      mk_float(t_input["converged_residual_value"]),
                      manufactured_solns_input.use_for_advance, t_input["stopfile_name"],
                      debug_io, electron_t_params)
diff --git a/moment_kinetics/test/interpolation_tests.jl b/moment_kinetics/test/interpolation_tests.jl
index bd4a1348f..12ddc5eff 100644
--- a/moment_kinetics/test/interpolation_tests.jl
+++ b/moment_kinetics/test/interpolation_tests.jl
@@ -2,9 +2,11 @@ module InterpolationTests
 
 include("setup.jl")
 
+using moment_kinetics.array_allocation: allocate_float
 using moment_kinetics.coordinates: define_test_coordinate
 using moment_kinetics.interpolation:
-    interpolate_to_grid_1d, interpolate_to_grid_z, interpolate_to_grid_vpa, interpolate_symmetric!
+    interpolate_to_grid_1d, fill_1d_interpolation_matrix!, interpolate_to_grid_z,
+    interpolate_to_grid_vpa, interpolate_symmetric!, fill_interpolate_symmetric_matrix!
 
 using MPI
 
@@ -13,6 +15,9 @@ using MPI
 test_function(L, coords...) =
     [cospi(2.0*sum(x)/L)*exp(-sinpi(2.0*sum(x)/L)) for x in Iterators.product(coords...)]
 
+test_function_first_derivative(L, coord) =
+    [-2.0*π/L*sinpi(2.0*x/L)*exp(-sinpi(2.0*x/L)) - 2.0*π/L*cospi(2.0*x/L)^2*exp(-sinpi(2.0*x/L)) for x in coord]
+
 println("interpolation tests")
 
 # define inputs needed for the test
@@ -55,6 +60,26 @@ function runtests()
 
                 @test isapprox(interpolate_to_grid_1d(test_grid, f, z, spectral),
                                expected, rtol=rtol, atol=1.e-14)
+
+                if discretization != "finite_difference"
+                    # Last element of test_grid is on the last grid point.
+                    # interpolate_to_grid_1d() will interpret this as being just outside
+                    # the grid and give the derivative of the extrapolation function
+                    # there, which is not what we want to test, so skip that point.
+                    @test isapprox(interpolate_to_grid_1d(test_grid[1:end-1], f, z, spectral, Val(1)),
+                                   test_function_first_derivative(z.L, test_grid[1:end-1]),
+                                   rtol=rtol, atol=1.e-14)
+                end
+
+                if discretization == "gausslegendre_pseudospectral"
+                    @testset "matrix" begin
+                        interp_matrix = allocate_float(length(test_grid), z.n)
+                        interp_matrix .= 0.0
+                        fill_1d_interpolation_matrix!(interp_matrix, test_grid, z, spectral)
+
+                        @test isapprox(interp_matrix * f, expected, rtol=rtol, atol=1.e-14)
+                    end
+                end
             end
 
             y = [y for y in range(5.0, 10.0, length=3)]
@@ -102,6 +127,7 @@ function runtests()
                 x = @. 1.8 * (ix - 1) / (nx - 1) - 1.23
                 first_positive_ind = searchsortedlast(x, 0.0) + 1
                 f = cos.(x)
+                dfdx = -sin.(x)
 
                 expected = f[first_positive_ind:end]
 
@@ -111,6 +137,27 @@ function runtests()
                                               x[1:first_positive_ind-1])
 
                 @test isapprox(result, expected; rtol=rtol, atol=1.0e-14)
+
+                expected_deriv = dfdx[first_positive_ind:end]
+
+                result_deriv = zeros(nx - first_positive_ind + 1)
+                @views interpolate_symmetric!(result_deriv, x[first_positive_ind:end],
+                                              f[1:first_positive_ind-1],
+                                              x[1:first_positive_ind-1], Val(1))
+
+                @test isapprox(result_deriv, expected_deriv; rtol=2.0*rtol, atol=1.0e-14)
+
+                @testset "matrix" begin
+                    interp_matrix = allocate_float(nx - first_positive_ind + 1,
+                                                   first_positive_ind - 1)
+                    interp_matrix .= 0.0
+                    fill_interpolate_symmetric_matrix!(interp_matrix,
+                                                       x[first_positive_ind:end],
+                                                       x[1:first_positive_ind-1])
+
+                    @test isapprox(interp_matrix * f[1:first_positive_ind-1], expected,
+                                   rtol=rtol, atol=1.0e-14)
+                end
             end
 
             @testset "upper to lower $nx" for nx ∈ 4:10
@@ -120,6 +167,7 @@ function runtests()
                 x = @. 1.8 * (ix - 1) / (nx - 1) - 0.57
                 first_positive_ind = searchsortedlast(x, 0.0) + 1
                 f = cos.(x)
+                dfdx = -sin.(x)
 
                 expected = f[1:first_positive_ind-1]
 
@@ -129,6 +177,27 @@ function runtests()
                                               x[first_positive_ind:end])
 
                 @test isapprox(result, expected; rtol=rtol, atol=1.0e-14)
+
+                expected_deriv = dfdx[1:first_positive_ind-1]
+
+                result_deriv = zeros(first_positive_ind-1)
+                @views interpolate_symmetric!(result_deriv, x[1:first_positive_ind-1],
+                                              f[first_positive_ind:end],
+                                              x[first_positive_ind:end], Val(1))
+
+                @test isapprox(result_deriv, expected_deriv; rtol=2.0*rtol, atol=1.0e-14)
+
+                @testset "matrix" begin
+                    interp_matrix = allocate_float(first_positive_ind - 1,
+                                                   nx - first_positive_ind + 1)
+                    interp_matrix .= 0.0
+                    fill_interpolate_symmetric_matrix!(interp_matrix,
+                                                       x[1:first_positive_ind-1],
+                                                       x[first_positive_ind:end])
+
+                    @test isapprox(interp_matrix * f[first_positive_ind:end], expected,
+                                   rtol=rtol, atol=1.0e-14)
+                end
             end
         end
     end
diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl
index a67b2ac1a..6721f7b30 100644
--- a/moment_kinetics/test/jacobian_matrix_tests.jl
+++ b/moment_kinetics/test/jacobian_matrix_tests.jl
@@ -8,7 +8,8 @@ using moment_kinetics: setup_moment_kinetics, cleanup_moment_kinetics!
 using moment_kinetics.analysis: vpagrid_to_dzdt
 using moment_kinetics.array_allocation: allocate_shared_float
 using moment_kinetics.boundary_conditions: enforce_v_boundary_condition_local!,
-                                           enforce_vperp_boundary_condition!
+                                           enforce_vperp_boundary_condition!,
+                                           skip_f_electron_bc_points_in_Jacobian
 using moment_kinetics.calculus: derivative!
 using moment_kinetics.communication
 using moment_kinetics.derivatives: derivative_z!, derivative_z_pdf_vpavperpz!
@@ -28,10 +29,13 @@ using moment_kinetics.electron_kinetic_equation: add_contribution_from_pdf_term!
                                                  add_ion_dt_forcing_of_electron_ppar_to_z_only_Jacobian!,
                                                  add_ion_dt_forcing_of_electron_ppar_to_v_only_Jacobian!,
                                                  electron_kinetic_equation_euler_update!,
+                                                 enforce_boundary_condition_on_electron_pdf!,
                                                  fill_electron_kinetic_equation_Jacobian!,
                                                  fill_electron_kinetic_equation_v_only_Jacobian!,
                                                  fill_electron_kinetic_equation_z_only_Jacobian_f!,
-                                                 fill_electron_kinetic_equation_z_only_Jacobian_ppar!
+                                                 fill_electron_kinetic_equation_z_only_Jacobian_ppar!,
+                                                 add_wall_boundary_condition_to_Jacobian!,
+                                                 zero_z_boundary_condition_points
 using moment_kinetics.electron_vpa_advection: electron_vpa_advection!,
                                               update_electron_speed_vpa!,
                                               add_electron_vpa_advection_to_Jacobian!,
@@ -55,10 +59,12 @@ using moment_kinetics.moment_constraints: electron_implicit_constraint_forcing!,
                                           add_electron_implicit_constraint_forcing_to_z_only_Jacobian!,
                                           add_electron_implicit_constraint_forcing_to_v_only_Jacobian!,
                                           hard_force_moment_constraints!
+using moment_kinetics.timer_utils: reset_mk_timers!
 using moment_kinetics.type_definitions: mk_float
 using moment_kinetics.velocity_moments: calculate_electron_moment_derivatives_no_r!,
                                         integrate_over_vspace
 
+using LinearAlgebra
 using moment_kinetics.StatsBase
 
 # Small parameter used to create perturbations to test Jacobian against
@@ -183,6 +189,7 @@ test_input = OptionsDict("output" => OptionsDict("run_name" => "jacobian_matrix"
                                                                 "initialization_residual_value" => 2.5,
                                                                 "converged_residual_value" => 1.0e-2,
                                                                 "constraint_forcing_rate" => 2.321,
+                                                                "include_wall_bc_in_preconditioner" => true,
                                                                ),
                          "nonlinear_solver" => OptionsDict("nonlinear_max_iterations" => 100,
                                                            "rtol" => 1.0e-5,
@@ -208,6 +215,9 @@ test_input = OptionsDict("output" => OptionsDict("run_name" => "jacobian_matrix"
                         )
 
 function get_mk_state(test_input)
+    # Reset timers in case there was a previous run which did not clean them up.
+    reset_mk_timers!()
+
     mk_state = nothing
     quietoutput() do
         mk_state = setup_moment_kinetics(test_input; skip_electron_solve=true)
@@ -460,7 +470,9 @@ function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2)
                                                   vperp, vperp_spectral, vperp_adv,
                                                   vperp_diffusion, ir)
             end
-            if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
+            if z.bc == "wall"
+                error("z_bc = \"wall\" not supported here yet.")
+            elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
                 # Boundary conditions on incoming part of distribution function. Note
                 # that as density, upar, ppar do not change in this implicit step,
                 # f_electron_newvar, f_old, and residual should all be zero at exactly
@@ -797,7 +809,9 @@ function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2)
                                                   vperp, vperp_spectral, vperp_adv,
                                                   vperp_diffusion, ir)
             end
-            if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
+            if z.bc == "wall"
+                error("z_bc = \"wall\" not supported here yet.")
+            elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
                 # Boundary conditions on incoming part of distribution function. Note
                 # that as density, upar, ppar do not change in this implicit step,
                 # f_electron_newvar, f_old, and residual should all be zero at exactly
@@ -1148,7 +1162,9 @@ function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilo
                                                   vperp, vperp_spectral, vperp_adv,
                                                   vperp_diffusion, ir)
             end
-            if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
+            if z.bc == "wall"
+                error("z_bc = \"wall\" not supported here yet.")
+            elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
                 # Boundary conditions on incoming part of distribution function. Note
                 # that as density, upar, ppar do not change in this implicit step,
                 # f_electron_newvar, f_old, and residual should all be zero at exactly
@@ -1444,7 +1460,9 @@ function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2)
                                                   vperp, vperp_spectral, vperp_adv,
                                                   vperp_diffusion, ir)
             end
-            if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
+            if z.bc == "wall"
+                error("z_bc = \"wall\" not supported here yet.")
+            elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
                 # Boundary conditions on incoming part of distribution function. Note
                 # that as density, upar, ppar do not change in this implicit step,
                 # f_electron_newvar, f_old, and residual should all be zero at exactly
@@ -1759,7 +1777,9 @@ function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2)
                                                   vperp, vperp_spectral, vperp_adv,
                                                   vperp_diffusion, ir)
             end
-            if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
+            if z.bc == "wall"
+                error("z_bc = \"wall\" not supported here yet.")
+            elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
                 # Boundary conditions on incoming part of distribution function. Note
                 # that as density, upar, ppar do not change in this implicit step,
                 # f_electron_newvar, f_old, and residual should all be zero at exactly
@@ -2088,7 +2108,9 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2)
                                                   vperp, vperp_spectral, vperp_adv,
                                                   vperp_diffusion, ir)
             end
-            if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
+            if z.bc == "wall"
+                error("z_bc = \"wall\" not supported here yet.")
+            elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
                 # Boundary conditions on incoming part of distribution function. Note
                 # that as density, upar, ppar do not change in this implicit step,
                 # f_electron_newvar, f_old, and residual should all be zero at exactly
@@ -2431,7 +2453,9 @@ function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsil
                                                   vperp, vperp_spectral, vperp_adv,
                                                   vperp_diffusion, ir)
             end
-            if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
+            if z.bc == "wall"
+                error("z_bc = \"wall\" not supported here yet.")
+            elseif (z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
                 # Boundary conditions on incoming part of distribution function. Note
                 # that as density, upar, ppar do not change in this implicit step,
                 # f_electron_newvar, f_old, and residual should all be zero at exactly
@@ -3098,18 +3122,22 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2
 end
 
 function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
-    test_input = deepcopy(test_input)
-    test_input["output"]["run_name"] *= "_electron_kinetic_equation"
-    println("    - electron_kinetic_equation")
 
-    @testset "electron_kinetic_equation" begin
+    # Looser rtol for "wall" bc because integral corrections not accounted for in wall bc
+    # Jacobian (yet?).
+    @testset "electron_kinetic_equation bc=$bc" for (bc, adi_tol) ∈ (("constant", 1.0e-15), ("wall", 1.0e-13))
+        println("    - electron_kinetic_equation $bc")
+        this_test_input = deepcopy(test_input)
+        this_test_input["output"]["run_name"] *= "_electron_kinetic_equation_$bc"
+        this_test_input["z"]["bc"] = bc
+
         # Suppress console output while running
         pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa,
             vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs,
             composition, collisions, geometry, gyroavs, boundary_distributions,
             external_source_settings, num_diss_params, nl_solver_params, advance,
             advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list,
-            ascii_io, io_moments, io_dfns = get_mk_state(test_input)
+            ascii_io, io_moments, io_dfns = get_mk_state(this_test_input)
 
         dens = @view moments.electron.dens[:,ir]
         upar = @view moments.electron.upar[:,ir]
@@ -3270,9 +3298,9 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
             # Add 'explicit' contribution
             # Use jacobian_matrix as a temporary buffer here.
             fill_electron_kinetic_equation_Jacobian!(
-                jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa,
-                z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect,
-                scratch_dummy, external_source_settings, num_diss_params,
+                jacobian_matrix, f, ppar, moments, fields.phi, collisions, composition, z,
+                vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect,
+                vpa_advect, scratch_dummy, external_source_settings, num_diss_params,
                 t_params.electron, ion_dt, ir, true, :explicit_v)
             begin_serial_region()
             @serial_region begin
@@ -3280,10 +3308,10 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
             end
 
             fill_electron_kinetic_equation_Jacobian!(
-                jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa,
-                z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
-                external_source_settings, num_diss_params, t_params.electron, ion_dt, ir,
-                true)
+                jacobian_matrix, f, ppar, moments, fields.phi, collisions, composition, z,
+                vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect,
+                vpa_advect, scratch_dummy, external_source_settings, num_diss_params,
+                t_params.electron, ion_dt, ir, true)
 
             begin_serial_region()
             @serial_region begin
@@ -3292,7 +3320,7 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
                 # Jacobian matrix functions without being too messed up by floating-point
                 # rounding errors. The result is that some entries in the Jacobian matrix
                 # here are O(1.0e5), so it is important to use `rtol` here.
-                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=1.0e-15, atol=1.0e-15)
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=adi_tol, atol=1.0e-15)
             end
         end
 
@@ -3320,18 +3348,18 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
                     jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], ppar[iz],
                     dpdf_dz[:,:,iz], dpdf_dvpa[:,:,iz], z_speed, moments,
                     zeroth_moment[iz], first_moment[iz], second_moment[iz],
-                    third_moment[iz], dthird_moment_dz[iz], collisions, composition, z,
-                    vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect,
-                    vpa_advect, scratch_dummy, external_source_settings, num_diss_params,
-                    t_params.electron, ion_dt, ir, iz, true)
+                    third_moment[iz], dthird_moment_dz[iz], fields.phi[iz,ir], collisions,
+                    composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral,
+                    z_advect, vpa_advect, scratch_dummy, external_source_settings,
+                    num_diss_params, t_params.electron, ion_dt, ir, iz, true)
             end
 
             # Add 'explicit' contribution
             # Use jacobian_matrix as a temporary buffer here.
             fill_electron_kinetic_equation_Jacobian!(
-                jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa,
-                z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect,
-                scratch_dummy, external_source_settings, num_diss_params,
+                jacobian_matrix, f, ppar, moments, fields.phi, collisions, composition, z,
+                vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect,
+                vpa_advect, scratch_dummy, external_source_settings, num_diss_params,
                 t_params.electron, ion_dt, ir, true, :explicit_z)
 
             begin_serial_region()
@@ -3340,10 +3368,10 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
             end
 
             fill_electron_kinetic_equation_Jacobian!(
-                jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa,
-                z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
-                external_source_settings, num_diss_params, t_params.electron, ion_dt, ir,
-                true)
+                jacobian_matrix, f, ppar, moments, fields.phi, collisions, composition, z,
+                vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect,
+                vpa_advect, scratch_dummy, external_source_settings, num_diss_params,
+                t_params.electron, ion_dt, ir, true)
 
             begin_serial_region()
             @serial_region begin
@@ -3352,7 +3380,7 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
                 # Jacobian matrix functions without being too messed up by floating-point
                 # rounding errors. The result is that some entries in the Jacobian matrix
                 # here are O(1.0e5), so it is important to use `rtol` here.
-                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=1.0e-13, atol=1.0e-13)
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=10.0*adi_tol, atol=1.0e-13)
             end
         end
 
@@ -3420,36 +3448,7 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
                                                   vperp, vperp_spectral, vperp_adv,
                                                   vperp_diffusion, ir)
             end
-            if (z.bc == "wall" || z.bc == "constant") && (z.irank == 0 || z.irank == z.nrank - 1)
-                # Boundary conditions on incoming part of distribution function. Note
-                # that as density, upar, ppar do not change in this implicit step,
-                # f_electron_newvar, f_old, and residual should all be zero at exactly
-                # the same set of grid points, so it is reasonable to zero-out
-                # `residual` to impose the boundary condition. We impose this after
-                # subtracting f_old in case rounding errors, etc. mean that at some
-                # point f_old had a different boundary condition cut-off index.
-                begin_vperp_vpa_region()
-                v_unnorm = vpa.scratch
-                zero = 1.0e-14
-                if z.irank == 0
-                    iz = 1
-                    v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true)
-                    @loop_vperp_vpa ivperp ivpa begin
-                        if v_unnorm[ivpa] > -zero
-                            residual_f[ivpa,ivperp,iz] = 0.0
-                        end
-                    end
-                end
-                if z.irank == z.nrank - 1
-                    iz = z.n
-                    v_unnorm .= vpagrid_to_dzdt(vpa.grid, vth[iz], upar[iz], true, true)
-                    @loop_vperp_vpa ivperp ivpa begin
-                        if v_unnorm[ivpa] < zero
-                            residual_f[ivpa,ivperp,iz] = 0.0
-                        end
-                    end
-                end
-            end
+            zero_z_boundary_condition_points(residual_f, z, vpa, moments, ir)
             return nothing
         end
 
@@ -3457,15 +3456,30 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
         original_residual_p = allocate_shared_float(size(ppar)...)
         perturbed_residual_f = allocate_shared_float(size(f)...)
         perturbed_residual_p = allocate_shared_float(size(ppar)...)
+        f_plus_delta_f = allocate_shared_float(size(f)...)
+        f_with_delta_p = allocate_shared_float(size(f)...)
+        begin_z_vperp_vpa_region()
+        @loop_z_vperp_vpa iz ivperp ivpa begin
+            f_plus_delta_f[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + delta_f[ivpa,ivperp,iz]
+            f_with_delta_p[ivpa,ivperp,iz] = f[ivpa,ivperp,iz]
+        end
+        p_plus_delta_p = allocate_shared_float(size(ppar)...)
+        begin_z_region()
+        @loop_z iz begin
+            p_plus_delta_p[iz] = ppar[iz] + delta_p[iz]
+        end
 
         @testset "δf only" begin
             residual_func!(original_residual_f, original_residual_p, f, ppar)
-            residual_func!(perturbed_residual_f, perturbed_residual_p, f.+delta_f, ppar)
+            residual_func!(perturbed_residual_f, perturbed_residual_p, f_plus_delta_f, ppar)
 
             begin_serial_region()
             @serial_region begin
                 delta_state = zeros(mk_float, total_size)
-                delta_state[1:pdf_size] .= vec(delta_f)
+                # Take this difference rather than using delta_f directly because we need
+                # the effect of the boundary condition having been applied to
+                # f_plus_delta_f.
+                delta_state[1:pdf_size] .= vec(f_plus_delta_f .- f)
                 residual_update_with_Jacobian = jacobian_matrix * delta_state
                 perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size]
                 perturbed_with_Jacobian_p = vec(original_residual_p) .+ residual_update_with_Jacobian[pdf_size+1:end]
@@ -3483,11 +3497,15 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
 
         @testset "δp only" begin
             residual_func!(original_residual_f, original_residual_p, f, ppar)
-            residual_func!(perturbed_residual_f, perturbed_residual_p, f, ppar.+delta_p)
+            residual_func!(perturbed_residual_f, perturbed_residual_p, f_with_delta_p, p_plus_delta_p)
 
             begin_serial_region()
             @serial_region begin
                 delta_state = zeros(mk_float, total_size)
+                # Take this difference rather than using delta_f directly because we need
+                # the effect of the boundary condition having been applied to
+                # f_with_delta_p.
+                delta_state[1:pdf_size] .= vec(f_with_delta_p .- f)
                 delta_state[pdf_size+1:end] .= vec(delta_p)
                 residual_update_with_Jacobian = jacobian_matrix * delta_state
                 perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size]
@@ -3506,12 +3524,15 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
 
         @testset "δf and δp" begin
             residual_func!(original_residual_f, original_residual_p, f, ppar)
-            residual_func!(perturbed_residual_f, perturbed_residual_p, f.+delta_f, ppar.+delta_p)
+            residual_func!(perturbed_residual_f, perturbed_residual_p, f_plus_delta_f, p_plus_delta_p)
 
             begin_serial_region()
             @serial_region begin
                 delta_state = zeros(mk_float, total_size)
-                delta_state[1:pdf_size] .= vec(delta_f)
+                # Take this difference rather than using delta_f directly because we need
+                # the effect of the boundary condition having been applied to
+                # f_plus_delta_f.
+                delta_state[1:pdf_size] .= vec(f_plus_delta_f .- f)
                 delta_state[pdf_size+1:end] .= vec(delta_p)
                 residual_update_with_Jacobian = jacobian_matrix * delta_state
                 perturbed_with_Jacobian_f = vec(original_residual_f) .+ residual_update_with_Jacobian[1:pdf_size]
@@ -3534,6 +3555,408 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
     return nothing
 end
 
+function test_electron_wall_bc(test_input; atol=(7.0*epsilon)^2)
+    test_input = deepcopy(test_input)
+    test_input["output"]["run_name"] *= "_electron_wall_bc"
+    println("    - electron_wall_bc")
+
+    # This test only affects the end-points in z, so only include those points to avoid an
+    # over-optimistic error estimate due the time update matrix for all other z-indices
+    # just being the identity.
+    test_input["z"]["nelement"] = 1
+    test_input["z"]["ngrid"] = 2
+    test_input["z"]["bc"] = "wall"
+
+    # Interpolation done during the boundary condition needs to be reasonably accurate for
+    # the simplified form (without constraints) that is done in the 'Jacobian matrix' to
+    # match the full version, so increase vpa resolution.
+    test_input["vpa"]["nelement"] = 256
+    test_input["vz"]["nelement"] = 256
+
+    @testset "electron_wall_bc" begin
+        # Suppress console output while running
+        pdf, scratch, scratch_implicit, scratch_electron, t_params, vz, vr, vzeta, vpa,
+            vperp, gyrophase, z, r, moments, fields, spectral_objects, advection_structs,
+            composition, collisions, geometry, gyroavs, boundary_distributions,
+            external_source_settings, num_diss_params, nl_solver_params, advance,
+            advance_implicit, fp_arrays, scratch_dummy, manufactured_source_list,
+            ascii_io, io_moments, io_dfns = get_mk_state(test_input)
+
+        dens = @view moments.electron.dens[:,ir]
+        upar = @view moments.electron.upar[:,ir]
+        ppar = @view moments.electron.ppar[:,ir]
+        vth = @view moments.electron.vth[:,ir]
+        qpar = @view moments.electron.qpar[:,ir]
+        ddens_dz = @view moments.electron.ddens_dz[:,ir]
+        dppar_dz = @view moments.electron.dppar_dz[:,ir]
+        phi = @view fields.phi[:,ir]
+        z_spectral = spectral_objects.z_spectral
+        vperp_spectral = spectral_objects.vperp_spectral
+        vpa_spectral = spectral_objects.vpa_spectral
+        z_advect = advection_structs.z_advect
+        vpa_advect = advection_structs.vpa_advect
+        me = composition.me_over_mi
+
+        buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1]
+        buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1]
+        buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1]
+        buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1]
+
+        v_size = vperp.n * vpa.n
+
+        # Reconstruct w_∥^3 moment of g_e from already-calculated qpar
+        third_moment = scratch_dummy.buffer_z_1
+        dthird_moment_dz = scratch_dummy.buffer_z_2
+        begin_z_region()
+        @loop_z iz begin
+            third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz]
+        end
+        derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2,
+                      buffer_3, buffer_4, z_spectral, z)
+
+        begin_vperp_vpa_region()
+        update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir)
+        z_speed = @view z_advect[1].speed[:,:,:,ir]
+
+        delta_p = allocate_shared_float(size(ppar)...)
+        p_amplitude = epsilon * maximum(ppar)
+        f = @view pdf.electron.norm[:,:,:,ir]
+        begin_serial_region()
+        @serial_region begin
+            # Make sure initial condition has some z-variation. As f is 'moment kinetic' this
+            # means f must have a non-Maxwellian part that varies in z.
+            f .*= 1.0 .+ 1.0e-4 .* reshape(vpa.grid.^3, vpa.n, 1, 1) .* reshape(sin.(2.0.*π.*z.grid./z.L), 1, 1, z.n)
+        end
+        # Ensure initial electron distribution function obeys constraints
+        hard_force_moment_constraints!(reshape(f, vpa.n, vperp.n, z.n, 1), moments, vpa)
+        # enforce the boundary condition(s) on the electron pdf
+        @views enforce_boundary_condition_on_electron_pdf!(
+                   f, phi, vth, upar, z, vperp, vpa, vperp_spectral, vpa_spectral,
+                   vpa_advect, moments,
+                   num_diss_params.electron.vpa_dissipation_coefficient > 0.0,
+                   composition.me_over_mi, ir; bc_constraints=false, update_vcut=false)
+        delta_f = allocate_shared_float(size(f)...)
+        f_amplitude = epsilon * maximum(f)
+        # Use exp(sin()) in vpa so that perturbation does not have any symmetry that makes
+        # low-order moments vanish exactly.
+        # For this test have no z-dependence in delta_f so that it does not vanish
+        # at the z-boundaries
+        begin_serial_region()
+        @serial_region begin
+            @. delta_p = p_amplitude
+
+            delta_f .= f_amplitude .*
+                       reshape(exp.(sin.(2.0.*π.*test_wavenumber.*vpa.grid./vpa.L)) .- 1.0, vpa.n, 1, 1) .*
+                       f
+        end
+
+        pdf_size = length(f)
+        p_size = length(ppar)
+        total_size = pdf_size + p_size
+
+        dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir]
+        begin_z_vperp_region()
+        update_electron_speed_vpa!(vpa_advect[1], dens, upar, ppar, moments, vpa.grid,
+                                   external_source_settings.electron, ir)
+        @loop_z_vperp iz ivperp begin
+            @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir]
+        end
+        #calculate the upwind derivative of the electron pdf w.r.t. wpa
+        @loop_z_vperp iz ivperp begin
+            @views derivative!(dpdf_dvpa[:,ivperp,iz], f[:,ivperp,iz], vpa,
+                               vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral)
+        end
+
+        jacobian_matrix = allocate_shared_float(total_size, total_size)
+        begin_serial_region()
+        @serial_region begin
+            jacobian_matrix .= 0.0
+        end
+        begin_z_vperp_vpa_region()
+        @loop_z_vperp_vpa iz ivperp ivpa begin
+            # Rows corresponding to pdf_electron
+            row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa
+
+            # Initialise identity matrix.
+            jacobian_matrix[row,row] = 1.0
+        end
+        begin_z_region()
+        @loop_z iz begin
+            # Rows corresponding to electron_ppar
+            row = pdf_size + iz
+
+            # Initialise identity matrix.
+            jacobian_matrix[row,row] = 1.0
+        end
+
+        add_wall_boundary_condition_to_Jacobian!(
+            jacobian_matrix, phi, f, ppar, vth, upar, z, vperp, vpa, vperp_spectral,
+            vpa_spectral, vpa_advect, moments,
+            num_diss_params.electron.vpa_dissipation_coefficient, me, ir, :all;
+            ppar_offset=pdf_size)
+
+        # Test 'ADI Jacobians' before other tests, because residual_func() may modify some
+        # variables (vth, etc.).
+
+        jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+
+        @testset "ADI Jacobians - implicit z" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            begin_z_vperp_vpa_region()
+            @loop_z_vperp_vpa iz ivperp ivpa begin
+                # Rows corresponding to pdf_electron
+                row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa
+
+                # Initialise identity matrix.
+                jacobian_matrix_ADI_check[row,:] .= 0.0
+                jacobian_matrix_ADI_check[row,row] = 1.0
+            end
+            begin_z_region()
+            @loop_z iz begin
+                # Rows corresponding to electron_ppar
+                row = pdf_size + iz
+
+                # Initialise identity matrix.
+                jacobian_matrix_ADI_check[row,:] .= 0.0
+                jacobian_matrix_ADI_check[row,row] = 1.0
+            end
+
+            # There is no 'implicit z' contribution for wall bc
+
+            # Add 'explicit' contribution
+            add_wall_boundary_condition_to_Jacobian!(
+                jacobian_matrix_ADI_check, phi, f, ppar, vth, upar, z, vperp, vpa,
+                vperp_spectral, vpa_spectral, vpa_advect, moments,
+                num_diss_params.electron.vpa_dissipation_coefficient, me, ir, :explicit_v;
+                ppar_offset=pdf_size)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
+
+        @testset "ADI Jacobians - implicit v" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            begin_z_vperp_vpa_region()
+            @loop_z_vperp_vpa iz ivperp ivpa begin
+                # Rows corresponding to pdf_electron
+                row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa
+
+                # Initialise identity matrix.
+                jacobian_matrix_ADI_check[row,:] .= 0.0
+                jacobian_matrix_ADI_check[row,row] = 1.0
+            end
+            begin_z_region()
+            @loop_z iz begin
+                # Rows corresponding to electron_ppar
+                row = pdf_size + iz
+
+                # Initialise identity matrix.
+                jacobian_matrix_ADI_check[row,:] .= 0.0
+                jacobian_matrix_ADI_check[row,row] = 1.0
+            end
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_z_region()
+            @loop_z iz begin
+                this_slice = collect((iz - 1)*v_size + 1:iz*v_size)
+                push!(this_slice, iz + pdf_size)
+                @views add_wall_boundary_condition_to_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], phi[iz], f[:,:,iz],
+                    ppar[iz], vth[iz], upar[iz], z, vperp, vpa, vperp_spectral,
+                    vpa_spectral, vpa_advect, moments,
+                    num_diss_params.electron.vpa_dissipation_coefficient, me, ir,
+                    :implicit_v, iz; ppar_offset=v_size)
+            end
+
+            # There is no 'explicit vpa' contribution for wall bc
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
+
+        function residual_func!(residual, this_f, this_p)
+            begin_z_region()
+            @loop_z iz begin
+                # update the electron thermal speed using the updated electron
+                # parallel pressure
+                vth[iz] = sqrt(abs(2.0 * this_p[iz] /
+                                   (dens[iz] * composition.me_over_mi)))
+            end
+
+            # enforce the boundary condition(s) on the electron pdf
+            @views enforce_boundary_condition_on_electron_pdf!(
+                       this_f, phi, vth, upar, z, vperp, vpa, vperp_spectral,
+                       vpa_spectral, vpa_advect, moments,
+                       num_diss_params.electron.vpa_dissipation_coefficient > 0.0,
+                       composition.me_over_mi, ir; bc_constraints=false,
+                       update_vcut=false)
+
+            # electron_kinetic_equation_euler_update!() just adds dt*d(g_e)/dt to the
+            # electron_pdf member of the first argument, so if we set the electron_pdf member
+            # of the first argument to zero, and pass dt=1, then it will evaluate the time
+            # derivative, which is the residual for a steady-state solution.
+            begin_z_vperp_vpa_region()
+            @loop_z_vperp_vpa iz ivperp ivpa begin
+                residual[ivpa,ivperp,iz] = f[ivpa,ivperp,iz]
+            end
+            # Now
+            #   residual = f_electron_old + dt*RHS(f_electron_newvar)
+            # so update to desired residual
+            begin_z_vperp_vpa_region()
+            @loop_z_vperp_vpa iz ivperp ivpa begin
+                residual[ivpa,ivperp,iz] = this_f[ivpa,ivperp,iz] - residual[ivpa,ivperp,iz]
+            end
+
+            # Set residual to zero where pdf_electron is determined by boundary conditions.
+            if vpa.n > 1
+                begin_z_vperp_region()
+                @loop_z_vperp iz ivperp begin
+                    @views enforce_v_boundary_condition_local!(residual[:,ivperp,iz], vpa.bc,
+                                                               vpa_advect[1].speed[:,ivperp,iz,ir],
+                                                               num_diss_params.electron.vpa_dissipation_coefficient > 0.0,
+                                                               vpa, vpa_spectral)
+                end
+            end
+            if vperp.n > 1
+                begin_z_vpa_region()
+                enforce_vperp_boundary_condition!(residual, vperp.bc,
+                                                  vperp, vperp_spectral, vperp_adv,
+                                                  vperp_diffusion, ir)
+            end
+            if z.bc == "wall"
+                zero_z_boundary_condition_points(residual, z, vpa, moments, ir)
+            else
+                error("Testing wall bc but z_bc != \"wall\".")
+            end
+
+            return nothing
+        end
+
+        original_residual = allocate_shared_float(size(f)...)
+        perturbed_residual = allocate_shared_float(size(f)...)
+        f_plus_delta_f = allocate_shared_float(size(f)...)
+        f_with_delta_p = allocate_shared_float(size(f)...)
+        begin_z_vperp_vpa_region()
+        @loop_z_vperp_vpa iz ivperp ivpa begin
+            f_plus_delta_f[ivpa,ivperp,iz] = f[ivpa,ivperp,iz] + delta_f[ivpa,ivperp,iz]
+            f_with_delta_p[ivpa,ivperp,iz] = f[ivpa,ivperp,iz]
+        end
+        p_plus_delta_p = allocate_shared_float(size(ppar)...)
+        begin_z_region()
+        @loop_z iz begin
+            p_plus_delta_p[iz] = ppar[iz] + delta_p[iz]
+        end
+
+        @testset "δf only" begin
+            residual_func!(original_residual, f, ppar)
+            residual_func!(perturbed_residual, f_plus_delta_f, ppar)
+
+            begin_serial_region()
+            @serial_region begin
+                delta_state = zeros(mk_float, total_size)
+                # Take this difference rather than using delta_f directly because we need
+                # the effect of the boundary condition having been applied to
+                # f_plus_delta_f.
+                delta_state[1:pdf_size] .= vec(f_plus_delta_f .- f)
+                residual_update_with_Jacobian = jacobian_matrix * delta_state
+                perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size]
+
+                # Check ppar did not get perturbed by the Jacobian
+                @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end],
+                                           zeros(p_size); atol=1.0e-15)
+
+                # Check that something happened, to make sure that for example the
+                # residual function and Jacobian don't both just zero out the boundary
+                # points.
+                @test norm(vec(perturbed_residual) .- perturbed_with_Jacobian) > 1.0e-12
+
+                # If the boundary condition is correctly implemented in the Jacobian, then
+                # if f+delta_f obeys the boundary condition, then J*delta_state should
+                # give zeros in the boundary points.
+                @test elementwise_isapprox(perturbed_residual,
+                                           reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n);
+                                           rtol=0.0, atol=atol)
+            end
+        end
+
+        @testset "δp only" begin
+            residual_func!(original_residual, f, ppar)
+            residual_func!(perturbed_residual, f_with_delta_p, p_plus_delta_p)
+
+            begin_serial_region()
+            @serial_region begin
+                delta_state = zeros(mk_float, total_size)
+                # Take this difference rather than using delta_f directly because we need
+                # the effect of the boundary condition having been applied to
+                # f_with_delta_p.
+                delta_state[1:pdf_size] .= vec(f_with_delta_p .- f)
+                delta_state[pdf_size+1:end] .= vec(delta_p)
+                residual_update_with_Jacobian = jacobian_matrix * delta_state
+                perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size]
+
+                # Check ppar did not get perturbed by the Jacobian
+                @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end],
+                                           vec(delta_p); atol=1.0e-15)
+
+                # Check that something happened, to make sure that for example the
+                # residual function and Jacobian don't both just zero out the boundary
+                # points.
+                @test norm(vec(perturbed_residual) .- perturbed_with_Jacobian) > 1.0e-12
+
+                # Use an absolute tolerance for this test because if we used a norm_factor
+                # like the other tests, it would be zero to machine precision at some
+                # points.
+                @test elementwise_isapprox(perturbed_residual,
+                                           reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n);
+                                           rtol=0.0, atol=atol)
+            end
+        end
+
+        @testset "δf and δp" begin
+            residual_func!(original_residual, f, ppar)
+            residual_func!(perturbed_residual, f_plus_delta_f, p_plus_delta_p)
+
+            begin_serial_region()
+            @serial_region begin
+                delta_state = zeros(mk_float, total_size)
+                # Take this difference rather than using delta_f directly because we need
+                # the effect of the boundary condition having been applied to
+                # f_plus_delta_f.
+                delta_state[1:pdf_size] .= vec(f_plus_delta_f .- f)
+                delta_state[pdf_size+1:end] .= vec(delta_p)
+                residual_update_with_Jacobian = jacobian_matrix * delta_state
+                perturbed_with_Jacobian = vec(original_residual) .+ residual_update_with_Jacobian[1:pdf_size]
+
+                # Check ppar did not get perturbed by the Jacobian
+                @test elementwise_isapprox(residual_update_with_Jacobian[pdf_size+1:end],
+                                           vec(delta_p); atol=1.0e-15)
+
+                # Check that something happened, to make sure that for example the
+                # residual function and Jacobian don't both just zero out the boundary
+                # points.
+                @test norm(vec(perturbed_residual) .- perturbed_with_Jacobian) > 1.0e-12
+
+                # Use an absolute tolerance for this test because if we used a norm_factor
+                # like the other tests, it would be zero to machine precision at some
+                # points.
+                @test elementwise_isapprox(perturbed_residual,
+                                           reshape(perturbed_with_Jacobian, vpa.n, vperp.n, z.n);
+                                           rtol=0.0, atol=atol)
+            end
+        end
+
+        cleanup_mk_state!(ascii_io, io_moments, io_dfns)
+    end
+
+    return nothing
+end
+
 function runtests()
     if Sys.isapple() && "CI" ∈ keys(ENV) && global_size[] > 1
         # These tests are too slow in the parallel tests job on macOS, so skip in that
@@ -3556,6 +3979,7 @@ function runtests()
         test_electron_implicit_constraint_forcing(test_input)
         test_electron_energy_equation(test_input)
         test_ion_dt_forcing_of_electron_ppar(test_input)
+        test_electron_wall_bc(test_input)
         test_electron_kinetic_equation(test_input)
     end
 
diff --git a/moment_kinetics/test/nonlinear_solver_tests.jl b/moment_kinetics/test/nonlinear_solver_tests.jl
index 36c74eb21..81d4c87c7 100644
--- a/moment_kinetics/test/nonlinear_solver_tests.jl
+++ b/moment_kinetics/test/nonlinear_solver_tests.jl
@@ -58,7 +58,8 @@ function linear_test()
                                zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0),
                                zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0),
                                zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0),
-                               zeros(mk_float, 0), zeros(mk_float, 0),
+                               zeros(mk_int, 0), zeros(mk_float, 0), zeros(mk_float, 0),
+                               zeros(mk_float, 0), zeros(mk_int, 0), zeros(mk_int, 0),
                                zeros(mk_float, 0, 0), zeros(mk_float, 0, 0),
                                advection_input("", 0.0, 0.0, 0.0), zeros(mk_float, 0),
                                zeros(mk_float, 0), MPI.COMM_NULL, 1:n, 1:n,
@@ -171,7 +172,8 @@ function nonlinear_test()
                                zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0),
                                zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0),
                                zeros(mk_float, 0), zeros(mk_float, 0), zeros(mk_float, 0),
-                               zeros(mk_float, 0), zeros(mk_float, 0),
+                               zeros(mk_int, 0), zeros(mk_float, 0), zeros(mk_float, 0),
+                               zeros(mk_float, 0), zeros(mk_int, 0), zeros(mk_int, 0),
                                zeros(mk_float, 0, 0), zeros(mk_float, 0, 0),
                                advection_input("", 0.0, 0.0, 0.0), zeros(mk_float, 0),
                                zeros(mk_float, 0), MPI.COMM_NULL, 1:n, 1:n,